From f0edecd755ba24afdd97eb6bacef1e4bb91596ae Mon Sep 17 00:00:00 2001
From: Ivan Lezhankin
Date: Tue, 6 Nov 2018 14:23:19 +0300
Subject: [PATCH 001/586] Respect 'receive_timeout' option on client-side.

---
 .gitignore                                  |  1 +
 dbms/programs/client/Client.cpp             | 67 +++++++++++++++------
 dbms/programs/client/ConnectionParameters.h |  2 +-
 dbms/programs/server/TCPHandler.cpp         |  4 +-
 dbms/src/Client/Connection.h                |  6 ++
 5 files changed, 59 insertions(+), 21 deletions(-)

diff --git a/.gitignore b/.gitignore
index 8359edbabde..585a4074767 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,6 +11,7 @@
 /build
 /build_*
+/build-*
 /docs/build
 /docs/edit
 /docs/tools/venv/
diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp
index bf57d072f6b..2bce38ac073 100644
--- a/dbms/programs/client/Client.cpp
+++ b/dbms/programs/client/Client.cpp
@@ -1028,25 +1028,53 @@ private:
         InterruptListener interrupt_listener;
         bool cancelled = false;

+        // TODO: get the poll_interval from commandline.
+        const auto receive_timeout = connection->getTimeouts().receive_timeout;
+        constexpr size_t default_poll_interval = 1000000, min_poll_interval = 5000; /// in microseconds
+        const size_t poll_interval
+            = std::max(min_poll_interval, std::min<size_t>(receive_timeout.totalMicroseconds(), default_poll_interval));
+
         while (true)
         {
-            /// Has the Ctrl+C been pressed and thus the query should be cancelled?
-            /// If this is the case, inform the server about it and receive the remaining packets
-            /// to avoid losing sync.
-            if (!cancelled)
-            {
-                if (interrupt_listener.check())
-                {
-                    connection->sendCancel();
-                    cancelled = true;
-                    if (is_interactive)
-                        std::cout << "Cancelling query." << std::endl;
+            Stopwatch watch(CLOCK_MONOTONIC_COARSE);

-                    /// Pressing Ctrl+C twice results in shut down.
-                    interrupt_listener.unblock();
+            while (true)
+            {
+                /// Has the Ctrl+C been pressed and thus the query should be cancelled?
+                /// If this is the case, inform the server about it and receive the remaining packets
+                /// to avoid losing sync.
+                if (!cancelled)
+                {
+                    auto cancelQuery = [&] {
+                        connection->sendCancel();
+                        cancelled = true;
+                        if (is_interactive)
+                            std::cout << "Cancelling query." << std::endl;
+
+                        /// Pressing Ctrl+C twice results in shut down.
+                        interrupt_listener.unblock();
+                    };
+
+                    if (interrupt_listener.check())
+                    {
+                        cancelQuery();
+                    } else {
+                        double elapsed = watch.elapsedSeconds();
+                        if (elapsed > receive_timeout.totalSeconds())
+                        {
+                            std::cout << "Timeout exceeded while receiving data from server."
+                                << " Waited for " << static_cast<size_t>(elapsed) << " seconds,"
+                                << " timeout is " << receive_timeout.totalSeconds() << " seconds." << std::endl;
+
+                            cancelQuery();
+                        }
+                    }
                 }
-                else if (!connection->poll(1000000))
-                    continue;    /// If there is no new data, continue checking whether the query was cancelled after a timeout.
+
+                /// Poll for changes after a cancellation check, otherwise it is never reached
+                /// because of progress updates from the server.
+                if (connection->poll(poll_interval))
+                    break;
             }

             if (!receiveAndProcessPacket())
@@ -1634,9 +1662,12 @@ public:
         }

         /// Extract settings from the options.
-#define EXTRACT_SETTING(TYPE, NAME, DEFAULT, DESCRIPTION) \
-        if (options.count(#NAME)) \
-            context.setSetting(#NAME, options[#NAME].as<std::string>());
+#define EXTRACT_SETTING(TYPE, NAME, DEFAULT, DESCRIPTION) \
+        if (options.count(#NAME)) \
+        { \
+            context.setSetting(#NAME, options[#NAME].as<std::string>()); \
+            config().setString(#NAME, options[#NAME].as<std::string>()); \
+        }
         APPLY_FOR_SETTINGS(EXTRACT_SETTING)
 #undef EXTRACT_SETTING

diff --git a/dbms/programs/client/ConnectionParameters.h b/dbms/programs/client/ConnectionParameters.h
index 68bc3728349..af937005c35 100644
--- a/dbms/programs/client/ConnectionParameters.h
+++ b/dbms/programs/client/ConnectionParameters.h
@@ -75,8 +75,8 @@ struct ConnectionParameters

         timeouts = ConnectionTimeouts(
             Poco::Timespan(config.getInt("connect_timeout", DBMS_DEFAULT_CONNECT_TIMEOUT_SEC), 0),
-            Poco::Timespan(config.getInt("receive_timeout", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0),
             Poco::Timespan(config.getInt("send_timeout", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0),
+            Poco::Timespan(config.getInt("receive_timeout", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0),
             Poco::Timespan(config.getInt("tcp_keep_alive_timeout", 0), 0));
     }
 };
diff --git a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp
index 5c9b3a2d86d..9d6d8af8822 100644
--- a/dbms/programs/server/TCPHandler.cpp
+++ b/dbms/programs/server/TCPHandler.cpp
@@ -299,10 +299,10 @@ void TCPHandler::runImpl()

 void TCPHandler::readData(const Settings & global_settings)
 {
-    auto receive_timeout = query_context.getSettingsRef().receive_timeout.value;
+    const auto receive_timeout = query_context.getSettingsRef().receive_timeout.value;

     /// Poll interval should not be greater than receive_timeout
-    size_t default_poll_interval = global_settings.poll_interval.value * 1000000;
+    const size_t default_poll_interval = global_settings.poll_interval.value * 1000000;
     size_t current_poll_interval = static_cast<size_t>(receive_timeout.totalMicroseconds());
     constexpr size_t min_poll_interval = 5000; // 5 ms
     size_t poll_interval = std::max(min_poll_interval, std::min(default_poll_interval, current_poll_interval));
diff --git a/dbms/src/Client/Connection.h b/dbms/src/Client/Connection.h
index d8229fc3463..e290e9b0c70 100644
--- a/dbms/src/Client/Connection.h
+++ b/dbms/src/Client/Connection.h
@@ -117,6 +117,12 @@ public:
     UInt16 getPort() const;
     const String & getDefaultDatabase() const;

+    /// For proper polling.
+    inline const auto & getTimeouts() const
+    {
+        return timeouts;
+    }
+
     /// If last flag is true, you need to call sendExternalTablesData after.
     void sendQuery(
         const String & query,

From d846c0a9a63fa522ff7371d6c438ea7ba9adfe35 Mon Sep 17 00:00:00 2001
From: Ivan Lezhankin
Date: Tue, 6 Nov 2018 14:38:21 +0300
Subject: [PATCH 002/586] Report progress from server-side to keep client
 connections alive.

This way we make sure that 'receive_timeout' on client-side is triggered
only when there is a real socket read timeout.
---
 dbms/programs/server/TCPHandler.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp
index 9d6d8af8822..9fdd59d6388 100644
--- a/dbms/programs/server/TCPHandler.cpp
+++ b/dbms/programs/server/TCPHandler.cpp
@@ -398,7 +398,7 @@ void TCPHandler::processOrdinaryQuery()
             }
             else
             {
-                if (state.progress.rows && after_send_progress.elapsed() / 1000 >= query_context.getSettingsRef().interactive_delay)
+                if (after_send_progress.elapsed() / 1000 >= query_context.getSettingsRef().interactive_delay)
                 {
                     /// Some time passed and there is a progress.
                     after_send_progress.restart();
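The two patches above share one idea: the client polls the socket in short, bounded intervals and checks a monotonic stopwatch against receive_timeout between polls, while the server keeps the connection alive with periodic progress packets. A self-contained sketch of the client-side loop follows; poll_connection() and send_cancel() are hypothetical stand-ins, not the real Connection API:

    #include <algorithm>
    #include <chrono>
    #include <iostream>

    /// Hypothetical stand-ins for Connection::poll() and Connection::sendCancel().
    bool poll_connection(size_t /*timeout_us*/) { return false; }   /// pretend no packet ever arrives
    void send_cancel() { std::cout << "Cancelling query." << std::endl; }

    int main()
    {
        const size_t receive_timeout_us = 3 * 1000000;        /// shortened for the demo; the default is 300 s
        constexpr size_t default_poll_interval_us = 1000000;  /// 1 s
        constexpr size_t min_poll_interval_us = 5000;         /// 5 ms, same clamp as the patch
        const size_t poll_interval_us
            = std::max(min_poll_interval_us, std::min(receive_timeout_us, default_poll_interval_us));

        const auto start = std::chrono::steady_clock::now();  /// monotonic, like CLOCK_MONOTONIC_COARSE
        while (!poll_connection(poll_interval_us))
        {
            if (std::chrono::steady_clock::now() - start > std::chrono::microseconds(receive_timeout_us))
            {
                send_cancel();   /// no packet within receive_timeout: give up and cancel
                break;
            }
        }
    }

The clamp mirrors the patch's design choice: polling more often than every 5 ms wastes CPU, while polling less often than receive_timeout would make the timeout check unreachable.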
From d7e25e143952707ad3121180c6ebf873ace83963 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Fri, 14 Dec 2018 22:28:37 +0300
Subject: [PATCH 003/586] Always build ODBC bridge as a separate binary #3360

---
 dbms/programs/CMakeLists.txt                  | 20 +++++++++----------
 dbms/programs/main.cpp                        |  6 ------
 dbms/programs/odbc-bridge/CMakeLists.txt      | 10 ++++++----
 dbms/src/Common/SharedLibrary.cpp             |  6 +++---
 dbms/src/Common/SharedLibrary.h               |  5 +++--
 .../Dictionaries/LibraryDictionarySource.cpp  |  2 +-
 6 files changed, 22 insertions(+), 27 deletions(-)

diff --git a/dbms/programs/CMakeLists.txt b/dbms/programs/CMakeLists.txt
index 9d7c6f2cda1..613b21cf48b 100644
--- a/dbms/programs/CMakeLists.txt
+++ b/dbms/programs/CMakeLists.txt
@@ -28,11 +28,18 @@ add_subdirectory (copier)
 add_subdirectory (format)
 add_subdirectory (clang)
 add_subdirectory (obfuscator)
-add_subdirectory (odbc-bridge)
+
+if (ENABLE_CLICKHOUSE_ODBC_BRIDGE)
+    add_subdirectory (odbc-bridge)
+endif ()

 if (CLICKHOUSE_SPLIT_BINARY)
     set (CLICKHOUSE_ALL_TARGETS clickhouse-server clickhouse-client clickhouse-local clickhouse-benchmark clickhouse-performance-test
-        clickhouse-extract-from-config clickhouse-compressor clickhouse-format clickhouse-copier clickhouse-odbc-bridge)
+        clickhouse-extract-from-config clickhouse-compressor clickhouse-format clickhouse-copier)
+
+    if (ENABLE_CLICKHOUSE_ODBC_BRIDGE)
+        list (APPEND CLICKHOUSE_ALL_TARGETS clickhouse-odbc-bridge)
+    endif ()

     if (USE_EMBEDDED_COMPILER)
         list (APPEND CLICKHOUSE_ALL_TARGETS clickhouse-clang clickhouse-lld)
@@ -85,9 +92,6 @@ else ()
     if (USE_EMBEDDED_COMPILER)
         target_link_libraries (clickhouse PRIVATE clickhouse-compiler-lib)
     endif ()
-    if (ENABLE_CLICKHOUSE_ODBC_BRIDGE)
-        target_link_libraries (clickhouse PRIVATE clickhouse-odbc-bridge-lib)
-    endif()

     set (CLICKHOUSE_BUNDLE)
     if (ENABLE_CLICKHOUSE_SERVER)
@@ -140,12 +144,6 @@ else ()
         install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-obfuscator DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
         list(APPEND CLICKHOUSE_BUNDLE clickhouse-obfuscator)
     endif ()
-    if (ENABLE_CLICKHOUSE_ODBC_BRIDGE)
-        add_custom_target (clickhouse-odbc-bridge ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-odbc-bridge DEPENDS clickhouse)
-        install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-odbc-bridge DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
-        list(APPEND CLICKHOUSE_BUNDLE clickhouse-odbc-bridge)
-    endif ()

     # install always because debian packages want these files:
     add_custom_target (clickhouse-clang ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-clang DEPENDS clickhouse)
diff --git a/dbms/programs/main.cpp b/dbms/programs/main.cpp
index 29d64213d9c..112803dab57 100644
--- a/dbms/programs/main.cpp
+++ b/dbms/programs/main.cpp
@@ -56,9 +56,6 @@ int mainEntryClickHouseClusterCopier(int argc, char ** argv);
 #if ENABLE_CLICKHOUSE_OBFUSCATOR
 int mainEntryClickHouseObfuscator(int argc, char ** argv);
 #endif
-#if ENABLE_CLICKHOUSE_ODBC_BRIDGE || !defined(ENABLE_CLICKHOUSE_ODBC_BRIDGE)
-int mainEntryClickHouseODBCBridge(int argc, char ** argv);
-#endif

 #if USE_EMBEDDED_COMPILER
@@ -105,9 +102,6 @@ std::pair<const char *, MainFunc> clickhouse_applications[] =
 #if ENABLE_CLICKHOUSE_OBFUSCATOR
     {"obfuscator", mainEntryClickHouseObfuscator},
 #endif
-#if ENABLE_CLICKHOUSE_ODBC_BRIDGE || !defined(ENABLE_CLICKHOUSE_ODBC_BRIDGE)
-    {"odbc-bridge", mainEntryClickHouseODBCBridge},
-#endif

 #if USE_EMBEDDED_COMPILER
     {"clang", mainEntryClickHouseClang},
diff --git a/dbms/programs/odbc-bridge/CMakeLists.txt b/dbms/programs/odbc-bridge/CMakeLists.txt
index a57c8c9c8cf..f7667aaea18 100644
--- a/dbms/programs/odbc-bridge/CMakeLists.txt
+++ b/dbms/programs/odbc-bridge/CMakeLists.txt
@@ -33,7 +33,9 @@ if (ENABLE_TESTS)
     add_subdirectory (tests)
 endif ()

-if (CLICKHOUSE_SPLIT_BINARY)
-    add_executable (clickhouse-odbc-bridge odbc-bridge.cpp)
-    target_link_libraries (clickhouse-odbc-bridge PRIVATE clickhouse-odbc-bridge-lib)
-endif ()
+# clickhouse-odbc-bridge is always a separate binary.
+# Reason: it must not export symbols from SSL, mariadb-client, etc. to not break ABI compatibility with ODBC drivers.
+
+add_executable (clickhouse-odbc-bridge odbc-bridge.cpp)
+target_link_libraries (clickhouse-odbc-bridge PRIVATE clickhouse-odbc-bridge-lib)
+install (TARGETS clickhouse-odbc-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
diff --git a/dbms/src/Common/SharedLibrary.cpp b/dbms/src/Common/SharedLibrary.cpp
index 92083055098..30ed3bccaab 100644
--- a/dbms/src/Common/SharedLibrary.cpp
+++ b/dbms/src/Common/SharedLibrary.cpp
@@ -1,9 +1,9 @@
 #include "SharedLibrary.h"
 #include <string>
-#include <dlfcn.h>
 #include <boost/core/noncopyable.hpp>
 #include "Exception.h"
+
 namespace DB
 {
 namespace ErrorCodes
@@ -12,9 +12,9 @@ namespace ErrorCodes
     extern const int CANNOT_DLSYM;
 }

-SharedLibrary::SharedLibrary(const std::string & path)
+SharedLibrary::SharedLibrary(const std::string & path, int flags)
 {
-    handle = dlopen(path.c_str(), RTLD_LAZY);
+    handle = dlopen(path.c_str(), flags);
     if (!handle)
         throw Exception(std::string("Cannot dlopen: ") + dlerror(), ErrorCodes::CANNOT_DLOPEN);
 }
diff --git a/dbms/src/Common/SharedLibrary.h b/dbms/src/Common/SharedLibrary.h
index 96c8f6fe025..9d2b9bc7843 100644
--- a/dbms/src/Common/SharedLibrary.h
+++ b/dbms/src/Common/SharedLibrary.h
@@ -1,5 +1,6 @@
 #pragma once

+#include <dlfcn.h>
 #include <memory>
 #include <string>
 #include <boost/noncopyable.hpp>
@@ -8,12 +9,12 @@

 namespace DB
 {
-    /** Allows you to open a dynamic library and get a pointer to a function from it.
+/** Allows you to open a dynamic library and get a pointer to a function from it.
   */
 class SharedLibrary : private boost::noncopyable
 {
 public:
-    explicit SharedLibrary(const std::string & path);
+    explicit SharedLibrary(const std::string & path, int flags = RTLD_LAZY);

     ~SharedLibrary();
diff --git a/dbms/src/Dictionaries/LibraryDictionarySource.cpp b/dbms/src/Dictionaries/LibraryDictionarySource.cpp
index eec291321ad..fe6a294c1ac 100644
--- a/dbms/src/Dictionaries/LibraryDictionarySource.cpp
+++ b/dbms/src/Dictionaries/LibraryDictionarySource.cpp
@@ -135,7 +135,7 @@ LibraryDictionarySource::LibraryDictionarySource(
             "LibraryDictionarySource: Can't load lib " + toString() + ": " + Poco::File(path).path() + " - File doesn't exist",
             ErrorCodes::FILE_DOESNT_EXIST);
     description.init(sample_block);
-    library = std::make_shared<SharedLibrary>(path);
+    library = std::make_shared<SharedLibrary>(path, RTLD_LAZY | RTLD_DEEPBIND);
     settings = std::make_shared<CStringsHolder>(getLibSettings(config, config_prefix + lib_config_settings));
     if (auto libNew = library->tryGet<decltype(lib_data) (*)(decltype(&settings->strings), decltype(&ClickHouseLibrary::log))>(
             "ClickHouseDictionary_v3_libNew"))
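The SharedLibrary change matters for the same ABI reason as the separate bridge binary: RTLD_DEEPBIND (a glibc extension) makes a dlopen'ed dictionary library resolve its own symbols first, so it does not accidentally bind to SSL or mariadb-client symbols exported by the host process. A minimal standalone sketch of that pattern, using only standard dlfcn calls:

    #include <dlfcn.h>
    #include <stdexcept>
    #include <string>

    /// Open a plugin so it prefers its own symbol definitions over the host's.
    /// RTLD_DEEPBIND is glibc-specific; on other libcs only RTLD_LAZY applies.
    void * open_plugin(const std::string & path, int flags = RTLD_LAZY | RTLD_DEEPBIND)
    {
        void * handle = dlopen(path.c_str(), flags);
        if (!handle)
            throw std::runtime_error(std::string("Cannot dlopen: ") + dlerror());
        return handle;
    }

    /// Resolve a symbol, mirroring the dlsym usage in SharedLibrary.
    template <typename Func>
    Func get_symbol(void * handle, const char * name)
    {
        if (void * sym = dlsym(handle, name))
            return reinterpret_cast<Func>(sym);
        throw std::runtime_error(std::string("Cannot dlsym: ") + dlerror());
    }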
From fcd49afc2aade520ddb83690f9d157b32b444e83 Mon Sep 17 00:00:00 2001
From: Nikita Vasilev
Date: Tue, 25 Dec 2018 21:42:43 +0300
Subject: [PATCH 004/586] made index parser

---
 dbms/src/Parsers/ASTIndexDeclaration.h | 51 ++++++++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 dbms/src/Parsers/ASTIndexDeclaration.h

diff --git a/dbms/src/Parsers/ASTIndexDeclaration.h b/dbms/src/Parsers/ASTIndexDeclaration.h
new file mode 100644
index 00000000000..c13a9ab03e2
--- /dev/null
+++ b/dbms/src/Parsers/ASTIndexDeclaration.h
@@ -0,0 +1,51 @@
+#pragma once
+
+#include <Core/Types.h>
+#include <Parsers/ASTExpressionList.h>
+#include <Parsers/ASTFunction.h>
+#include <Parsers/IAST.h>
+
+#include <vector>
+
+
+namespace DB
+{
+
+
+/** Index name(expr) TYPE typename(args) in create query
+  */
+class ASTIndexQuery : public IAST
+{
+public:
+    struct Index
+    {
+        String name;
+        ASTPtr expression_list;
+        ASTFunction type;
+    };
+
+    using Indexes = std::vector<Index>;
+    Indexes indexes;
+
+    /** Get the text that identifies this element. */
+    String getID(char) const override { return "Index"; }
+
+    ASTPtr clone() const override { return std::make_shared<ASTIndexQuery>(*this); }
+
+    void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override
+    {
+        for (ASTIndexQuery::Indexes::const_iterator it = indexes.begin(); it != indexes.end(); ++it)
+        {
+            if (it != indexes.begin())
+                s.ostr << s.nl_or_ws;
+
+            s.ostr << (s.hilite ? hilite_keyword : "") << "INDEX" << (s.hilite ? hilite_none : "") << " " << it->name;
+            s.ostr << "(";
+            it->expression_list->formatImpl(s, state, frame);
+            s.ostr << ") " << (s.hilite ? hilite_keyword : "") << "TYPE" << (s.hilite ? hilite_none : "");
+            it->type.formatImpl(s, state, frame);
+        }
+    }
+};
+
+}

From fcd49afc2aade520ddb83690f9d157b32b444e83 Mon Sep 17 00:00:00 2001
From: Nikita Vasilev
Date: Tue, 25 Dec 2018 21:45:08 +0300
Subject: [PATCH 005/586] added index parsing

---
 dbms/src/Parsers/ASTCreateQuery.h        | 11 +++++
 dbms/src/Parsers/ASTIndexDeclaration.h   | 43 +++++++++---------
 dbms/src/Parsers/ParserCreateQuery.cpp   | 55 ++++++++++++++++++++++++
 dbms/src/Parsers/ParserCreateQuery.h     | 19 +++++++-
 dbms/src/Parsers/tests/create_parser.cpp |  2 +-
 5 files changed, 105 insertions(+), 25 deletions(-)

diff --git a/dbms/src/Parsers/ASTCreateQuery.h b/dbms/src/Parsers/ASTCreateQuery.h
index 840ba345813..82062fbab9d 100644
--- a/dbms/src/Parsers/ASTCreateQuery.h
+++ b/dbms/src/Parsers/ASTCreateQuery.h
@@ -19,6 +19,7 @@ public:
     IAST * primary_key = nullptr;
     IAST * order_by = nullptr;
     IAST * sample_by = nullptr;
+    std::vector<IAST *> indexes;
     ASTSetQuery * settings = nullptr;

     String getID(char) const override { return "Storage definition"; }
@@ -38,6 +39,12 @@ public:
             res->set(res->order_by, order_by->clone());
         if (sample_by)
             res->set(res->sample_by, sample_by->clone());
+
+        for (const auto& index : indexes) {
+            res->indexes.emplace_back(nullptr);
+            res->set(res->indexes.back(), index->clone());
+        }
+
         if (settings)
             res->set(res->settings, settings->clone());

@@ -71,6 +78,10 @@ public:
             s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << "SAMPLE BY " << (s.hilite ? hilite_none : "");
             sample_by->formatImpl(s, state, frame);
         }
+        for (const auto& index : indexes) {
+            s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << "INDEX " << (s.hilite ? hilite_none : "");
+            index->formatImpl(s, state, frame);
+        }
         if (settings)
         {
             s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << "SETTINGS " << (s.hilite ? hilite_none : "");
diff --git a/dbms/src/Parsers/ASTIndexDeclaration.h b/dbms/src/Parsers/ASTIndexDeclaration.h
index c13a9ab03e2..3195a9c4bfe 100644
--- a/dbms/src/Parsers/ASTIndexDeclaration.h
+++ b/dbms/src/Parsers/ASTIndexDeclaration.h
@@ -11,40 +11,37 @@
 namespace DB
 {

-
 /** Index name(expr) TYPE typename(args) in create query
   */
-class ASTIndexQuery : public IAST
+class ASTIndexDeclaration : public IAST
 {
 public:
-    struct Index
-    {
-        String name;
-        ASTPtr expression_list;
-        ASTFunction type;
-    };
-
-    using Indexes = std::vector<Index>;
-    Indexes indexes;
+    String name;
+    IAST * expr;
+    ASTFunction * type;
+
+    //TODO: params (GRANULARITY number or SETTINGS a=b, c=d, ..)?

     /** Get the text that identifies this element. */
     String getID(char) const override { return "Index"; }

-    ASTPtr clone() const override { return std::make_shared<ASTIndexQuery>(*this); }
+    ASTPtr clone() const override {
+        auto res = std::make_shared<ASTIndexDeclaration>(*this);
+        res->children.clear();
+
+        if (expr)
+            res->set(res->expr, expr->clone());
+        if (type)
+            res->set(res->type, type->clone());
+        return res;
+    }

     void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override
     {
-        for (ASTIndexQuery::Indexes::const_iterator it = indexes.begin(); it != indexes.end(); ++it)
-        {
-            if (it != indexes.begin())
-                s.ostr << s.nl_or_ws;
-
-            s.ostr << (s.hilite ? hilite_keyword : "") << "INDEX" << (s.hilite ? hilite_none : "") << " " << it->name;
-            s.ostr << "(";
-            it->expression_list->formatImpl(s, state, frame);
-            s.ostr << ") " << (s.hilite ? hilite_keyword : "") << "TYPE" << (s.hilite ? hilite_none : "");
-            it->type.formatImpl(s, state, frame);
-        }
+        s.ostr << name;
+        s.ostr << (s.hilite ? hilite_keyword : "") << " BY " << (s.hilite ? hilite_none : "");
+        expr->formatImpl(s, state, frame);
+        s.ostr << (s.hilite ? hilite_keyword : "") << " TYPE " << (s.hilite ? hilite_none : "");
+        type->formatImpl(s, state, frame);
     }
 };

diff --git a/dbms/src/Parsers/ParserCreateQuery.cpp b/dbms/src/Parsers/ParserCreateQuery.cpp
index f44b7d35eb6..54a12c64a22 100644
--- a/dbms/src/Parsers/ParserCreateQuery.cpp
+++ b/dbms/src/Parsers/ParserCreateQuery.cpp
@@ -1,5 +1,6 @@
 #include <Parsers/ASTFunction.h>
 #include <Parsers/ASTIdentifier.h>
+#include <Parsers/ASTIndexDeclaration.h>
 #include <Parsers/ExpressionListParsers.h>
 #include <Parsers/ParserCreateQuery.h>
 #include <Parsers/ParserSetQuery.h>
@@ -90,6 +91,43 @@ bool ParserColumnDeclarationList::parseImpl(Pos & pos, ASTPtr & node, Expected &
         .parse(pos, node, expected);
 }

+bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+    ParserKeyword s_by("BY");
+    ParserKeyword s_type("TYPE");
+
+    ParserIdentifier name_p;
+    ParserIdentifierWithOptionalParameters ident_with_optional_params_p;
+    ParserExpression expression_p;
+
+    ASTPtr name;
+    ASTPtr expr;
+    ASTPtr type;
+
+    if (!name_p.parse(pos, name, expected))
+        return false;
+
+    if (!s_by.ignore(pos, expected))
+        return false;
+
+    if (!expression_p.parse(pos, expr, expected))
+        return false;
+
+    if (!s_type.ignore(pos, expected))
+        return false;
+
+    if (!ident_with_optional_params_p.parse(pos, type, expected))
+        return false;
+
+    auto index = std::make_shared<ASTIndexDeclaration>();
+    index->name = typeid_cast<const ASTIdentifier &>(*name).name;
+    index->set(index->expr, expr);
+    index->set(index->type, type);
+    node = index;
+
+    return true;
+}
+

 bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
 {
@@ -99,17 +137,20 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
     ParserKeyword s_primary_key("PRIMARY KEY");
     ParserKeyword s_order_by("ORDER BY");
     ParserKeyword s_sample_by("SAMPLE BY");
+    ParserKeyword s_index("INDEX");
     ParserKeyword s_settings("SETTINGS");

     ParserIdentifierWithOptionalParameters ident_with_optional_params_p;
     ParserExpression expression_p;
     ParserSetQuery settings_p(/* parse_only_internals_ = */ true);
+    ParserIndexDeclaration index_p;

     ASTPtr engine;
     ASTPtr partition_by;
     ASTPtr primary_key;
     ASTPtr order_by;
     ASTPtr sample_by;
+    ASTs indexes;
     ASTPtr settings;

     if (!s_engine.ignore(pos, expected))
@@ -154,6 +195,14 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
                 return false;
         }

+        if (s_index.ignore(pos, expected)) {
+            indexes.emplace_back(nullptr);
+            if (index_p.parse(pos, indexes.back(), expected))
+                continue;
+            else
+                return false;
+        }
+
         if (s_settings.ignore(pos, expected))
         {
             if (!settings_p.parse(pos, settings, expected))
@@ -169,6 +218,12 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
     storage->set(storage->primary_key, primary_key);
     storage->set(storage->order_by, order_by);
     storage->set(storage->sample_by, sample_by);
+
+    for (const auto& index : indexes) {
+        storage->indexes.emplace_back(nullptr);
+        storage->set(storage->indexes.back(), index);
+    }
+
     storage->set(storage->settings, settings);

     node = storage;
diff --git a/dbms/src/Parsers/ParserCreateQuery.h b/dbms/src/Parsers/ParserCreateQuery.h
index 1084f86d1eb..1529b413008 100644
--- a/dbms/src/Parsers/ParserCreateQuery.h
+++ b/dbms/src/Parsers/ParserCreateQuery.h
@@ -199,7 +199,24 @@ protected:
 };

-/** ENGINE = name [PARTITION BY expr] [ORDER BY expr] [PRIMARY KEY expr] [SAMPLE BY expr] [SETTINGS name = value, ...] */
+/**
+  * INDEX name(column1, column2, ...) TYPE typename(arg1, arg2, ...)
+  */
+class ParserIndexDeclaration : public IParserBase
+{
+public:
+    ParserIndexDeclaration() {}
+
+protected:
+    const char * getName() const override { return "INDEX query"; }
+    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+};
+
+
+/**
+  * ENGINE = name [PARTITION BY expr] [ORDER BY expr] [PRIMARY KEY expr] [SAMPLE BY expr]
+  * [INDEX name(expr) TYPE type(args) ...] [SETTINGS name = value, ...]
+  */
 class ParserStorage : public IParserBase
 {
 protected:
diff --git a/dbms/src/Parsers/tests/create_parser.cpp b/dbms/src/Parsers/tests/create_parser.cpp
index 6137d4d18da..8a591dfa496 100644
--- a/dbms/src/Parsers/tests/create_parser.cpp
+++ b/dbms/src/Parsers/tests/create_parser.cpp
@@ -10,7 +10,7 @@ int main(int, char **)
 {
     using namespace DB;

-    std::string input = "CREATE TABLE hits (URL String, UserAgentMinor2 FixedString(2), EventTime DateTime) ENGINE = Log";
+    std::string input = "CREATE TABLE hits (URL String, UserAgentMinor2 FixedString(2), EventTime DateTime) ENGINE = MergeTree() ORDER BY EventTime INDEX minmax1 BY (lower(URL), EventTime) TYPE MINMAX(1,2,3)";

     ParserCreateQuery parser;
     ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0);

From 36083e103609566318ee80a16862aa67e6a220b9 Mon Sep 17 00:00:00 2001
From: Nikita Vasilev
Date: Tue, 25 Dec 2018 22:37:39 +0300
Subject: [PATCH 006/586] some fixes

---
 dbms/src/Interpreters/InterpreterCreateQuery.cpp | 3 ++-
 dbms/src/Storages/StorageFactory.cpp             | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp
index 66ce6bb3fb1..0f5f421c984 100644
--- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp
@@ -103,7 +103,8 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create)
         const ASTStorage & storage = *create.storage;
         const ASTFunction & engine = *storage.engine;
         /// Currently, there are no database engines, that support any arguments.
-        if (engine.arguments || engine.parameters || storage.partition_by || storage.primary_key || storage.order_by || storage.sample_by || storage.settings)
+        if (engine.arguments || engine.parameters || storage.partition_by || storage.primary_key
+            || storage.order_by || storage.sample_by || !storage.indexes.empty() || storage.settings)
         {
             std::stringstream ostr;
             formatAST(storage, ostr, false, false);
diff --git a/dbms/src/Storages/StorageFactory.cpp b/dbms/src/Storages/StorageFactory.cpp
index 333933d449f..b1072eb0b36 100644
--- a/dbms/src/Storages/StorageFactory.cpp
+++ b/dbms/src/Storages/StorageFactory.cpp
@@ -94,7 +94,8 @@ StoragePtr StorageFactory::get(
                 ErrorCodes::BAD_ARGUMENTS);
         }

-        if ((storage_def->partition_by || storage_def->primary_key || storage_def->order_by || storage_def->sample_by)
+        if ((storage_def->partition_by || storage_def->primary_key || storage_def->order_by
+            || storage_def->sample_by || !storage_def->indexes.empty())
            && !endsWith(name, "MergeTree"))
         {
             throw Exception(
From 04a8ea85afad831360b80dd6b4129be6b6ee8604 Mon Sep 17 00:00:00 2001
From: Nikita Vasilev
Date: Wed, 26 Dec 2018 15:19:24 +0300
Subject: [PATCH 007/586] added index interface and factory

---
 dbms/src/Storages/MergeTree/MergeTreeData.cpp |  1 +
 dbms/src/Storages/MergeTree/MergeTreeData.h   |  1 +
 .../Storages/MergeTree/MergeTreeIndexes.cpp   | 31 ++++++++++
 .../src/Storages/MergeTree/MergeTreeIndexes.h | 60 +++++++++++++++++++
 .../MergeTree/registerStorageMergeTree.cpp    | 12 +++-
 dbms/src/Storages/StorageMergeTree.cpp        |  3 +-
 dbms/src/Storages/StorageMergeTree.h          |  1 +
 7 files changed, 105 insertions(+), 4 deletions(-)
 create mode 100644 dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp
 create mode 100644 dbms/src/Storages/MergeTree/MergeTreeIndexes.h

diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
index b65d23f47e1..b8d80efde66 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
@@ -94,6 +94,7 @@ MergeTreeData::MergeTreeData(
     const ASTPtr & order_by_ast_,
     const ASTPtr & primary_key_ast_,
     const ASTPtr & sample_by_ast_,
+    const ASTs & indexes_ast_,
     const MergingParams & merging_params_,
     const MergeTreeSettings & settings_,
     bool require_part_metadata_,
diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h
index b8f01c40077..2587a7ad085 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeData.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeData.h
@@ -306,6 +306,7 @@ public:
     const ASTPtr & order_by_ast_,
     const ASTPtr & primary_key_ast_,
     const ASTPtr & sample_by_ast_, /// nullptr, if sampling is not supported.
+    const ASTs & indexes_ast_,
     const MergingParams & merging_params_,
     const MergeTreeSettings & settings_,
     bool require_part_metadata_,
diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp
new file mode 100644
index 00000000000..c685ebc1b71
--- /dev/null
+++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp
@@ -0,0 +1,31 @@
+#include <Storages/MergeTree/MergeTreeIndexes.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int LOGICAL_ERROR;
+    extern const int INCORRECT_QUERY;
+}
+
+void MergeTreeIndexFactory::registerIndex(const std::string &name, Creator creator) {
+    if (!indexes.emplace(name, std::move(creator)).second)
+        throw Exception("MergeTreeIndexFactory: the Index creator name '" + name + "' is not unique",
+                ErrorCodes::LOGICAL_ERROR);
+}
+
+IMergeTreeIndex MergeTreeIndexFactory::get(const ASTIndexDeclaration & node) const {
+    if (!node.type)
+        throw Exception(
+                "for INDEX TYPE is required",
+                ErrorCodes::INCORRECT_QUERY);
+    auto it = indexes.find(node.type->name);
+    if (it == indexes.end())
+        throw Exception(
+                "Unknown Index type '" + node.type->name + "'",
+                ErrorCodes::INCORRECT_QUERY);
+    return it->second(node);
+}
+
+}
\ No newline at end of file
diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h
new file mode 100644
index 00000000000..b6bb008d808
--- /dev/null
+++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h
@@ -0,0 +1,60 @@
+#pragma once
+
+#include <string>
+#include <unordered_map>
+#include <vector>
+#include <Storages/MergeTree/MergeTreeData.h>
+#include <Parsers/ASTIndexDeclaration.h>
+#include <ext/singleton.h>
+
+namespace DB
+{
+
+class IMergeTreeIndex;
+using MergeTreeIndexes = std::vector<IMergeTreeIndex>;
+
+
+/// Interface for secondary MergeTree indexes
+class IMergeTreeIndex
+{
+public:
+    virtual void load(const MergeTreeData & storage, const String & part_path) = 0;
+    virtual void store(const MergeTreeData & storage, const String & part_path,
+                       MergeTreeDataPartChecksums & checksums) const = 0;
+
+    virtual void update(const Block & block, const Names & column_names) = 0;
+    virtual void merge(const IMergeTreeIndex & other) = 0;
+
+    virtual bool alwaysUnknownOrTrue() const = 0;
+    virtual bool maybeTrue() const = 0;
+
+    String name;
+    ExpressionActionsPtr expr;
+    Block header;
+};
+
+
+class MergeTreeIndexFactory : public ext::singleton<MergeTreeIndexFactory>
+{
+    friend class ext::singleton<MergeTreeIndexFactory>;
+
+public:
+    using Creator = std::function<IMergeTreeIndex(const ASTIndexDeclaration & node)>;
+
+protected:
+    MergeTreeIndexFactory() {};
+
+    IMergeTreeIndex get(const ASTIndexDeclaration & node) const;
+
+    void registerIndex(const std::string & name, Creator creator);
+
+    const auto & getAllIndexes() const {
+        return indexes;
+    }
+
+private:
+    using Indexes = std::unordered_map<std::string, Creator>;
+    Indexes indexes;
+};
+
+}
\ No newline at end of file
diff --git a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp
index 54b092fdb62..a3da1885771 100644
--- a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp
+++ b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp
@@ -336,7 +336,8 @@ static StoragePtr create(const StorageFactory::Arguments & args)
       */

     bool is_extended_storage_def =
-        args.storage_def->partition_by || args.storage_def->primary_key || args.storage_def->order_by || args.storage_def->sample_by || args.storage_def->settings;
+        args.storage_def->partition_by || args.storage_def->primary_key || args.storage_def->order_by
+        || args.storage_def->sample_by || !args.storage_def->indexes.empty() || args.storage_def->settings;

     String name_part = args.engine_name.substr(0, args.engine_name.size() - strlen("MergeTree"));

@@ -559,6 +560,7 @@ static StoragePtr create(const StorageFactory::Arguments & args)
     ASTPtr order_by_ast;
     ASTPtr primary_key_ast;
     ASTPtr sample_by_ast;
+    ASTs indexes_ast;

     MergeTreeSettings storage_settings = args.context.getMergeTreeSettings();

     if (is_extended_storage_def)
@@ -579,6 +581,10 @@ static StoragePtr create(const StorageFactory::Arguments & args)
         if (args.storage_def->sample_by)
             sample_by_ast = args.storage_def->sample_by->ptr();

+        for (auto& index : args.storage_def->indexes) {
+            indexes_ast.push_back(index->ptr());
+        }
+
         storage_settings.loadFromQuery(*args.storage_def);
     }
     else
@@ -615,13 +621,13 @@ static StoragePtr create(const StorageFactory::Arguments & args)
             zookeeper_path, replica_name, args.attach, args.data_path, args.database_name, args.table_name,
             args.columns, args.context, date_column_name, partition_by_ast, order_by_ast, primary_key_ast,
-            sample_by_ast, merging_params, storage_settings,
+            sample_by_ast, indexes_ast, merging_params, storage_settings,
             args.has_force_restore_data_flag);
     else
         return StorageMergeTree::create(
             args.data_path, args.database_name, args.table_name, args.columns, args.attach,
             args.context, date_column_name, partition_by_ast, order_by_ast, primary_key_ast,
-            sample_by_ast, merging_params, storage_settings,
+            sample_by_ast, indexes_ast, merging_params, storage_settings,
             args.has_force_restore_data_flag);
 }

diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp
index 6ee1e7ca9c9..28d157b6e49 100644
--- a/dbms/src/Storages/StorageMergeTree.cpp
+++ b/dbms/src/Storages/StorageMergeTree.cpp
@@ -51,6 +51,7 @@ StorageMergeTree::StorageMergeTree(
     const ASTPtr & order_by_ast_,
     const ASTPtr & primary_key_ast_,
     const ASTPtr & sample_by_ast_, /// nullptr, if sampling is not supported.
+    const ASTs & indexes_ast_,
     const MergeTreeData::MergingParams & merging_params_,
     const MergeTreeSettings & settings_,
     bool has_force_restore_data_flag)
@@ -59,7 +60,7 @@ StorageMergeTree::StorageMergeTree(
     data(database_name, table_name,
          full_path, columns_,
          context_, date_column_name, partition_by_ast_, order_by_ast_, primary_key_ast_,
-         sample_by_ast_, merging_params_,
+         sample_by_ast_, indexes_ast_, merging_params_,
          settings_, false, attach),
     reader(data), writer(data), merger_mutator(data, context.getBackgroundPool()),
     log(&Logger::get(database_name_ + "." + table_name + " (StorageMergeTree)"))
diff --git a/dbms/src/Storages/StorageMergeTree.h b/dbms/src/Storages/StorageMergeTree.h
index c80c06a9758..7354829bfc7 100644
--- a/dbms/src/Storages/StorageMergeTree.h
+++ b/dbms/src/Storages/StorageMergeTree.h
@@ -174,6 +174,7 @@ protected:
     const ASTPtr & order_by_ast_,
     const ASTPtr & primary_key_ast_,
     const ASTPtr & sample_by_ast_, /// nullptr, if sampling is not supported.
+    const ASTs & indexes_ast_,
     const MergeTreeData::MergingParams & merging_params_,
     const MergeTreeSettings & settings_,
     bool has_force_restore_data_flag);
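The MergeTreeIndexFactory introduced above is a name-to-creator registry, the same shape ClickHouse uses for its other factories: register a creator under a unique name, look it up by the TYPE written in the query, fail loudly on duplicates and unknown names. A self-contained sketch of that pattern with simplified types (std::runtime_error in place of DB::Exception):

    #include <functional>
    #include <memory>
    #include <stdexcept>
    #include <string>
    #include <unordered_map>

    struct Index { virtual ~Index() = default; };

    class IndexFactory
    {
    public:
        using Creator = std::function<std::unique_ptr<Index>()>;

        /// Registration fails if the name is already taken, as in the patch.
        void registerIndex(const std::string & name, Creator creator)
        {
            if (!creators.emplace(name, std::move(creator)).second)
                throw std::runtime_error("Index creator name '" + name + "' is not unique");
        }

        /// Lookup by the TYPE name from the query; unknown names are an error.
        std::unique_ptr<Index> get(const std::string & name) const
        {
            auto it = creators.find(name);
            if (it == creators.end())
                throw std::runtime_error("Unknown index type '" + name + "'");
            return it->second();
        }

    private:
        std::unordered_map<std::string, Creator> creators;
    };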
From b62197b1ad4830cd771aa8abc92843a23b13aa2a Mon Sep 17 00:00:00 2001
From: Nikita Vasilev
Date: Wed, 26 Dec 2018 15:49:05 +0300
Subject: [PATCH 008/586] fixed compilation

---
 dbms/src/Storages/MergeTree/MergeTreeData.cpp    | 2 +-
 dbms/src/Storages/StorageReplicatedMergeTree.cpp | 3 ++-
 dbms/src/Storages/StorageReplicatedMergeTree.h   | 1 +
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
index b8d80efde66..42df9ec56c1 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
@@ -94,7 +94,7 @@ MergeTreeData::MergeTreeData(
     const ASTPtr & order_by_ast_,
     const ASTPtr & primary_key_ast_,
     const ASTPtr & sample_by_ast_,
-    const ASTs & indexes_ast_,
+    const ASTs &,
     const MergingParams & merging_params_,
     const MergeTreeSettings & settings_,
     bool require_part_metadata_,
diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp
index afe8cbc02ab..3e93b3f7528 100644
--- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp
@@ -203,6 +203,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree(
     const ASTPtr & order_by_ast_,
     const ASTPtr & primary_key_ast_,
     const ASTPtr & sample_by_ast_,
+    const ASTs & indexes_ast_,
     const MergeTreeData::MergingParams & merging_params_,
     const MergeTreeSettings & settings_,
     bool has_force_restore_data_flag)
@@ -214,7 +215,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree(
     data(database_name, table_name,
          full_path, columns_,
          context_, date_column_name, partition_by_ast_, order_by_ast_, primary_key_ast_,
-         sample_by_ast_, merging_params_,
+         sample_by_ast_, indexes_ast_, merging_params_,
          settings_, true, attach,
          [this] (const std::string & name) { enqueuePartForCheck(name); }),
     reader(data), writer(data), merger_mutator(data, context.getBackgroundPool()), queue(*this),
diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h
index 753be7f088b..b9164644335 100644
--- a/dbms/src/Storages/StorageReplicatedMergeTree.h
+++ b/dbms/src/Storages/StorageReplicatedMergeTree.h
@@ -548,6 +548,7 @@ protected:
     const ASTPtr & order_by_ast_,
     const ASTPtr & primary_key_ast_,
     const ASTPtr & sample_by_ast_,
+    const ASTs & indexes_ast_,
     const MergeTreeData::MergingParams & merging_params_,
     const MergeTreeSettings & settings_,
     bool has_force_restore_data_flag);

From c89df916ab46e2313332ebabc2d8ff90d3c91339 Mon Sep 17 00:00:00 2001
From: Nikita Vasilev
Date: Wed, 26 Dec 2018 20:34:44 +0300
Subject: [PATCH 009/586] ptrs

---
 dbms/src/Storages/MergeTree/MergeTreeData.cpp |  8 ++++-
 dbms/src/Storages/MergeTree/MergeTreeData.h   |  4 +++
 .../Storages/MergeTree/MergeTreeIndexes.cpp   |  8 ++---
 .../src/Storages/MergeTree/MergeTreeIndexes.h | 32 +++++++++++--------
 4 files changed, 33 insertions(+), 19 deletions(-)

diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
index 42df9ec56c1..9ddb17f7039 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
@@ -94,7 +94,7 @@ MergeTreeData::MergeTreeData(
     const ASTPtr & order_by_ast_,
     const ASTPtr & primary_key_ast_,
     const ASTPtr & sample_by_ast_,
-    const ASTs &,
+    const ASTs & indexes_ast_,
     const MergingParams & merging_params_,
     const MergeTreeSettings & settings_,
     bool require_part_metadata_,
@@ -186,6 +186,12 @@ MergeTreeData::MergeTreeData(
         throw Exception(
             "MergeTree data format version on disk doesn't support custom partitioning",
             ErrorCodes::METADATA_MISMATCH);
+
+    for (const auto & index_ast : indexes_ast_) {
+        indexes.push_back(
+                std::move(MergeTreeIndexFactory::instance().get(
+                        std::dynamic_pointer_cast<ASTIndexDeclaration>(index_ast))));
+    }
 }

diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h
index 2587a7ad085..a49648ad682 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeData.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeData.h
@@ -5,6 +5,7 @@
 #include <Interpreters/ExpressionActions.h>
 #include <Storages/IStorage.h>
 #include <Storages/MergeTree/MergeTreeDataPart.h>
+#include <Storages/MergeTree/MergeTreeIndexes.h>
 #include <Storages/MergeTree/MergeTreeSettings.h>
 #include <Storages/MergeTree/MergeTreeMutationEntry.h>
 #include <Storages/MergeTree/MergeTreeMutationStatus.h>
@@ -579,6 +580,9 @@ public:
     Int64 minmax_idx_date_column_pos = -1; /// In a common case minmax index includes a date column.
     Int64 minmax_idx_time_column_pos = -1; /// In other cases, minmax index often includes a dateTime column.

+    /// Secondary indexes for MergeTree
+    MergeTreeIndexes indexes;
+
     /// Names of columns for primary key + secondary sorting columns.
     Names sorting_key_columns;
     ASTPtr sorting_key_expr_ast;
diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp
index c685ebc1b71..de665bedc5f 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp
@@ -15,15 +15,15 @@ void MergeTreeIndexFactory::registerIndex(const std::string &name, Creator creat
             ErrorCodes::LOGICAL_ERROR);
 }

-IMergeTreeIndex MergeTreeIndexFactory::get(const ASTIndexDeclaration & node) const {
-    if (!node.type)
+std::unique_ptr<MergeTreeIndex> MergeTreeIndexFactory::get(std::shared_ptr<ASTIndexDeclaration> node) const {
+    if (!node->type)
         throw Exception(
             "for INDEX TYPE is required",
             ErrorCodes::INCORRECT_QUERY);
-    auto it = indexes.find(node.type->name);
+    auto it = indexes.find(node->type->name);
     if (it == indexes.end())
         throw Exception(
-            "Unknown Index type '" + node.type->name + "'",
+            "Unknown Index type '" + node->type->name + "'",
             ErrorCodes::INCORRECT_QUERY);
     return it->second(node);
 }
diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h
index b6bb008d808..860e5747f3d 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h
@@ -3,27 +3,28 @@
 #include <string>
 #include <unordered_map>
 #include <vector>
-#include <Storages/MergeTree/MergeTreeData.h>
+#include <Core/Block.h>
+#include <Interpreters/ExpressionActions.h>
+#include <Storages/MergeTree/MarkRange.h>
+#include <Storages/SelectQueryInfo.h>
 #include <Parsers/ASTIndexDeclaration.h>
 #include <ext/singleton.h>

 namespace DB
 {

-class IMergeTreeIndex;
-using MergeTreeIndexes = std::vector<IMergeTreeIndex>;
-
-/// Interface for secondary MergeTree indexes
-class IMergeTreeIndex
+/// Interface for secondary MergeTree indexes
+class MergeTreeIndex
 {
 public:
-    virtual void load(const MergeTreeData & storage, const String & part_path) = 0;
-    virtual void store(const MergeTreeData & storage, const String & part_path,
-                       MergeTreeDataPartChecksums & checksums) const = 0;
+    virtual ~MergeTreeIndex() {};
+
+    virtual void load(const String & part_path) = 0;
+    virtual void store(const String & part_path, MergeTreeDataPartChecksums & checksums) const = 0;

     virtual void update(const Block & block, const Names & column_names) = 0;
-    virtual void merge(const IMergeTreeIndex & other) = 0;
+    virtual void merge(const MergeTreeIndex & other) = 0;

     virtual bool alwaysUnknownOrTrue() const = 0;
-    virtual bool maybeTrue() const = 0;
+    virtual bool maybeTrue(/* args */) const = 0;

     String name;
     ExpressionActionsPtr expr;
-    Block header;
+    Block sample;
 };

+using MergeTreeIndexPtr = std::unique_ptr<MergeTreeIndex>;
+using MergeTreeIndexes = std::vector<MergeTreeIndexPtr>;
+
 class MergeTreeIndexFactory : public ext::singleton<MergeTreeIndexFactory>
 {
     friend class ext::singleton<MergeTreeIndexFactory>;

 public:
-    using Creator = std::function<IMergeTreeIndex(const ASTIndexDeclaration & node)>;
+    using Creator = std::function<std::unique_ptr<MergeTreeIndex>(std::shared_ptr<ASTIndexDeclaration> node)>;

-protected:
-    MergeTreeIndexFactory() {};
-
-    IMergeTreeIndex get(const ASTIndexDeclaration & node) const;
+    std::unique_ptr<MergeTreeIndex> get(std::shared_ptr<ASTIndexDeclaration> node) const;

     void registerIndex(const std::string & name, Creator creator);

     const auto & getAllIndexes() const {
         return indexes;
     }

+protected:
+    MergeTreeIndexFactory() {};
+
 private:
     using Indexes = std::unordered_map<std::string, Creator>;
     Indexes indexes;
 };

From 9bf5b6b4aafe56034823aa14c37bef4a13ab51c0 Mon Sep 17 00:00:00 2001
From: Nikita Vasilev
Date: Fri, 28 Dec 2018 18:27:06 +0300
Subject: [PATCH 010/586] added indexParts

---
 .../src/Storages/MergeTree/MergeTreeIndexes.h | 30 ++++++++++++-------
 .../Storages/StorageReplicatedMergeTree.cpp   |  4 +++
 2 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h
index 860e5747f3d..4e0853236a4 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h
@@ -7,37 +7,45 @@
 #include <Storages/MergeTree/MarkRange.h>
 #include <Storages/SelectQueryInfo.h>
-#include <Parsers/ASTIndexDeclaration.h>
 #include <ext/singleton.h>

 namespace DB
 {

+class MergeTreeIndexPart
+{
+public:
+    virtual ~MergeTreeIndexPart() {};
+
+    virtual void update(const Block & block, const Names & column_names) = 0;
+    virtual void merge(const MergeTreeIndexPart & other) = 0;
+};
+
+using MergeTreeIndexPartPtr = std::unique_ptr<MergeTreeIndexPart>;
+using MergeTreeIndexParts = std::vector<MergeTreeIndexPartPtr>;
+

-/// Interface for secondary MergeTree indexes
 class MergeTreeIndex
 {
 public:
+    MergeTreeIndex(String name, ExpressionActionsPtr expr, Block key)
+            : name(name), expr(expr), sample(key) {}
+
     virtual ~MergeTreeIndex() {};

-    virtual void load(const String & part_path) = 0;
-    virtual void store(const String & part_path, MergeTreeDataPartChecksums & checksums) const = 0;
-
-    virtual void update(const Block & block, const Names & column_names) = 0;
-    virtual void merge(const MergeTreeIndex & other) = 0;
-
     virtual bool alwaysUnknownOrTrue() const = 0;
-    virtual bool maybeTrue(/* args */) const = 0;
+    virtual bool maybeTrue(/* args */) const = 0;
+
+    virtual MergeTreeIndexPartPtr createEmptyIndexPart() const = 0;

     String name;
     ExpressionActionsPtr expr;
     Block sample;
 };

 using MergeTreeIndexPtr = std::unique_ptr<MergeTreeIndex>;
 using MergeTreeIndexes = std::vector<MergeTreeIndexPtr>;

 class MergeTreeIndexFactory : public ext::singleton<MergeTreeIndexFactory>
 {
     friend class ext::singleton<MergeTreeIndexFactory>;
diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp
index 3e93b3f7528..0743cfce5e3 100644
--- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp
@@ -110,6 +110,7 @@ namespace ErrorCodes
     extern const int KEEPER_EXCEPTION;
     extern const int ALL_REPLICAS_LOST;
     extern const int REPLICA_STATUS_CHANGED;
+    extern const int INCORRECT_QUERY;
 }

 namespace ActionLocks
@@ -225,6 +226,9 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree(
 {
     if (path_.empty())
         throw Exception("ReplicatedMergeTree storages require data path", ErrorCodes::INCORRECT_FILE_NAME);
+    if (!indexes_ast_.empty()) {
+        throw Exception("ReplicatedMergeTree storages do not support indexes", ErrorCodes::INCORRECT_QUERY);
+    }

     if (!zookeeper_path.empty() && zookeeper_path.back() == '/')
         zookeeper_path.resize(zookeeper_path.size() - 1);
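After patch 010, each data part is meant to carry one MergeTreeIndexPart per declared index: update() folds in every written block, and merge() combines the statistics of parts being merged. A toy, self-contained illustration of that lifecycle over a single UInt64 column (these are not the patch's classes):

    #include <algorithm>
    #include <cstdint>
    #include <limits>
    #include <vector>

    /// Toy stand-in for a per-part index: tracks the min/max of one column.
    struct ToyMinMaxPart
    {
        uint64_t min = std::numeric_limits<uint64_t>::max();
        uint64_t max = std::numeric_limits<uint64_t>::min();

        void update(const std::vector<uint64_t> & block)   /// called for every written block
        {
            for (uint64_t v : block)
            {
                min = std::min(min, v);
                max = std::max(max, v);
            }
        }

        void merge(const ToyMinMaxPart & other)            /// called when data parts are merged
        {
            min = std::min(min, other.min);
            max = std::max(max, other.max);
        }

        bool maybeContains(uint64_t v) const { return min <= v && v <= max; }
    };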
From 06d841610763eacb8a750a6d0210f1a5cd281a23 Mon Sep 17 00:00:00 2001
From: Nikita Vasilev
Date: Fri, 28 Dec 2018 20:53:41 +0300
Subject: [PATCH 011/586] indextypes

---
 .../src/Storages/MergeTree/MergeTreeIndexes.h | 25 +++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h
index 4e0853236a4..4b150896a46 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h
@@ -12,13 +12,30 @@
 namespace DB
 {

-class MergeTreeIndexPart
+enum class INDEX_TYPES {
+    NONE = 0
+};
+
+
+class MergeTreeIndex;
+
+
+struct MergeTreeIndexPart
 {
+    friend MergeTreeIndex;
+
 public:
     virtual ~MergeTreeIndexPart() {};

     virtual void update(const Block & block, const Names & column_names) = 0;
     virtual void merge(const MergeTreeIndexPart & other) = 0;
+
+    virtual INDEX_TYPES indexType() const {
+        return INDEX_TYPES::NONE;
+    }
+
+protected:
+    MergeTreeIndexPart() {};
 };

From 68c3879d956e5996e9ec39e91a5cd960475ebb88 Mon Sep 17 00:00:00 2001
From: Nikita Vasilev
Date: Sat, 29 Dec 2018 14:12:41 +0300
Subject: [PATCH 012/586] index condition

---
 .../Storages/MergeTree/MergeTreeDataPart.h    |  3 +
 .../MergeTree/MergeTreeDataSelectExecutor.cpp |  3 +-
 .../Storages/MergeTree/MergeTreeIndexes.cpp   | 22 +++++++
 .../src/Storages/MergeTree/MergeTreeIndexes.h | 65 ++++++++++++++-----
 4 files changed, 74 insertions(+), 19 deletions(-)

diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h
index b277dfaa237..1ae6aa602d0 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h
@@ -4,6 +4,7 @@
 #include <Core/Block.h>
 #include <Core/Types.h>
 #include <Core/NamesAndTypes.h>
+#include <Storages/MergeTree/MergeTreeIndexes.h>
 #include <Storages/MergeTree/MergeTreePartInfo.h>
 #include <Storages/MergeTree/MergeTreeDataPartChecksum.h>
 #include <Columns/IColumn.h>
@@ -207,6 +208,8 @@ struct MergeTreeDataPart

     MinMaxIndex minmax_idx;

+    MergeTreeIndexParts index_parts;
+
     Checksums checksums;

     /// Columns description.
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
index dd5a35ad710..1e9613298dc 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@@ -886,7 +886,8 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange(
     Row index_left(used_key_size);
     Row index_right(used_key_size);

-    while (!ranges_stack.empty())
+    /// In other words, it removes subranges from whole range, that definitely could not contain required keys.
+    while (!ranges_stack.empty())
     {
         MarkRange range = ranges_stack.back();
         ranges_stack.pop_back();
diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp
index de665bedc5f..d181c0e0deb 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp
@@ -7,8 +7,30 @@ namespace ErrorCodes
 {
     extern const int LOGICAL_ERROR;
     extern const int INCORRECT_QUERY;
+    extern const int UNKNOWN_EXCEPTION;
 }

+void MergeTreeIndexPart::update(const Block & block, const Names & column_names) {
+    updateImpl(block, column_names);
+}
+
+void MergeTreeIndexPart::merge(const MergeTreeIndexPart & other) {
+    if (other.indexType() != indexType()) {
+        throw Exception("MergeTreeIndexPart: Merging index part with another index type.",
+                ErrorCodes::LOGICAL_ERROR);
+    }
+    mergeImpl(other);
+}
+
+INDEX_TYPE MergeTreeIndexPart::indexType() const {
+    return owner->indexType();
+}
+
+INDEX_TYPE IndexCondition::indexType() const {
+    return owner->indexType();
+}
+
+
 void MergeTreeIndexFactory::registerIndex(const std::string &name, Creator creator) {
     if (!indexes.emplace(name, std::move(creator)).second)
         throw Exception("MergeTreeIndexFactory: the Index creator name '" + name + "' is not unique",
diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h
index 4b150896a46..88f75b11e79 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h
@@ -6,42 +6,74 @@
 #include <Core/Block.h>
 #include <Interpreters/ExpressionActions.h>
 #include <Storages/MergeTree/MarkRange.h>
+#include <Storages/MergeTree/MergeTreeDataPartChecksum.h>
+#include <Storages/SelectQueryInfo.h>
 #include <Parsers/ASTIndexDeclaration.h>
 #include <ext/singleton.h>

 namespace DB
 {

-enum class INDEX_TYPES {
+enum class INDEX_TYPE {
     NONE = 0
 };


 class MergeTreeIndex;
+struct MergeTreeIndexPart;

+using MergeTreeIndexPtr = std::shared_ptr<MergeTreeIndex>;
+using MergeTreeIndexes = std::vector<MergeTreeIndexPtr>;
+using MergeTreeIndexPartPtr = std::shared_ptr<MergeTreeIndexPart>;
+using MergeTreeIndexParts = std::vector<MergeTreeIndexPartPtr>;

+
+/// Data structure storing some data for each MergeTreeDataPart
 struct MergeTreeIndexPart
 {
     friend MergeTreeIndex;

 public:
-    virtual ~MergeTreeIndexPart() {};
+    virtual ~MergeTreeIndexPart() = default;

-    virtual void update(const Block & block, const Names & column_names) = 0;
-    virtual void merge(const MergeTreeIndexPart & other) = 0;
+    virtual INDEX_TYPE indexType() const;

-    virtual INDEX_TYPES indexType() const {
-        return INDEX_TYPES::NONE;
-    }
+    void update(const Block & block, const Names & column_names);
+    void merge(const MergeTreeIndexPart & other);

 protected:
-    MergeTreeIndexPart() {};
+    MergeTreeIndexPart() = default;
+
+    virtual void updateImpl(const Block & block, const Names & column_names) = 0;
+    virtual void mergeImpl(const MergeTreeIndexPart & other) = 0;
+
+    MergeTreeIndexPtr owner;
 };

+
+/// Condition on the index.
+class IndexCondition {
+    friend MergeTreeIndex;
+
+public:
+    virtual ~IndexCondition() = default;
+
+    virtual INDEX_TYPE indexType() const;
+
+    // methods like KeyCondition
+    virtual bool alwaysUnknownOrTrue() const = 0;
+    virtual bool maybeTrueInRange(const MarkRange & range) const = 0;
+
+protected:
+    IndexCondition() = default;
+
+    MergeTreeIndexPtr owner;
+};
+
+using IndexConditionPtr = std::shared_ptr<IndexCondition>;
+
+
+/// Structure for storing index info like columns, expression, arguments, ...
 class MergeTreeIndex
 {
 public:
     MergeTreeIndex(String name, ExpressionActionsPtr expr, Block key)
             : name(name), expr(expr), sample(key) {}

     virtual ~MergeTreeIndex() {};

-    virtual INDEX_TYPES indexType() const {
-        return INDEX_TYPES::NONE;
-    }
+    virtual INDEX_TYPE indexType() const = 0;

-    virtual bool alwaysUnknownOrTrue() const = 0;
-    virtual bool maybeTrue(/* args */) const = 0;
+    virtual MergeTreeIndexPartPtr createEmptyIndexPart() const = 0;
+    virtual IndexConditionPtr createIndexCondition(const SelectQueryInfo & query_info,
+                                                   const Context & context,
+                                                   const Names & key_column_names,
+                                                   const ExpressionActionsPtr & key_expr) const = 0;

-    virtual MergeTreeIndexPartPtr createEmptyIndexPart() const = 0;
-
     String name;
     ExpressionActionsPtr expr;
     Block sample;
 };

-using MergeTreeIndexPtr = std::unique_ptr<MergeTreeIndex>;
-using MergeTreeIndexes = std::vector<MergeTreeIndexPtr>;

 class MergeTreeIndexFactory : public ext::singleton<MergeTreeIndexFactory>
 {
@@ -83,7 +112,7 @@ public:
     }

 protected:
-    MergeTreeIndexFactory() {};
+    MergeTreeIndexFactory() = default;

 private:
     using Indexes = std::unordered_map<std::string, Creator>;
     Indexes indexes;

From 5079330bdc614d2f4d471e793fff03a78dd86a03 Mon Sep 17 00:00:00 2001
From: Nikita Vasilev
Date: Sat, 29 Dec 2018 15:04:00 +0300
Subject: [PATCH 013/586] IndexCondition

---
 .../Storages/MergeTree/MergeTreeIndexes.cpp   | 19 ++---
 .../src/Storages/MergeTree/MergeTreeIndexes.h | 71 ++++++++++---------
 2 files changed, 50 insertions(+), 40 deletions(-)

diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp
index d181c0e0deb..1a31b260b78 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp
@@ -10,7 +10,18 @@ namespace ErrorCodes
     extern const int UNKNOWN_EXCEPTION;
 }

+
+INDEX_TYPE IndexCondition::indexType() const {
+    return part->indexType();
+}
+
+
+INDEX_TYPE MergeTreeIndexPart::indexType() const {
+    return index->indexType();
+}
+
 void MergeTreeIndexPart::update(const Block & block, const Names & column_names) {
+    /// a few checks?
     updateImpl(block, column_names);
 }

@@ -22,14 +33,6 @@ void MergeTreeIndexPart::merge(const MergeTreeIndexPart & other) {
     }
     mergeImpl(other);
 }

-INDEX_TYPE MergeTreeIndexPart::indexType() const {
-    return owner->indexType();
-}
-
-INDEX_TYPE IndexCondition::indexType() const {
-    return owner->indexType();
-}
-
 void MergeTreeIndexFactory::registerIndex(const std::string &name, Creator creator) {
     if (!indexes.emplace(name, std::move(creator)).second)
         throw Exception("MergeTreeIndexFactory: the Index creator name '" + name + "' is not unique",
diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h
index 88f75b11e79..c376e91ce42 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h
@@ -20,12 +20,42 @@ enum class INDEX_TYPE {

 class MergeTreeIndex;
+struct MergeTreeIndexPart;

 using MergeTreeIndexPtr = std::shared_ptr<MergeTreeIndex>;
 using MergeTreeIndexes = std::vector<MergeTreeIndexPtr>;
+using MergeTreeIndexPartPtr = std::shared_ptr<MergeTreeIndexPart>;
+using MergeTreeIndexParts = std::vector<MergeTreeIndexPartPtr>;

-/// Data structure storing some data for each MergeTreeDataPart
+
+/// Condition on the index.
+/// It works only with one indexPart (MergeTreeDataPart).
+class IndexCondition {
+    friend MergeTreeIndexPart;
+
+public:
+    virtual ~IndexCondition() = default;
+
+    virtual INDEX_TYPE indexType() const;
+
+    // Checks if this index is useful for query.
+    virtual bool alwaysUnknownOrTrue() const = 0;
+
+    // Splits its input to ranges with length equal to index granularity (or less on the borders).
+    // Then it reads necessary ranges of the index and decides what ranges from the input should be reduced.
+    virtual MarkRanges filterRanges(const MarkRanges & ranges) const = 0;
+
+protected:
+    IndexCondition() = default;
+
+    MergeTreeIndexPartPtr part;
+};
+
+using IndexConditionPtr = std::shared_ptr<IndexCondition>;
+
+
+/// Data structure for operations with index data for each MergeTreeDataPart.
 struct MergeTreeIndexPart
 {
     friend MergeTreeIndex;
@@ -38,42 +68,23 @@ public:
     void update(const Block & block, const Names & column_names);
     void merge(const MergeTreeIndexPart & other);

+    virtual IndexConditionPtr createIndexConditionOnPart(
+            const SelectQueryInfo & query_info
+            , const Context & context
+            , const Names & key_column_names
+            , const ExpressionActionsPtr & key_expr) const = 0;
+
 protected:
     MergeTreeIndexPart() = default;

     virtual void updateImpl(const Block & block, const Names & column_names) = 0;
     virtual void mergeImpl(const MergeTreeIndexPart & other) = 0;

-    MergeTreeIndexPtr owner;
+    MergeTreeIndexPtr index;
 };

-/// Condition on the index.
-class IndexCondition {
-    friend MergeTreeIndex;
-
-public:
-    virtual ~IndexCondition() = default;
-
-    virtual INDEX_TYPE indexType() const;
-
-    // methods like KeyCondition
-    virtual bool alwaysUnknownOrTrue() const = 0;
-    virtual bool maybeTrueInRange(const MarkRange & range) const = 0;
-
-protected:
-    IndexCondition() = default;
-
-    MergeTreeIndexPtr owner;
-};
-
-using IndexConditionPtr = std::shared_ptr<IndexCondition>;
-
-
-/// Structure for storing index info like columns, expression, arguments, ...
+/// Structure for storing basic index info like columns, expression, arguments, ...
 class MergeTreeIndex
 {
 public:
@@ -85,10 +96,6 @@ public:

     virtual ~MergeTreeIndex() {};

-    virtual INDEX_TYPE indexType() const = 0;
+    virtual INDEX_TYPE indexType() const = 0;

     virtual MergeTreeIndexPartPtr createEmptyIndexPart() const = 0;
-    virtual IndexConditionPtr createIndexCondition(const SelectQueryInfo & query_info,
-                                                   const Context & context,
-                                                   const Names & key_column_names,
-                                                   const ExpressionActionsPtr & key_expr) const = 0;

     String name;
     ExpressionActionsPtr expr;
     Block sample;
 };
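The IndexCondition contract after patch 013: alwaysUnknownOrTrue() says whether the index can help the query at all, and filterRanges() drops mark ranges that provably cannot match. A self-contained sketch of a filterRanges()-style check over per-granule min/max statistics (simplified types, equality predicate only; the real interface also splits ranges by index granularity):

    #include <cstddef>
    #include <cstdint>
    #include <utility>
    #include <vector>

    struct MarkRange { size_t begin; size_t end; };   /// [begin, end) in marks, as in MergeTree
    using MarkRanges = std::vector<MarkRange>;

    /// Keep only ranges whose per-granule min/max statistics may satisfy "column == needle".
    MarkRanges filterRanges(const MarkRanges & ranges,
                            const std::vector<std::pair<uint64_t, uint64_t>> & granule_minmax,
                            uint64_t needle)
    {
        MarkRanges res;
        for (const auto & range : ranges)
            for (size_t mark = range.begin; mark < range.end; ++mark)
                if (granule_minmax[mark].first <= needle && needle <= granule_minmax[mark].second)
                {
                    res.push_back(range);   /// one possibly-matching granule is enough to keep the range
                    break;
                }
        return res;
    }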
(SelectExecutor)")) + { + } /// Construct a block consisting only of possible values of virtual columns -static Block getBlockWithPartColumn(const MergeTreeData::DataPartsVector & parts) -{ - auto column = ColumnString::create(); - - for (const auto & part : parts) - column->insert(part->name); - - return Block{ColumnWithTypeAndName(std::move(column), std::make_shared(), "_part")}; -} - - -size_t MergeTreeDataSelectExecutor::getApproximateTotalRowsToRead( - const MergeTreeData::DataPartsVector & parts, const KeyCondition & key_condition, const Settings & settings) const -{ - size_t full_marks_count = 0; - - /// We will find out how many rows we would have read without sampling. - LOG_DEBUG(log, "Preliminary index scan with condition: " << key_condition.toString()); - - for (size_t i = 0; i < parts.size(); ++i) + static Block getBlockWithPartColumn(const MergeTreeData::DataPartsVector & parts) { - const MergeTreeData::DataPartPtr & part = parts[i]; - MarkRanges ranges = markRangesFromPKRange(part->index, key_condition, settings); + auto column = ColumnString::create(); - /** In order to get a lower bound on the number of rows that match the condition on PK, - * consider only guaranteed full marks. - * That is, do not take into account the first and last marks, which may be incomplete. - */ - for (size_t j = 0; j < ranges.size(); ++j) - if (ranges[j].end - ranges[j].begin > 2) - full_marks_count += ranges[j].end - ranges[j].begin - 2; + for (const auto & part : parts) + column->insert(part->name); + + return Block{ColumnWithTypeAndName(std::move(column), std::make_shared(), "_part")}; } - return full_marks_count * data.index_granularity; -} + + size_t MergeTreeDataSelectExecutor::getApproximateTotalRowsToRead( + const MergeTreeData::DataPartsVector & parts, const KeyCondition & key_condition, const Settings & settings) const + { + size_t full_marks_count = 0; + + /// We will find out how many rows we would have read without sampling. + LOG_DEBUG(log, "Preliminary index scan with condition: " << key_condition.toString()); + + for (size_t i = 0; i < parts.size(); ++i) + { + const MergeTreeData::DataPartPtr & part = parts[i]; + MarkRanges ranges = markRangesFromPKRange(part->index, key_condition, settings); + + /** In order to get a lower bound on the number of rows that match the condition on PK, + * consider only guaranteed full marks. + * That is, do not take into account the first and last marks, which may be incomplete. + */ + for (size_t j = 0; j < ranges.size(); ++j) + if (ranges[j].end - ranges[j].begin > 2) + full_marks_count += ranges[j].end - ranges[j].begin - 2; + } + + return full_marks_count * data.index_granularity; + } -using RelativeSize = boost::rational; + using RelativeSize = boost::rational; -std::string toString(const RelativeSize & x) -{ - return ASTSampleRatio::toString(x.numerator()) + "/" + ASTSampleRatio::toString(x.denominator()); -} + std::string toString(const RelativeSize & x) + { + return ASTSampleRatio::toString(x.numerator()) + "/" + ASTSampleRatio::toString(x.denominator()); + } /// Converts sample size to an approximate number of rows (ex. `SAMPLE 1000000`) to relative value (ex. `SAMPLE 0.1`). 
-static RelativeSize convertAbsoluteSampleSizeToRelative(const ASTPtr & node, size_t approx_total_rows)
-{
-    if (approx_total_rows == 0)
-        return 1;
-
-    const ASTSampleRatio & node_sample = typeid_cast<const ASTSampleRatio &>(*node);
-
-    auto absolute_sample_size = node_sample.ratio.numerator / node_sample.ratio.denominator;
-    return std::min(RelativeSize(1), RelativeSize(absolute_sample_size) / RelativeSize(approx_total_rows));
-}
-
-
-BlockInputStreams MergeTreeDataSelectExecutor::read(
-    const Names & column_names_to_return,
-    const SelectQueryInfo & query_info,
-    const Context & context,
-    const size_t max_block_size,
-    const unsigned num_streams,
-    const PartitionIdToMaxBlock * max_block_numbers_to_read) const
-{
-    return readFromParts(
-        data.getDataPartsVector(), column_names_to_return, query_info, context,
-        max_block_size, num_streams, max_block_numbers_to_read);
-}
-
-BlockInputStreams MergeTreeDataSelectExecutor::readFromParts(
-    MergeTreeData::DataPartsVector parts,
-    const Names & column_names_to_return,
-    const SelectQueryInfo & query_info,
-    const Context & context,
-    const size_t max_block_size,
-    const unsigned num_streams,
-    const PartitionIdToMaxBlock * max_block_numbers_to_read) const
-{
-    size_t part_index = 0;
-
-    /// If query contains restrictions on the virtual column `_part` or `_part_index`, select only parts suitable for it.
-    /// The virtual column `_sample_factor` (which is equal to 1 / used sample rate) can be requested in the query.
-    Names virt_column_names;
-    Names real_column_names;
-
-    bool part_column_queried = false;
-
-    bool sample_factor_column_queried = false;
-    Float64 used_sample_factor = 1;
-
-    for (const String & name : column_names_to_return)
+    static RelativeSize convertAbsoluteSampleSizeToRelative(const ASTPtr & node, size_t approx_total_rows)
    {
-        if (name == "_part")
-        {
-            part_column_queried = true;
-            virt_column_names.push_back(name);
-        }
-        else if (name == "_part_index")
-        {
-            virt_column_names.push_back(name);
-        }
-        else if (name == "_partition_id")
-        {
-            virt_column_names.push_back(name);
-        }
-        else if (name == "_sample_factor")
-        {
-            sample_factor_column_queried = true;
-            virt_column_names.push_back(name);
-        }
-        else
-        {
-            real_column_names.push_back(name);
-        }
+        if (approx_total_rows == 0)
+            return 1;
+
+        const ASTSampleRatio & node_sample = typeid_cast<const ASTSampleRatio &>(*node);
+
+        auto absolute_sample_size = node_sample.ratio.numerator / node_sample.ratio.denominator;
+        return std::min(RelativeSize(1), RelativeSize(absolute_sample_size) / RelativeSize(approx_total_rows));
    }

-    NamesAndTypesList available_real_columns = data.getColumns().getAllPhysical();

-    /// If there are only virtual columns in the query, you must request at least one non-virtual one.
-    if (real_column_names.empty())
-        real_column_names.push_back(ExpressionActions::getSmallestColumn(available_real_columns));
-
-    /// If `_part` virtual column is requested, we try to use it as an index. 
-    Block virtual_columns_block = getBlockWithPartColumn(parts);
-    if (part_column_queried)
-        VirtualColumnUtils::filterBlockWithQuery(query_info.query, virtual_columns_block, context);
-
-    std::multiset<String> part_values = VirtualColumnUtils::extractSingleValueFromBlock<String>(virtual_columns_block, "_part");
-
-    data.check(real_column_names);
-
-    const Settings & settings = context.getSettingsRef();
-    Names primary_key_columns = data.primary_key_columns;
-
-    KeyCondition key_condition(query_info, context, primary_key_columns, data.primary_key_expr);
-
-    if (settings.force_primary_key && key_condition.alwaysUnknownOrTrue())
    {
-        std::stringstream exception_message;
-        exception_message << "Primary key (";
-        for (size_t i = 0, size = primary_key_columns.size(); i < size; ++i)
-            exception_message << (i == 0 ? "" : ", ") << primary_key_columns[i];
-        exception_message << ") is not used and setting 'force_primary_key' is set.";
-
-        throw Exception(exception_message.str(), ErrorCodes::INDEX_NOT_USED);
    }

-    std::optional<KeyCondition> minmax_idx_condition;
-    if (data.minmax_idx_expr)
    {
-        minmax_idx_condition.emplace(query_info, context, data.minmax_idx_columns, data.minmax_idx_expr);

-        if (settings.force_index_by_date && minmax_idx_condition->alwaysUnknownOrTrue())
        {
-            String msg = "MinMax index by columns (";
-            bool first = true;
-            for (const String & col : data.minmax_idx_columns)
            {
-                if (first)
-                    first = false;
-                else
-                    msg += ", ";
-                msg += col;
            }
-            msg += ") is not used and setting 'force_index_by_date' is set";
-
-            throw Exception(msg, ErrorCodes::INDEX_NOT_USED);
        }
    }
-
-    /// Select the parts in which there can be data that satisfy `minmax_idx_condition` and that match the condition on `_part`,
-    /// as well as `max_block_number_to_read`. 
-    {
-        auto prev_parts = parts;
-        parts.clear();
-
-        for (const auto & part : prev_parts)
-        {
-            if (part_values.find(part->name) == part_values.end())
-                continue;
-
-            if (part->isEmpty())
-                continue;
-
-            if (minmax_idx_condition && !minmax_idx_condition->mayBeTrueInParallelogram(
-                part->minmax_idx.parallelogram, data.minmax_idx_column_types))
-                continue;
-
-            if (max_block_numbers_to_read)
            {
-                auto blocks_iterator = max_block_numbers_to_read->find(part->info.partition_id);
-                if (blocks_iterator == max_block_numbers_to_read->end() || part->info.max_block > blocks_iterator->second)
                    continue;
            }
-
-            parts.push_back(part);
-        }
-    }
-
-    /// Sampling.
-    Names column_names_to_read = real_column_names;
-    std::shared_ptr<ASTFunction> filter_function;
-    ExpressionActionsPtr filter_expression;
-
-    RelativeSize relative_sample_size = 0;
-    RelativeSize relative_sample_offset = 0;
-
-    ASTSelectQuery & select = typeid_cast<ASTSelectQuery &>(*query_info.query);
-
-    auto select_sample_size = select.sample_size();
-    auto select_sample_offset = select.sample_offset();
-
-    if (select_sample_size)
-    {
-        relative_sample_size.assign(
-            typeid_cast<const ASTSampleRatio &>(*select_sample_size).ratio.numerator,
-            typeid_cast<const ASTSampleRatio &>(*select_sample_size).ratio.denominator);
-
-        if (relative_sample_size < 0)
-            throw Exception("Negative sample size", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
-
-        relative_sample_offset = 0;
-        if (select_sample_offset)
-            relative_sample_offset.assign(
-                typeid_cast<const ASTSampleRatio &>(*select_sample_offset).ratio.numerator,
-                typeid_cast<const ASTSampleRatio &>(*select_sample_offset).ratio.denominator);
-
-        if (relative_sample_offset < 0)
-            throw Exception("Negative sample offset", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
-
-        /// Convert absolute value of the sampling (in form `SAMPLE 1000000` - how many rows to read) into the relative `SAMPLE 0.1` (how much data to read).
-        size_t approx_total_rows = 0;
-        if (relative_sample_size > 1 || relative_sample_offset > 1)
-            approx_total_rows = getApproximateTotalRowsToRead(parts, key_condition, settings);
-
-        if (relative_sample_size > 1)
-        {
-            relative_sample_size = convertAbsoluteSampleSizeToRelative(select_sample_size, approx_total_rows);
-            LOG_DEBUG(log, "Selected relative sample size: " << toString(relative_sample_size));
        }

+        NamesAndTypesList available_real_columns = data.getColumns().getAllPhysical();
+
+        /// If there are only virtual columns in the query, you must request at least one non-virtual one.
+        if (real_column_names.empty())
+            real_column_names.push_back(ExpressionActions::getSmallestColumn(available_real_columns));
+
+        /// If `_part` virtual column is requested, we try to use it as an index.
+        Block virtual_columns_block = getBlockWithPartColumn(parts);
+        if (part_column_queried)
+            VirtualColumnUtils::filterBlockWithQuery(query_info.query, virtual_columns_block, context);
+
+        std::multiset<String> part_values = VirtualColumnUtils::extractSingleValueFromBlock<String>(virtual_columns_block, "_part");
+
+        data.check(real_column_names);
+
+        const Settings & settings = context.getSettingsRef();
+        Names primary_key_columns = data.primary_key_columns;
+
+        KeyCondition key_condition(query_info, context, primary_key_columns, data.primary_key_expr);
+
+        if (settings.force_primary_key && key_condition.alwaysUnknownOrTrue())
+        {
+            std::stringstream exception_message;
+            exception_message << "Primary key (";
+            for (size_t i = 0, size = primary_key_columns.size(); i < size; ++i)
+                exception_message << (i == 0 ? "" : ", ") << primary_key_columns[i];
+            exception_message << ") is not used and setting 'force_primary_key' is set.";
+
+            throw Exception(exception_message.str(), ErrorCodes::INDEX_NOT_USED);
+        }
+
+        std::optional<KeyCondition> minmax_idx_condition;
+        if (data.minmax_idx_expr)
+        {
+            minmax_idx_condition.emplace(query_info, context, data.minmax_idx_columns, data.minmax_idx_expr);
+
+            if (settings.force_index_by_date && minmax_idx_condition->alwaysUnknownOrTrue())
+            {
+                String msg = "MinMax index by columns (";
+                bool first = true;
+                for (const String & col : data.minmax_idx_columns)
+                {
+                    if (first)
+                        first = false;
+                    else
+                        msg += ", ";
+                    msg += col;
+                }
+                msg += ") is not used and setting 'force_index_by_date' is set";
+
+                throw Exception(msg, ErrorCodes::INDEX_NOT_USED);
+            }
+        }
+
+        /// Select the parts in which there can be data that satisfy `minmax_idx_condition` and that match the condition on `_part`,
+        /// as well as `max_block_number_to_read`. 
+        {
+            auto prev_parts = parts;
+            parts.clear();
+
+            for (const auto & part : prev_parts)
+            {
+                if (part_values.find(part->name) == part_values.end())
                    continue;
+
+                if (part->isEmpty())
+                    continue;
+
+                if (minmax_idx_condition && !minmax_idx_condition->mayBeTrueInParallelogram(
+                    part->minmax_idx.parallelogram, data.minmax_idx_column_types))
+                    continue;
+
+                if (max_block_numbers_to_read)
+                {
+                    auto blocks_iterator = max_block_numbers_to_read->find(part->info.partition_id);
+                    if (blocks_iterator == max_block_numbers_to_read->end() || part->info.max_block > blocks_iterator->second)
+                        continue;
+                }
+
+                parts.push_back(part);
            }
        }

+        /// Sampling.
+        Names column_names_to_read = real_column_names;
+        std::shared_ptr<ASTFunction> filter_function;
+        ExpressionActionsPtr filter_expression;

+        RelativeSize relative_sample_size = 0;
+        RelativeSize relative_sample_offset = 0;

+        ASTSelectQuery & select = typeid_cast<ASTSelectQuery &>(*query_info.query);

+        auto select_sample_size = select.sample_size();
+        auto select_sample_offset = select.sample_offset();

+        if (select_sample_size)
        {
+            relative_sample_size.assign(
+                typeid_cast<const ASTSampleRatio &>(*select_sample_size).ratio.numerator,
+                typeid_cast<const ASTSampleRatio &>(*select_sample_size).ratio.denominator);

+            if (relative_sample_size < 0)
+                throw Exception("Negative sample size", ErrorCodes::ARGUMENT_OUT_OF_BOUND);

+            relative_sample_offset = 0;
+            if (select_sample_offset)
+                relative_sample_offset.assign(
+                    typeid_cast<const ASTSampleRatio &>(*select_sample_offset).ratio.numerator,
+                    typeid_cast<const ASTSampleRatio &>(*select_sample_offset).ratio.denominator);

+            if (relative_sample_offset < 0)
+                throw Exception("Negative sample offset", ErrorCodes::ARGUMENT_OUT_OF_BOUND);

+            /// Convert absolute value of the sampling (in form `SAMPLE 1000000` - how many rows to read) into the relative `SAMPLE 0.1` (how much data to read).
+            size_t approx_total_rows = 0;
+            if (relative_sample_size > 1 || relative_sample_offset > 1)
+                approx_total_rows = getApproximateTotalRowsToRead(parts, key_condition, settings);

+            if (relative_sample_size > 1)
            {
+                relative_sample_size = convertAbsoluteSampleSizeToRelative(select_sample_size, approx_total_rows);
+                LOG_DEBUG(log, "Selected relative sample size: " << toString(relative_sample_size));
            }

-        /// SAMPLE 1 is the same as the absence of SAMPLE.
-        if (relative_sample_size == RelativeSize(1))
-            relative_sample_size = 0;

-        if (relative_sample_offset > 0 && RelativeSize(0) == relative_sample_size)
-            throw Exception("Sampling offset is incorrect because no sampling", ErrorCodes::ARGUMENT_OUT_OF_BOUND);

-        if (relative_sample_offset > 1)
        {
-            relative_sample_offset = convertAbsoluteSampleSizeToRelative(select_sample_offset, approx_total_rows);
-            LOG_DEBUG(log, "Selected relative sample offset: " << toString(relative_sample_offset));
        }
    }

-    /** Which range of sampling key values do I need to read? 
- * First, in the whole range ("universe") we select the interval - * of relative `relative_sample_size` size, offset from the beginning by `relative_sample_offset`. - * - * Example: SAMPLE 0.4 OFFSET 0.3 - * - * [------********------] - * ^ - offset - * <------> - size - * - * If the interval passes through the end of the universe, then cut its right side. - * - * Example: SAMPLE 0.4 OFFSET 0.8 - * - * [----------------****] - * ^ - offset - * <------> - size - * - * Next, if the `parallel_replicas_count`, `parallel_replica_offset` settings are set, - * then it is necessary to break the received interval into pieces of the number `parallel_replicas_count`, - * and select a piece with the number `parallel_replica_offset` (from zero). - * - * Example: SAMPLE 0.4 OFFSET 0.3, parallel_replicas_count = 2, parallel_replica_offset = 1 - * - * [----------****------] - * ^ - offset - * <------> - size - * <--><--> - pieces for different `parallel_replica_offset`, select the second one. - * - * It is very important that the intervals for different `parallel_replica_offset` cover the entire range without gaps and overlaps. - * It is also important that the entire universe can be covered using SAMPLE 0.1 OFFSET 0, ... OFFSET 0.9 and similar decimals. - */ + if (relative_sample_size < 0) + throw Exception("Negative sample size", ErrorCodes::ARGUMENT_OUT_OF_BOUND); - bool use_sampling = relative_sample_size > 0 || settings.parallel_replicas_count > 1; - bool no_data = false; /// There is nothing left after sampling. + relative_sample_offset = 0; + if (select_sample_offset) + relative_sample_offset.assign( + typeid_cast(*select_sample_offset).ratio.numerator, + typeid_cast(*select_sample_offset).ratio.denominator); - if (use_sampling) - { - if (!data.supportsSampling()) - throw Exception("Illegal SAMPLE: table doesn't support sampling", ErrorCodes::SAMPLING_NOT_SUPPORTED); + if (relative_sample_offset < 0) + throw Exception("Negative sample offset", ErrorCodes::ARGUMENT_OUT_OF_BOUND); - if (sample_factor_column_queried && relative_sample_size != RelativeSize(0)) - used_sample_factor = 1.0 / boost::rational_cast(relative_sample_size); + /// Convert absolute value of the sampling (in form `SAMPLE 1000000` - how many rows to read) into the relative `SAMPLE 0.1` (how much data to read). + size_t approx_total_rows = 0; + if (relative_sample_size > 1 || relative_sample_offset > 1) + approx_total_rows = getApproximateTotalRowsToRead(parts, key_condition, settings); - RelativeSize size_of_universum = 0; - DataTypePtr type = data.primary_key_sample.getByName(data.sampling_expr_column_name).type; - - if (typeid_cast(type.get())) - size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); - else if (typeid_cast(type.get())) - size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); - else if (typeid_cast(type.get())) - size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); - else if (typeid_cast(type.get())) - size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); - else - throw Exception("Invalid sampling column type in storage parameters: " + type->getName() + ". 
Must be unsigned integer type.", - ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); - - if (settings.parallel_replicas_count > 1) - { - if (relative_sample_size == RelativeSize(0)) - relative_sample_size = 1; - - relative_sample_size /= settings.parallel_replicas_count.value; - relative_sample_offset += relative_sample_size * RelativeSize(settings.parallel_replica_offset.value); - } - - if (relative_sample_offset >= RelativeSize(1)) - no_data = true; - - /// Calculate the half-interval of `[lower, upper)` column values. - bool has_lower_limit = false; - bool has_upper_limit = false; - - RelativeSize lower_limit_rational = relative_sample_offset * size_of_universum; - RelativeSize upper_limit_rational = (relative_sample_offset + relative_sample_size) * size_of_universum; - - UInt64 lower = boost::rational_cast(lower_limit_rational); - UInt64 upper = boost::rational_cast(upper_limit_rational); - - if (lower > 0) - has_lower_limit = true; - - if (upper_limit_rational < size_of_universum) - has_upper_limit = true; - - /*std::cerr << std::fixed << std::setprecision(100) - << "relative_sample_size: " << relative_sample_size << "\n" - << "relative_sample_offset: " << relative_sample_offset << "\n" - << "lower_limit_float: " << lower_limit_rational << "\n" - << "upper_limit_float: " << upper_limit_rational << "\n" - << "lower: " << lower << "\n" - << "upper: " << upper << "\n";*/ - - if ((has_upper_limit && upper == 0) - || (has_lower_limit && has_upper_limit && lower == upper)) - no_data = true; - - if (no_data || (!has_lower_limit && !has_upper_limit)) - { - use_sampling = false; - } - else - { - /// Let's add the conditions to cut off something else when the index is scanned again and when the request is processed. - - std::shared_ptr lower_function; - std::shared_ptr upper_function; - - if (has_lower_limit) + if (relative_sample_size > 1) { - if (!key_condition.addCondition(data.sampling_expr_column_name, Range::createLeftBounded(lower, true))) - throw Exception("Sampling column not in primary key", ErrorCodes::ILLEGAL_COLUMN); - - ASTPtr args = std::make_shared(); - args->children.push_back(data.getSamplingExpression()); - args->children.push_back(std::make_shared(lower)); - - lower_function = std::make_shared(); - lower_function->name = "greaterOrEquals"; - lower_function->arguments = args; - lower_function->children.push_back(lower_function->arguments); - - filter_function = lower_function; + relative_sample_size = convertAbsoluteSampleSizeToRelative(select_sample_size, approx_total_rows); + LOG_DEBUG(log, "Selected relative sample size: " << toString(relative_sample_size)); } - if (has_upper_limit) + /// SAMPLE 1 is the same as the absence of SAMPLE. 
+ if (relative_sample_size == RelativeSize(1)) + relative_sample_size = 0; + + if (relative_sample_offset > 0 && RelativeSize(0) == relative_sample_size) + throw Exception("Sampling offset is incorrect because no sampling", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + + if (relative_sample_offset > 1) { - if (!key_condition.addCondition(data.sampling_expr_column_name, Range::createRightBounded(upper, false))) - throw Exception("Sampling column not in primary key", ErrorCodes::ILLEGAL_COLUMN); - - ASTPtr args = std::make_shared(); - args->children.push_back(data.getSamplingExpression()); - args->children.push_back(std::make_shared(upper)); - - upper_function = std::make_shared(); - upper_function->name = "less"; - upper_function->arguments = args; - upper_function->children.push_back(upper_function->arguments); - - filter_function = upper_function; + relative_sample_offset = convertAbsoluteSampleSizeToRelative(select_sample_offset, approx_total_rows); + LOG_DEBUG(log, "Selected relative sample offset: " << toString(relative_sample_offset)); } + } - if (has_lower_limit && has_upper_limit) + /** Which range of sampling key values do I need to read? + * First, in the whole range ("universe") we select the interval + * of relative `relative_sample_size` size, offset from the beginning by `relative_sample_offset`. + * + * Example: SAMPLE 0.4 OFFSET 0.3 + * + * [------********------] + * ^ - offset + * <------> - size + * + * If the interval passes through the end of the universe, then cut its right side. + * + * Example: SAMPLE 0.4 OFFSET 0.8 + * + * [----------------****] + * ^ - offset + * <------> - size + * + * Next, if the `parallel_replicas_count`, `parallel_replica_offset` settings are set, + * then it is necessary to break the received interval into pieces of the number `parallel_replicas_count`, + * and select a piece with the number `parallel_replica_offset` (from zero). + * + * Example: SAMPLE 0.4 OFFSET 0.3, parallel_replicas_count = 2, parallel_replica_offset = 1 + * + * [----------****------] + * ^ - offset + * <------> - size + * <--><--> - pieces for different `parallel_replica_offset`, select the second one. + * + * It is very important that the intervals for different `parallel_replica_offset` cover the entire range without gaps and overlaps. + * It is also important that the entire universe can be covered using SAMPLE 0.1 OFFSET 0, ... OFFSET 0.9 and similar decimals. + */ + + bool use_sampling = relative_sample_size > 0 || settings.parallel_replicas_count > 1; + bool no_data = false; /// There is nothing left after sampling. 
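/// A worked example of the arithmetic described above (the values are illustrative, not from the patch):
/// SAMPLE 0.4 OFFSET 0.3 with parallel_replicas_count = 2 divides the sampled interval
/// between the replicas:
///
///     RelativeSize size(2, 5);    /// 0.4
///     RelativeSize offset(3, 10); /// 0.3
///     size /= 2;                  /// each replica now reads 0.2 of the universe
///     /// replica 0 keeps offset 0.3 -> [0.3, 0.5); replica 1 gets offset += size -> [0.5, 0.7)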
+
+        if (use_sampling)
+        {
+            if (!data.supportsSampling())
+                throw Exception("Illegal SAMPLE: table doesn't support sampling", ErrorCodes::SAMPLING_NOT_SUPPORTED);
+
+            if (sample_factor_column_queried && relative_sample_size != RelativeSize(0))
+                used_sample_factor = 1.0 / boost::rational_cast<Float64>(relative_sample_size);
+
+            RelativeSize size_of_universum = 0;
+            DataTypePtr type = data.primary_key_sample.getByName(data.sampling_expr_column_name).type;
+
+            if (typeid_cast<const DataTypeUInt64 *>(type.get()))
+                size_of_universum = RelativeSize(std::numeric_limits<UInt64>::max()) + RelativeSize(1);
+            else if (typeid_cast<const DataTypeUInt32 *>(type.get()))
+                size_of_universum = RelativeSize(std::numeric_limits<UInt32>::max()) + RelativeSize(1);
+            else if (typeid_cast<const DataTypeUInt16 *>(type.get()))
+                size_of_universum = RelativeSize(std::numeric_limits<UInt16>::max()) + RelativeSize(1);
+            else if (typeid_cast<const DataTypeUInt8 *>(type.get()))
+                size_of_universum = RelativeSize(std::numeric_limits<UInt8>::max()) + RelativeSize(1);
+            else
+                throw Exception("Invalid sampling column type in storage parameters: " + type->getName() + ". Must be unsigned integer type.",
+                    ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER);
+
+            if (settings.parallel_replicas_count > 1)
            {
+                if (relative_sample_size == RelativeSize(0))
+                    relative_sample_size = 1;

+                relative_sample_size /= settings.parallel_replicas_count.value;
+                relative_sample_offset += relative_sample_size * RelativeSize(settings.parallel_replica_offset.value);
            }

+            if (relative_sample_offset >= RelativeSize(1))
+                no_data = true;

+            /// Calculate the half-interval of `[lower, upper)` column values.
+            bool has_lower_limit = false;
+            bool has_upper_limit = false;

+            RelativeSize lower_limit_rational = relative_sample_offset * size_of_universum;
+            RelativeSize upper_limit_rational = (relative_sample_offset + relative_sample_size) * size_of_universum;

+            UInt64 lower = boost::rational_cast<UInt64>(lower_limit_rational);
+            UInt64 upper = boost::rational_cast<UInt64>(upper_limit_rational);

+            if (lower > 0)
+                has_lower_limit = true;

+            if (upper_limit_rational < size_of_universum)
+                has_upper_limit = true;

+            /*std::cerr << std::fixed << std::setprecision(100)
+                << "relative_sample_size: " << relative_sample_size << "\n"
+                << "relative_sample_offset: " << relative_sample_offset << "\n"
+                << "lower_limit_float: " << lower_limit_rational << "\n"
+                << "upper_limit_float: " << upper_limit_rational << "\n"
+                << "lower: " << lower << "\n"
+                << "upper: " << upper << "\n";*/

+            if ((has_upper_limit && upper == 0)
+                || (has_lower_limit && has_upper_limit && lower == upper))
+                no_data = true;

+            if (no_data || (!has_lower_limit && !has_upper_limit))
+            {
+                use_sampling = false;
+            }
+            else
+            {
+                /// Let's add the conditions to cut off something else when the index is scanned again and when the request is processed. 
+ + std::shared_ptr lower_function; + std::shared_ptr upper_function; + + if (has_lower_limit) + { + if (!key_condition.addCondition(data.sampling_expr_column_name, Range::createLeftBounded(lower, true))) + throw Exception("Sampling column not in primary key", ErrorCodes::ILLEGAL_COLUMN); + + ASTPtr args = std::make_shared(); + args->children.push_back(data.getSamplingExpression()); + args->children.push_back(std::make_shared(lower)); + + lower_function = std::make_shared(); + lower_function->name = "greaterOrEquals"; + lower_function->arguments = args; + lower_function->children.push_back(lower_function->arguments); + + filter_function = lower_function; + } + + if (has_upper_limit) + { + if (!key_condition.addCondition(data.sampling_expr_column_name, Range::createRightBounded(upper, false))) + throw Exception("Sampling column not in primary key", ErrorCodes::ILLEGAL_COLUMN); + + ASTPtr args = std::make_shared(); + args->children.push_back(data.getSamplingExpression()); + args->children.push_back(std::make_shared(upper)); + + upper_function = std::make_shared(); + upper_function->name = "less"; + upper_function->arguments = args; + upper_function->children.push_back(upper_function->arguments); + + filter_function = upper_function; + } + + if (has_lower_limit && has_upper_limit) + { + ASTPtr args = std::make_shared(); + args->children.push_back(lower_function); + args->children.push_back(upper_function); + + filter_function = std::make_shared(); + filter_function->name = "and"; + filter_function->arguments = args; + filter_function->children.push_back(filter_function->arguments); + } + + ASTPtr query = filter_function; + auto syntax_result = SyntaxAnalyzer(context, {}).analyze(query, available_real_columns); + filter_expression = ExpressionAnalyzer(filter_function, syntax_result, context).getActions(false); + + /// Add columns needed for `sample_by_ast` to `column_names_to_read`. + std::vector add_columns = filter_expression->getRequiredColumns(); + column_names_to_read.insert(column_names_to_read.end(), add_columns.begin(), add_columns.end()); + std::sort(column_names_to_read.begin(), column_names_to_read.end()); + column_names_to_read.erase(std::unique(column_names_to_read.begin(), column_names_to_read.end()), column_names_to_read.end()); + } + } + + if (no_data) + { + LOG_DEBUG(log, "Sampling yields no data."); + return {}; + } + + LOG_DEBUG(log, "Key condition: " << key_condition.toString()); + if (minmax_idx_condition) + LOG_DEBUG(log, "MinMax index condition: " << minmax_idx_condition->toString()); + + /// PREWHERE + String prewhere_column; + if (select.prewhere_expression) + prewhere_column = select.prewhere_expression->getColumnName(); + + RangesInDataParts parts_with_ranges; + + /// Let's find what range to read from each part. 
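/// A minimal sketch of how the loop below composes the two pruning mechanisms
/// (paraphrased from this patch, not a verbatim copy): the primary key produces the
/// initial mark ranges, and every secondary index may only shrink that set:
///
///     MarkRanges ranges = markRangesFromPKRange(part->index, key_condition, settings);
///     for (const auto & index_part : part->index_parts)
///         ranges = index_part->createIndexConditionOnPart(...)->filterRanges(ranges);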
+ size_t sum_marks = 0; + size_t sum_ranges = 0; + for (auto & part : parts) + { + RangesInDataPart ranges(part, part_index++); + + if (data.hasPrimaryKey()) + ranges.ranges = markRangesFromPKRange(part->index, key_condition, settings); + else + ranges.ranges = MarkRanges{MarkRange{0, part->marks_count}}; + + for (const auto index_part : part->index_parts) { + const auto condition = index_part->createIndexConditionOnPart( + query_info, context, index_part->index->sample.getNames(), index_part->index->expr); + if (condition->alwaysUnknownOrTrue()) { + continue; + } else { + ranges.ranges = condition->filterRanges(ranges.ranges); + } + } + + if (!ranges.ranges.empty()) + { + parts_with_ranges.push_back(ranges); + + sum_ranges += ranges.ranges.size(); + for (const auto & range : ranges.ranges) + sum_marks += range.end - range.begin; + } + } + + LOG_DEBUG(log, "Selected " << parts.size() << " parts by date, " << parts_with_ranges.size() << " parts by key, " + << sum_marks << " marks to read from " << sum_ranges << " ranges"); + + if (parts_with_ranges.empty()) + return {}; + + ProfileEvents::increment(ProfileEvents::SelectedParts, parts_with_ranges.size()); + ProfileEvents::increment(ProfileEvents::SelectedRanges, sum_ranges); + ProfileEvents::increment(ProfileEvents::SelectedMarks, sum_marks); + + BlockInputStreams res; + + if (select.final()) + { + /// Add columns needed to calculate the sorting expression and the sign. + std::vector add_columns = data.sorting_key_expr->getRequiredColumns(); column_names_to_read.insert(column_names_to_read.end(), add_columns.begin(), add_columns.end()); + + if (!data.merging_params.sign_column.empty()) + column_names_to_read.push_back(data.merging_params.sign_column); + if (!data.merging_params.version_column.empty()) + column_names_to_read.push_back(data.merging_params.version_column); + std::sort(column_names_to_read.begin(), column_names_to_read.end()); column_names_to_read.erase(std::unique(column_names_to_read.begin(), column_names_to_read.end()), column_names_to_read.end()); + + res = spreadMarkRangesAmongStreamsFinal( + std::move(parts_with_ranges), + column_names_to_read, + max_block_size, + settings.use_uncompressed_cache, + query_info.prewhere_info, + virt_column_names, + settings); } - } - - if (no_data) - { - LOG_DEBUG(log, "Sampling yields no data."); - return {}; - } - - LOG_DEBUG(log, "Key condition: " << key_condition.toString()); - if (minmax_idx_condition) - LOG_DEBUG(log, "MinMax index condition: " << minmax_idx_condition->toString()); - - /// PREWHERE - String prewhere_column; - if (select.prewhere_expression) - prewhere_column = select.prewhere_expression->getColumnName(); - - RangesInDataParts parts_with_ranges; - - /// Let's find what range to read from each part. 
- size_t sum_marks = 0; - size_t sum_ranges = 0; - for (auto & part : parts) - { - RangesInDataPart ranges(part, part_index++); - - if (data.hasPrimaryKey()) - ranges.ranges = markRangesFromPKRange(part->index, key_condition, settings); else - ranges.ranges = MarkRanges{MarkRange{0, part->marks_count}}; - - if (!ranges.ranges.empty()) { - parts_with_ranges.push_back(ranges); - - sum_ranges += ranges.ranges.size(); - for (const auto & range : ranges.ranges) - sum_marks += range.end - range.begin; + res = spreadMarkRangesAmongStreams( + std::move(parts_with_ranges), + num_streams, + column_names_to_read, + max_block_size, + settings.use_uncompressed_cache, + query_info.prewhere_info, + virt_column_names, + settings); } + + if (use_sampling) + for (auto & stream : res) + stream = std::make_shared(stream, filter_expression, filter_function->getColumnName()); + + /// By the way, if a distributed query or query to a Merge table is made, then the `_sample_factor` column can have different values. + if (sample_factor_column_queried) + for (auto & stream : res) + stream = std::make_shared>( + stream, std::make_shared(), used_sample_factor, "_sample_factor"); + + if (query_info.prewhere_info && query_info.prewhere_info->remove_columns_actions) + for (auto & stream : res) + stream = std::make_shared(stream, query_info.prewhere_info->remove_columns_actions); + + return res; } - LOG_DEBUG(log, "Selected " << parts.size() << " parts by date, " << parts_with_ranges.size() << " parts by key, " - << sum_marks << " marks to read from " << sum_ranges << " ranges"); - if (parts_with_ranges.empty()) - return {}; - - ProfileEvents::increment(ProfileEvents::SelectedParts, parts_with_ranges.size()); - ProfileEvents::increment(ProfileEvents::SelectedRanges, sum_ranges); - ProfileEvents::increment(ProfileEvents::SelectedMarks, sum_marks); - - BlockInputStreams res; - - if (select.final()) + BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( + RangesInDataParts && parts, + size_t num_streams, + const Names & column_names, + size_t max_block_size, + bool use_uncompressed_cache, + const PrewhereInfoPtr & prewhere_info, + const Names & virt_columns, + const Settings & settings) const { - /// Add columns needed to calculate the sorting expression and the sign. 
- std::vector add_columns = data.sorting_key_expr->getRequiredColumns(); - column_names_to_read.insert(column_names_to_read.end(), add_columns.begin(), add_columns.end()); + const size_t min_marks_for_concurrent_read = + (settings.merge_tree_min_rows_for_concurrent_read + data.index_granularity - 1) / data.index_granularity; + const size_t max_marks_to_use_cache = + (settings.merge_tree_max_rows_to_use_cache + data.index_granularity - 1) / data.index_granularity; - if (!data.merging_params.sign_column.empty()) - column_names_to_read.push_back(data.merging_params.sign_column); - if (!data.merging_params.version_column.empty()) - column_names_to_read.push_back(data.merging_params.version_column); - - std::sort(column_names_to_read.begin(), column_names_to_read.end()); - column_names_to_read.erase(std::unique(column_names_to_read.begin(), column_names_to_read.end()), column_names_to_read.end()); - - res = spreadMarkRangesAmongStreamsFinal( - std::move(parts_with_ranges), - column_names_to_read, - max_block_size, - settings.use_uncompressed_cache, - query_info.prewhere_info, - virt_column_names, - settings); - } - else - { - res = spreadMarkRangesAmongStreams( - std::move(parts_with_ranges), - num_streams, - column_names_to_read, - max_block_size, - settings.use_uncompressed_cache, - query_info.prewhere_info, - virt_column_names, - settings); - } - - if (use_sampling) - for (auto & stream : res) - stream = std::make_shared(stream, filter_expression, filter_function->getColumnName()); - - /// By the way, if a distributed query or query to a Merge table is made, then the `_sample_factor` column can have different values. - if (sample_factor_column_queried) - for (auto & stream : res) - stream = std::make_shared>( - stream, std::make_shared(), used_sample_factor, "_sample_factor"); - - if (query_info.prewhere_info && query_info.prewhere_info->remove_columns_actions) - for (auto & stream : res) - stream = std::make_shared(stream, query_info.prewhere_info->remove_columns_actions); - - return res; -} - - -BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( - RangesInDataParts && parts, - size_t num_streams, - const Names & column_names, - size_t max_block_size, - bool use_uncompressed_cache, - const PrewhereInfoPtr & prewhere_info, - const Names & virt_columns, - const Settings & settings) const -{ - const size_t min_marks_for_concurrent_read = - (settings.merge_tree_min_rows_for_concurrent_read + data.index_granularity - 1) / data.index_granularity; - const size_t max_marks_to_use_cache = - (settings.merge_tree_max_rows_to_use_cache + data.index_granularity - 1) / data.index_granularity; - - /// Count marks for each part. - std::vector sum_marks_in_parts(parts.size()); - size_t sum_marks = 0; - for (size_t i = 0; i < parts.size(); ++i) - { - /// Let the ranges be listed from right to left so that the leftmost range can be dropped using `pop_back()`. - std::reverse(parts[i].ranges.begin(), parts[i].ranges.end()); - - for (const auto & range : parts[i].ranges) - sum_marks_in_parts[i] += range.end - range.begin; - - sum_marks += sum_marks_in_parts[i]; - } - - if (sum_marks > max_marks_to_use_cache) - use_uncompressed_cache = false; - - BlockInputStreams res; - - if (sum_marks > 0 && settings.merge_tree_uniform_read_distribution == 1) - { - /// Reduce the number of num_streams if the data is small. 
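/// Illustrative numbers for the formula below: with sum_marks = 15,
/// min_marks_for_concurrent_read = 10 and a single part, the requested stream count
/// shrinks to max((15 + 10 - 1) / 10, 1) = 2, so tiny reads don't spawn idle streams.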
- if (sum_marks < num_streams * min_marks_for_concurrent_read && parts.size() < num_streams) - num_streams = std::max((sum_marks + min_marks_for_concurrent_read - 1) / min_marks_for_concurrent_read, parts.size()); - - MergeTreeReadPoolPtr pool = std::make_shared( - num_streams, sum_marks, min_marks_for_concurrent_read, parts, data, prewhere_info, true, - column_names, MergeTreeReadPool::BackoffSettings(settings), settings.preferred_block_size_bytes, false); - - /// Let's estimate total number of rows for progress bar. - const size_t total_rows = data.index_granularity * sum_marks; - LOG_TRACE(log, "Reading approx. " << total_rows << " rows with " << num_streams << " streams"); - - for (size_t i = 0; i < num_streams; ++i) + /// Count marks for each part. + std::vector sum_marks_in_parts(parts.size()); + size_t sum_marks = 0; + for (size_t i = 0; i < parts.size(); ++i) { - res.emplace_back(std::make_shared( - i, pool, min_marks_for_concurrent_read, max_block_size, settings.preferred_block_size_bytes, - settings.preferred_max_column_in_block_size_bytes, data, use_uncompressed_cache, - prewhere_info, settings, virt_columns)); + /// Let the ranges be listed from right to left so that the leftmost range can be dropped using `pop_back()`. + std::reverse(parts[i].ranges.begin(), parts[i].ranges.end()); - if (i == 0) + for (const auto & range : parts[i].ranges) + sum_marks_in_parts[i] += range.end - range.begin; + + sum_marks += sum_marks_in_parts[i]; + } + + if (sum_marks > max_marks_to_use_cache) + use_uncompressed_cache = false; + + BlockInputStreams res; + + if (sum_marks > 0 && settings.merge_tree_uniform_read_distribution == 1) + { + /// Reduce the number of num_streams if the data is small. + if (sum_marks < num_streams * min_marks_for_concurrent_read && parts.size() < num_streams) + num_streams = std::max((sum_marks + min_marks_for_concurrent_read - 1) / min_marks_for_concurrent_read, parts.size()); + + MergeTreeReadPoolPtr pool = std::make_shared( + num_streams, sum_marks, min_marks_for_concurrent_read, parts, data, prewhere_info, true, + column_names, MergeTreeReadPool::BackoffSettings(settings), settings.preferred_block_size_bytes, false); + + /// Let's estimate total number of rows for progress bar. + const size_t total_rows = data.index_granularity * sum_marks; + LOG_TRACE(log, "Reading approx. " << total_rows << " rows with " << num_streams << " streams"); + + for (size_t i = 0; i < num_streams; ++i) { - /// Set the approximate number of rows for the first source only - static_cast(*res.front()).addTotalRowsApprox(total_rows); + res.emplace_back(std::make_shared( + i, pool, min_marks_for_concurrent_read, max_block_size, settings.preferred_block_size_bytes, + settings.preferred_max_column_in_block_size_bytes, data, use_uncompressed_cache, + prewhere_info, settings, virt_columns)); + + if (i == 0) + { + /// Set the approximate number of rows for the first source only + static_cast(*res.front()).addTotalRowsApprox(total_rows); + } } } - } - else if (sum_marks > 0) - { - const size_t min_marks_per_stream = (sum_marks - 1) / num_streams + 1; - - for (size_t i = 0; i < num_streams && !parts.empty(); ++i) + else if (sum_marks > 0) { - size_t need_marks = min_marks_per_stream; + const size_t min_marks_per_stream = (sum_marks - 1) / num_streams + 1; - /// Loop over parts. - /// We will iteratively take part or some subrange of a part from the back - /// and assign a stream to read from it. 
- while (need_marks > 0 && !parts.empty()) + for (size_t i = 0; i < num_streams && !parts.empty(); ++i) { - RangesInDataPart part = parts.back(); - parts.pop_back(); + size_t need_marks = min_marks_per_stream; - size_t & marks_in_part = sum_marks_in_parts.back(); - - /// We will not take too few rows from a part. - if (marks_in_part >= min_marks_for_concurrent_read && - need_marks < min_marks_for_concurrent_read) - need_marks = min_marks_for_concurrent_read; - - /// Do not leave too few rows in the part. - if (marks_in_part > need_marks && - marks_in_part - need_marks < min_marks_for_concurrent_read) - need_marks = marks_in_part; - - MarkRanges ranges_to_get_from_part; - - /// We take the whole part if it is small enough. - if (marks_in_part <= need_marks) + /// Loop over parts. + /// We will iteratively take part or some subrange of a part from the back + /// and assign a stream to read from it. + while (need_marks > 0 && !parts.empty()) { - /// Restore the order of segments. - std::reverse(part.ranges.begin(), part.ranges.end()); + RangesInDataPart part = parts.back(); + parts.pop_back(); - ranges_to_get_from_part = part.ranges; + size_t & marks_in_part = sum_marks_in_parts.back(); - need_marks -= marks_in_part; - sum_marks_in_parts.pop_back(); - } - else - { - /// Loop through ranges in part. Take enough ranges to cover "need_marks". - while (need_marks > 0) + /// We will not take too few rows from a part. + if (marks_in_part >= min_marks_for_concurrent_read && + need_marks < min_marks_for_concurrent_read) + need_marks = min_marks_for_concurrent_read; + + /// Do not leave too few rows in the part. + if (marks_in_part > need_marks && + marks_in_part - need_marks < min_marks_for_concurrent_read) + need_marks = marks_in_part; + + MarkRanges ranges_to_get_from_part; + + /// We take the whole part if it is small enough. + if (marks_in_part <= need_marks) { - if (part.ranges.empty()) - throw Exception("Unexpected end of ranges while spreading marks among streams", ErrorCodes::LOGICAL_ERROR); + /// Restore the order of segments. + std::reverse(part.ranges.begin(), part.ranges.end()); - MarkRange & range = part.ranges.back(); + ranges_to_get_from_part = part.ranges; - const size_t marks_in_range = range.end - range.begin; - const size_t marks_to_get_from_range = std::min(marks_in_range, need_marks); - - ranges_to_get_from_part.emplace_back(range.begin, range.begin + marks_to_get_from_range); - range.begin += marks_to_get_from_range; - marks_in_part -= marks_to_get_from_range; - need_marks -= marks_to_get_from_range; - if (range.begin == range.end) - part.ranges.pop_back(); + need_marks -= marks_in_part; + sum_marks_in_parts.pop_back(); } - parts.emplace_back(part); + else + { + /// Loop through ranges in part. Take enough ranges to cover "need_marks". 
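/// For example (illustrative): with need_marks = 7 and ranges [0, 5) and [5, 12)
/// in the part, the stream takes all 5 marks of [0, 5), then 2 marks of [5, 12),
/// and the remainder [7, 12) stays behind for the next stream.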
+ while (need_marks > 0) + { + if (part.ranges.empty()) + throw Exception("Unexpected end of ranges while spreading marks among streams", ErrorCodes::LOGICAL_ERROR); + + MarkRange & range = part.ranges.back(); + + const size_t marks_in_range = range.end - range.begin; + const size_t marks_to_get_from_range = std::min(marks_in_range, need_marks); + + ranges_to_get_from_part.emplace_back(range.begin, range.begin + marks_to_get_from_range); + range.begin += marks_to_get_from_range; + marks_in_part -= marks_to_get_from_range; + need_marks -= marks_to_get_from_range; + if (range.begin == range.end) + part.ranges.pop_back(); + } + parts.emplace_back(part); + } + + BlockInputStreamPtr source_stream = std::make_shared( + data, part.data_part, max_block_size, settings.preferred_block_size_bytes, + settings.preferred_max_column_in_block_size_bytes, column_names, ranges_to_get_from_part, + use_uncompressed_cache, prewhere_info, true, settings.min_bytes_to_use_direct_io, + settings.max_read_buffer_size, true, virt_columns, part.part_index_in_query); + + res.push_back(source_stream); } - - BlockInputStreamPtr source_stream = std::make_shared( - data, part.data_part, max_block_size, settings.preferred_block_size_bytes, - settings.preferred_max_column_in_block_size_bytes, column_names, ranges_to_get_from_part, - use_uncompressed_cache, prewhere_info, true, settings.min_bytes_to_use_direct_io, - settings.max_read_buffer_size, true, virt_columns, part.part_index_in_query); - - res.push_back(source_stream); } + + if (!parts.empty()) + throw Exception("Couldn't spread marks among streams", ErrorCodes::LOGICAL_ERROR); } - if (!parts.empty()) - throw Exception("Couldn't spread marks among streams", ErrorCodes::LOGICAL_ERROR); + return res; } - return res; -} - -BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( - RangesInDataParts && parts, - const Names & column_names, - size_t max_block_size, - bool use_uncompressed_cache, - const PrewhereInfoPtr & prewhere_info, - const Names & virt_columns, - const Settings & settings) const -{ - const size_t max_marks_to_use_cache = - (settings.merge_tree_max_rows_to_use_cache + data.index_granularity - 1) / data.index_granularity; - - size_t sum_marks = 0; - for (size_t i = 0; i < parts.size(); ++i) - for (size_t j = 0; j < parts[i].ranges.size(); ++j) - sum_marks += parts[i].ranges[j].end - parts[i].ranges[j].begin; - - if (sum_marks > max_marks_to_use_cache) - use_uncompressed_cache = false; - - BlockInputStreams to_merge; - - /// NOTE `merge_tree_uniform_read_distribution` is not used for FINAL - - for (size_t part_index = 0; part_index < parts.size(); ++part_index) + BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( + RangesInDataParts && parts, + const Names & column_names, + size_t max_block_size, + bool use_uncompressed_cache, + const PrewhereInfoPtr & prewhere_info, + const Names & virt_columns, + const Settings & settings) const { - RangesInDataPart & part = parts[part_index]; + const size_t max_marks_to_use_cache = + (settings.merge_tree_max_rows_to_use_cache + data.index_granularity - 1) / data.index_granularity; - BlockInputStreamPtr source_stream = std::make_shared( - data, part.data_part, max_block_size, settings.preferred_block_size_bytes, - settings.preferred_max_column_in_block_size_bytes, column_names, part.ranges, use_uncompressed_cache, - prewhere_info, true, settings.min_bytes_to_use_direct_io, settings.max_read_buffer_size, true, - virt_columns, part.part_index_in_query); + size_t 
sum_marks = 0; + for (size_t i = 0; i < parts.size(); ++i) + for (size_t j = 0; j < parts[i].ranges.size(); ++j) + sum_marks += parts[i].ranges[j].end - parts[i].ranges[j].begin; - to_merge.emplace_back(std::make_shared(source_stream, data.sorting_key_expr)); + if (sum_marks > max_marks_to_use_cache) + use_uncompressed_cache = false; + + BlockInputStreams to_merge; + + /// NOTE `merge_tree_uniform_read_distribution` is not used for FINAL + + for (size_t part_index = 0; part_index < parts.size(); ++part_index) + { + RangesInDataPart & part = parts[part_index]; + + BlockInputStreamPtr source_stream = std::make_shared( + data, part.data_part, max_block_size, settings.preferred_block_size_bytes, + settings.preferred_max_column_in_block_size_bytes, column_names, part.ranges, use_uncompressed_cache, + prewhere_info, true, settings.min_bytes_to_use_direct_io, settings.max_read_buffer_size, true, + virt_columns, part.part_index_in_query); + + to_merge.emplace_back(std::make_shared(source_stream, data.sorting_key_expr)); + } + + Names sort_columns = data.sorting_key_columns; + SortDescription sort_description; + size_t sort_columns_size = sort_columns.size(); + sort_description.reserve(sort_columns_size); + + Block header = to_merge.at(0)->getHeader(); + for (size_t i = 0; i < sort_columns_size; ++i) + sort_description.emplace_back(header.getPositionByName(sort_columns[i]), 1, 1); + + BlockInputStreamPtr merged; + switch (data.merging_params.mode) + { + case MergeTreeData::MergingParams::Ordinary: + merged = std::make_shared(to_merge, sort_description, max_block_size); + break; + + case MergeTreeData::MergingParams::Collapsing: + merged = std::make_shared( + to_merge, sort_description, data.merging_params.sign_column); + break; + + case MergeTreeData::MergingParams::Summing: + merged = std::make_shared(to_merge, + sort_description, data.merging_params.columns_to_sum, max_block_size); + break; + + case MergeTreeData::MergingParams::Aggregating: + merged = std::make_shared(to_merge, sort_description, max_block_size); + break; + + case MergeTreeData::MergingParams::Replacing: /// TODO Make ReplacingFinalBlockInputStream + merged = std::make_shared(to_merge, + sort_description, data.merging_params.version_column, max_block_size); + break; + + case MergeTreeData::MergingParams::VersionedCollapsing: /// TODO Make VersionedCollapsingFinalBlockInputStream + merged = std::make_shared( + to_merge, sort_description, data.merging_params.sign_column, max_block_size); + break; + + case MergeTreeData::MergingParams::Graphite: + throw Exception("GraphiteMergeTree doesn't support FINAL", ErrorCodes::LOGICAL_ERROR); + } + + return {merged}; } - Names sort_columns = data.sorting_key_columns; - SortDescription sort_description; - size_t sort_columns_size = sort_columns.size(); - sort_description.reserve(sort_columns_size); - Block header = to_merge.at(0)->getHeader(); - for (size_t i = 0; i < sort_columns_size; ++i) - sort_description.emplace_back(header.getPositionByName(sort_columns[i]), 1, 1); - - BlockInputStreamPtr merged; - switch (data.merging_params.mode) + void MergeTreeDataSelectExecutor::createPositiveSignCondition( + ExpressionActionsPtr & out_expression, String & out_column, const Context & context) const { - case MergeTreeData::MergingParams::Ordinary: - merged = std::make_shared(to_merge, sort_description, max_block_size); - break; + auto function = std::make_shared(); + auto arguments = std::make_shared(); + auto sign = std::make_shared(data.merging_params.sign_column); + auto one = 
std::make_shared(1); - case MergeTreeData::MergingParams::Collapsing: - merged = std::make_shared( - to_merge, sort_description, data.merging_params.sign_column); - break; + function->name = "equals"; + function->arguments = arguments; + function->children.push_back(arguments); - case MergeTreeData::MergingParams::Summing: - merged = std::make_shared(to_merge, - sort_description, data.merging_params.columns_to_sum, max_block_size); - break; + arguments->children.push_back(sign); + arguments->children.push_back(one); - case MergeTreeData::MergingParams::Aggregating: - merged = std::make_shared(to_merge, sort_description, max_block_size); - break; - - case MergeTreeData::MergingParams::Replacing: /// TODO Make ReplacingFinalBlockInputStream - merged = std::make_shared(to_merge, - sort_description, data.merging_params.version_column, max_block_size); - break; - - case MergeTreeData::MergingParams::VersionedCollapsing: /// TODO Make VersionedCollapsingFinalBlockInputStream - merged = std::make_shared( - to_merge, sort_description, data.merging_params.sign_column, max_block_size); - break; - - case MergeTreeData::MergingParams::Graphite: - throw Exception("GraphiteMergeTree doesn't support FINAL", ErrorCodes::LOGICAL_ERROR); + ASTPtr query = function; + auto syntax_result = SyntaxAnalyzer(context, {}).analyze(query, data.getColumns().getAllPhysical()); + out_expression = ExpressionAnalyzer(query, syntax_result, context).getActions(false); + out_column = function->getColumnName(); } - return {merged}; -} - - -void MergeTreeDataSelectExecutor::createPositiveSignCondition( - ExpressionActionsPtr & out_expression, String & out_column, const Context & context) const -{ - auto function = std::make_shared(); - auto arguments = std::make_shared(); - auto sign = std::make_shared(data.merging_params.sign_column); - auto one = std::make_shared(1); - - function->name = "equals"; - function->arguments = arguments; - function->children.push_back(arguments); - - arguments->children.push_back(sign); - arguments->children.push_back(one); - - ASTPtr query = function; - auto syntax_result = SyntaxAnalyzer(context, {}).analyze(query, data.getColumns().getAllPhysical()); - out_expression = ExpressionAnalyzer(query, syntax_result, context).getActions(false); - out_column = function->getColumnName(); -} - /// Calculates a set of mark ranges, that could possibly contain keys, required by condition. /// In other words, it removes subranges from whole range, that definitely could not contain required keys. -MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( - const MergeTreeData::DataPart::Index & index, const KeyCondition & key_condition, const Settings & settings) const -{ - MarkRanges res; - - size_t marks_count = index.at(0)->size(); - if (marks_count == 0) - return res; - - /// If index is not used. - if (key_condition.alwaysUnknownOrTrue()) + MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( + const MergeTreeData::DataPart::Index & index, const KeyCondition & key_condition, const Settings & settings) const { - res.push_back(MarkRange(0, marks_count)); - } - else - { - size_t used_key_size = key_condition.getMaxKeyColumn() + 1; - size_t min_marks_for_seek = (settings.merge_tree_min_rows_for_seek + data.index_granularity - 1) / data.index_granularity; + MarkRanges res; - /** There will always be disjoint suspicious segments on the stack, the leftmost one at the top (back). - * At each step, take the left segment and check if it fits. 
-         * If fits, split it into smaller ones and put them on the stack. If not, discard it.
-         * If the segment is already of one mark length, add it to response and discard it.
-         */
-        std::vector<MarkRange> ranges_stack{ {0, marks_count} };
-
-        /// NOTE Creating temporary Field objects to pass to KeyCondition.
-        Row index_left(used_key_size);
-        Row index_right(used_key_size);
-
-        while (!ranges_stack.empty())
+        MarkRanges res;

+        size_t marks_count = index.at(0)->size();
+        if (marks_count == 0)
+            return res;
+
+        /// If index is not used.
+        if (key_condition.alwaysUnknownOrTrue())
        {
-            MarkRange range = ranges_stack.back();
-            ranges_stack.pop_back();
+            res.push_back(MarkRange(0, marks_count));
+        }
+        else
+        {
+            size_t used_key_size = key_condition.getMaxKeyColumn() + 1;
+            size_t min_marks_for_seek = (settings.merge_tree_min_rows_for_seek + data.index_granularity - 1) / data.index_granularity;
+
+            /** There will always be disjoint suspicious segments on the stack, the leftmost one at the top (back).
+             * At each step, take the left segment and check if it fits.
+             * If fits, split it into smaller ones and put them on the stack. If not, discard it.
+             * If the segment is already of one mark length, add it to response and discard it.
+             */
+            std::vector<MarkRange> ranges_stack{ {0, marks_count} };
+
+            /// NOTE Creating temporary Field objects to pass to KeyCondition.
+            Row index_left(used_key_size);
+            Row index_right(used_key_size);
+
+            while (!ranges_stack.empty())

            {
-            bool may_be_true;
-            if (range.end == marks_count)
+                MarkRange range = ranges_stack.back();
+                ranges_stack.pop_back();
+
+                bool may_be_true;
+                if (range.end == marks_count)
                {
-                for (size_t i = 0; i < used_key_size; ++i)
+                    for (size_t i = 0; i < used_key_size; ++i)
+                    {
+                        index[i]->get(range.begin, index_left[i]);
+                    }
+
+                    may_be_true = key_condition.mayBeTrueAfter(
+                        used_key_size, index_left.data(), data.primary_key_data_types);
                }
                else
                {
-                    index[i]->get(range.begin, index_left[i]);
+                    for (size_t i = 0; i < used_key_size; ++i)
+                    {
+                        index[i]->get(range.begin, index_left[i]);
+                        index[i]->get(range.end, index_right[i]);
+                    }

-            may_be_true = key_condition.mayBeTrueAfter(
-                used_key_size, index_left.data(), data.primary_key_data_types);
-        }
-        else
-        {
-            for (size_t i = 0; i < used_key_size; ++i)
-            {
-                index[i]->get(range.begin, index_left[i]);
-                index[i]->get(range.end, index_right[i]);
-            }
-
-            may_be_true = key_condition.mayBeTrueInRange(
-                used_key_size, index_left.data(), index_right.data(), data.primary_key_data_types);
-        }
-
-        if (!may_be_true)
-            continue;
-
-        if (range.end == range.begin + 1)
-        {
-            /// We saw a useful gap between neighboring marks. Either add it to the last range, or start a new range.
-            if (res.empty() || range.begin - res.back().end > min_marks_for_seek)
-                res.push_back(range);
            else
-                res.back().end = range.end;
-        }
-        else
-        {
-            /// Break the segment and put the result on the stack from right to left. 
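/// Illustrative split: for a range [0, 100) with merge_tree_coarse_index_granularity = 8,
/// step = (100 - 0 - 1) / 8 + 1 = 13, and the stack receives
/// [87, 100), [74, 87), ..., [9, 22), and finally [0, 9) on top,
/// so the leftmost subrange is the next one to be examined.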
- size_t step = (range.end - range.begin - 1) / settings.merge_tree_coarse_index_granularity + 1; - size_t end; + { + for (size_t i = 0; i < used_key_size; ++i) + { + index[i]->get(range.begin, index_left[i]); + index[i]->get(range.end, index_right[i]); + } - for (end = range.end; end > range.begin + step; end -= step) - ranges_stack.push_back(MarkRange(end - step, end)); + may_be_true = key_condition.mayBeTrueInRange( + used_key_size, index_left.data(), index_right.data(), data.primary_key_data_types); + } - ranges_stack.push_back(MarkRange(range.begin, end)); + if (!may_be_true) + continue; + + if (range.end == range.begin + 1) + { + /// We saw a useful gap between neighboring marks. Either add it to the last range, or start a new range. + if (res.empty() || range.begin - res.back().end > min_marks_for_seek) + res.push_back(range); + else + res.back().end = range.end; + } + else + { + /// Break the segment and put the result on the stack from right to left. + size_t step = (range.end - range.begin - 1) / settings.merge_tree_coarse_index_granularity + 1; + size_t end; + + for (end = range.end; end > range.begin + step; end -= step) + ranges_stack.push_back(MarkRange(end - step, end)); + + ranges_stack.push_back(MarkRange(range.begin, end)); + } } } + + return res; } - return res; -} - -} +} \ No newline at end of file diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h index c376e91ce42..d6a9215ca7e 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h @@ -49,6 +49,7 @@ public: protected: IndexCondition() = default; +public: MergeTreeIndexPartPtr part; }; @@ -80,6 +81,7 @@ protected: virtual void updateImpl(const Block & block, const Names & column_names) = 0; virtual void mergeImpl(const MergeTreeIndexPart & other) = 0; +public: MergeTreeIndexPtr index; }; From 33cf4c96e666ad930b3df71b67a22098b7b9c123 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 29 Dec 2018 16:04:22 +0300 Subject: [PATCH 015/586] fix --- dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 0d61a850c99..7d1a228a2ec 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -531,9 +531,7 @@ namespace DB for (const auto index_part : part->index_parts) { const auto condition = index_part->createIndexConditionOnPart( query_info, context, index_part->index->sample.getNames(), index_part->index->expr); - if (condition->alwaysUnknownOrTrue()) { - continue; - } else { + if (!condition->alwaysUnknownOrTrue()) { ranges.ranges = condition->filterRanges(ranges.ranges); } } From ad2a453919790e5604ef1b40b02fde1c335dbfdf Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 29 Dec 2018 16:11:30 +0300 Subject: [PATCH 016/586] changed comment --- dbms/src/Storages/MergeTree/MergeTreeIndexes.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h index d6a9215ca7e..1a92e4aa0a9 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h @@ -39,11 +39,10 @@ public: virtual INDEX_TYPE indexType() const; - // Checks if this index is useful for query. 
+ /// Checks if this index is useful for query.
 virtual bool alwaysUnknownOrTrue() const = 0;
 
- // Splits it's input to ranges with length equal to index granularity (or less on the borders).
- // Then it reads necessary ranges of the index and decides what ranges from the input should be reduced.
+ /// Drops the ranges for which the query condition is definitely false.
 virtual MarkRanges filterRanges(const MarkRanges & ranges) const = 0;
 
 protected:

From 1b7c0aeef128a69b297c51f1c64fcf742b6188f7 Mon Sep 17 00:00:00 2001
From: Nikita Vasilev
Date: Wed, 2 Jan 2019 17:24:26 +0300
Subject: [PATCH 017/586] fix

---
 dbms/src/Parsers/ParserCreateQuery.h | 2 +-
 .../MergeTree/MergeTreeDataMergerMutator.cpp | 17 ++++++++++
 .../MergeTree/MergeTreeDataSelectExecutor.cpp | 7 ++--
 .../Storages/MergeTree/MergeTreeIndexes.cpp | 17 ++--------
 .../src/Storages/MergeTree/MergeTreeIndexes.h | 34 +++++++------------
 5 files changed, 36 insertions(+), 41 deletions(-)

diff --git a/dbms/src/Parsers/ParserCreateQuery.h b/dbms/src/Parsers/ParserCreateQuery.h
index 1529b413008..f9f19177a44 100644
--- a/dbms/src/Parsers/ParserCreateQuery.h
+++ b/dbms/src/Parsers/ParserCreateQuery.h
@@ -208,7 +208,7 @@ public:
 ParserIndexDeclaration() {}
 
 protected:
- const char * getName() const override { return "INDEX query"; }
+ const char * getName() const override { return "INDEX"; }
 bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
 };
 
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
index a36741ba143..ef2d56a4f83 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
@@ -811,6 +811,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor
 Poco::File(rows_sources_file_path).remove();
 }
 
+ // TODO: the index needs to be merged here somehow, or in MergedBlockOutputStream
+
 for (const auto & part : parts)
 new_data_part->minmax_idx.merge(part->minmax_idx);
 
@@ -915,6 +917,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor
 while (check_not_cancelled() && (block = in->read()))
 {
 minmax_idx.update(block, data.minmax_idx_columns);
+ // TODO: calculate the indexes
+ /// Assuming the data is sorted, we can calculate the indexes right here
 
 out.write(block);
 }
@@ -929,6 +933,18 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor
 /// We will modify only some of the columns. Other columns and key values can be copied as-is.
 /// TODO: check that we modify only non-key columns in this case.
 
+ for (const auto& col : in_header.getNames()) {
+ for (const auto& index_part : source_part->index_parts) {
+ const auto index_cols = index_part->index->sample.getNames();
+ auto it = find(cbegin(index_cols), cend(index_cols), col);
+ if (it != cend(index_cols)) {
+ throw Exception("You can not modify columns used in index.
Index name: '" + + index_part->index->name + + "' bad column:" + *it, ErrorCodes::ILLEGAL_COLUMN); + } + } + } + NameSet files_to_skip = {"checksums.txt", "columns.txt"}; for (const auto & entry : in_header) { @@ -999,6 +1015,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor new_data_part->index = source_part->index; new_data_part->partition.assign(source_part->partition); new_data_part->minmax_idx = source_part->minmax_idx; + new_data_part->index_parts = source_part->index_parts; new_data_part->modification_time = time(nullptr); new_data_part->bytes_on_disk = MergeTreeData::DataPart::calculateTotalSizeOnDisk(new_data_part->getFullPath()); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 7d1a228a2ec..4eff75e567d 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -528,9 +528,10 @@ namespace DB else ranges.ranges = MarkRanges{MarkRange{0, part->marks_count}}; - for (const auto index_part : part->index_parts) { - const auto condition = index_part->createIndexConditionOnPart( - query_info, context, index_part->index->sample.getNames(), index_part->index->expr); + /// It can be done in multiple threads (one thread for each part). + /// Maybe it should be moved to BlockInputStream, but it can cause some problems. + for (auto index_part : part->index_parts) { + auto condition = index_part->createIndexConditionOnPart(query_info, context); if (!condition->alwaysUnknownOrTrue()) { ranges.ranges = condition->filterRanges(ranges.ranges); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp index 1a31b260b78..f1ba281a747 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp @@ -11,28 +11,15 @@ namespace ErrorCodes } -INDEX_TYPE IndexCondition::indexType() const { +IndexType IndexCondition::indexType() const { return part->indexType(); } -INDEX_TYPE MergeTreeIndexPart::indexType() const { +IndexType MergeTreeIndexPart::indexType() const { return index->indexType(); } -void MergeTreeIndexPart::update(const Block & block, const Names & column_names) { - /// a few checks? 
- updateImpl(block, column_names); -} - -void MergeTreeIndexPart::merge(const MergeTreeIndexPart & other) { - if (other.indexType() != indexType()) { - throw Exception("MergeTreeIndexPart: Merging index part with another index type.", - ErrorCodes::LOGICAL_ERROR); - } - mergeImpl(other); -} - void MergeTreeIndexFactory::registerIndex(const std::string &name, Creator creator) { if (!indexes.emplace(name, std::move(creator)).second) diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h index 1a92e4aa0a9..1cf6c988bdb 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -14,7 +15,7 @@ namespace DB { -enum class INDEX_TYPE { +enum class IndexType { NONE = 0 }; @@ -22,8 +23,9 @@ enum class INDEX_TYPE { class MergeTreeIndex; struct MergeTreeIndexPart; -using MergeTreeIndexPtr = std::shared_ptr; -using MergeTreeIndexes = std::vector; +using MergeTreeIndexPtr = std::shared_ptr; +using MutableMergeTreeIndexPtr = std::shared_ptr; +using MergeTreeIndexes = std::vector; using MergeTreeIndexPartPtr = std::shared_ptr; using MergeTreeIndexParts = std::vector; @@ -37,7 +39,7 @@ class IndexCondition { public: virtual ~IndexCondition() = default; - virtual INDEX_TYPE indexType() const; + virtual IndexType indexType() const; /// Checks if this index is useful for query. virtual bool alwaysUnknownOrTrue() const = 0; @@ -61,27 +63,17 @@ struct MergeTreeIndexPart friend MergeTreeIndex; public: + MergeTreeIndexPart() = default; virtual ~MergeTreeIndexPart() = default; - virtual INDEX_TYPE indexType() const; + virtual IndexType indexType() const; - void update(const Block & block, const Names & column_names); - void merge(const MergeTreeIndexPart & other); + virtual MergeTreeIndexPartPtr cloneEmpty() const = 0; virtual IndexConditionPtr createIndexConditionOnPart( - const SelectQueryInfo & query_info - , const Context & context - , const Names & key_column_names - , const ExpressionActionsPtr & key_expr) const = 0; + const SelectQueryInfo & query_info, const Context & context) const = 0; -protected: - MergeTreeIndexPart() = default; - - virtual void updateImpl(const Block & block, const Names & column_names) = 0; - virtual void mergeImpl(const MergeTreeIndexPart & other) = 0; - -public: - MergeTreeIndexPtr index; + MergeTreeIndexPtr index; // if parts can migrate to another tables it can be bad }; @@ -94,9 +86,7 @@ public: virtual ~MergeTreeIndex() {}; - virtual INDEX_TYPE indexType() const = 0; - - virtual MergeTreeIndexPartPtr createEmptyIndexPart() const = 0; + virtual IndexType indexType() const = 0; String name; ExpressionActionsPtr expr; From f704a32515b4da6225998303ea3beb6e2b2f9586 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 2 Jan 2019 19:04:44 +0300 Subject: [PATCH 018/586] added granularity --- dbms/src/Parsers/ASTIndexDeclaration.h | 7 ++++++- dbms/src/Parsers/ParserCreateQuery.cpp | 10 ++++++++++ dbms/src/Parsers/ParserCreateQuery.h | 4 ++-- dbms/src/Parsers/tests/create_parser.cpp | 2 +- dbms/src/Storages/MergeTree/MergeTreeIndexes.h | 5 +++-- 5 files changed, 22 insertions(+), 6 deletions(-) diff --git a/dbms/src/Parsers/ASTIndexDeclaration.h b/dbms/src/Parsers/ASTIndexDeclaration.h index 3195a9c4bfe..eeeec0755a3 100644 --- a/dbms/src/Parsers/ASTIndexDeclaration.h +++ b/dbms/src/Parsers/ASTIndexDeclaration.h @@ -1,9 +1,11 @@ #pragma once +#include #include -#include +#include #include #include 
+#include #include @@ -19,6 +21,7 @@ public: String name; IAST * expr; ASTFunction * type; + Field granularity; //TODO: params (GRANULARITY number or SETTINGS a=b, c=d, ..)? /** Get the text that identifies this element. */ @@ -42,6 +45,8 @@ public: expr->formatImpl(s, state, frame); s.ostr << (s.hilite ? hilite_keyword : "") << " TYPE " << (s.hilite ? hilite_none : ""); type->formatImpl(s, state, frame); + s.ostr << (s.hilite ? hilite_keyword : "") << " GRANULARITY " << (s.hilite ? hilite_none : ""); + s.ostr << applyVisitor(FieldVisitorToString(), granularity); } }; diff --git a/dbms/src/Parsers/ParserCreateQuery.cpp b/dbms/src/Parsers/ParserCreateQuery.cpp index 54a12c64a22..90427bccc0d 100644 --- a/dbms/src/Parsers/ParserCreateQuery.cpp +++ b/dbms/src/Parsers/ParserCreateQuery.cpp @@ -95,14 +95,17 @@ bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expe { ParserKeyword s_by("BY"); ParserKeyword s_type("TYPE"); + ParserKeyword s_granularity("GRANULARITY"); ParserIdentifier name_p; ParserIdentifierWithOptionalParameters ident_with_optional_params_p; ParserExpression expression_p; + ParserLiteral granularity_p; ASTPtr name; ASTPtr expr; ASTPtr type; + ASTPtr granularity; if (!name_p.parse(pos, name, expected)) return false; @@ -119,8 +122,15 @@ bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expe if (!ident_with_optional_params_p.parse(pos, type, expected)) return false; + if (!s_granularity.ignore(pos, expected)) + return false; + + if (!granularity_p.parse(pos, granularity, expected)) + return false; + auto index = std::make_shared(); index->name = typeid_cast(*name).name; + index->granularity = typeid_cast(*granularity).value; index->set(index->expr, expr); index->set(index->type, type); node = index; diff --git a/dbms/src/Parsers/ParserCreateQuery.h b/dbms/src/Parsers/ParserCreateQuery.h index f9f19177a44..d7176d67427 100644 --- a/dbms/src/Parsers/ParserCreateQuery.h +++ b/dbms/src/Parsers/ParserCreateQuery.h @@ -200,7 +200,7 @@ protected: /** - * INDEX name(column1, column2, ...) TYPE typename(arg1, arg2, ...) + * INDEX name BY expr TYPE typename(arg1, arg2, ...) GRANULARITY value */ class ParserIndexDeclaration : public IParserBase { @@ -215,7 +215,7 @@ protected: /** * ENGINE = name [PARTITION BY expr] [ORDER BY expr] [PRIMARY KEY expr] [SAMPLE BY expr] - * [INDEX name(expr) TYPE type(args) ...] [SETTINGS name = value, ...] + * [INDEX name BY expr TYPE type(args) ... GRANULARITY value] [SETTINGS name = value, ...] 
*/ class ParserStorage : public IParserBase { diff --git a/dbms/src/Parsers/tests/create_parser.cpp b/dbms/src/Parsers/tests/create_parser.cpp index 8a591dfa496..4139c930ba2 100644 --- a/dbms/src/Parsers/tests/create_parser.cpp +++ b/dbms/src/Parsers/tests/create_parser.cpp @@ -10,7 +10,7 @@ int main(int, char **) { using namespace DB; - std::string input = "CREATE TABLE hits (URL String, UserAgentMinor2 FixedString(2), EventTime DateTime) ENGINE = MergeTree() ORDER BY EventTime INDEX minmax1 BY (lower(URL), EventTime) TYPE MINMAX(1,2,3)"; + std::string input = "CREATE TABLE hits (URL String, UserAgentMinor2 FixedString(2), EventTime DateTime) ENGINE = MergeTree() ORDER BY EventTime INDEX minmax1 BY (lower(URL), EventTime) TYPE MINMAX(1,2,3) GRANULARITY 10"; ParserCreateQuery parser; ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0); diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h index 1cf6c988bdb..a20bc2f7b28 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h @@ -81,8 +81,8 @@ public: class MergeTreeIndex { public: - MergeTreeIndex(String name, ExpressionActionsPtr expr, Block key) - : name(name), expr(expr), sample(key) {} + MergeTreeIndex(String name, ExpressionActionsPtr expr, size_t granularity, Block key) + : name(name), expr(expr), granularity(granularity), sample(key) {} virtual ~MergeTreeIndex() {}; @@ -90,6 +90,7 @@ public: String name; ExpressionActionsPtr expr; + size_t granularity; Block sample; }; From b2da3a34df6f3a596e83217c8b3d3d3c2fcc308b Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 2 Jan 2019 21:47:07 +0300 Subject: [PATCH 019/586] comments --- dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp | 3 ++- dbms/src/Storages/MergeTree/MergeTreeIndexes.h | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index ef2d56a4f83..014977f8bc3 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -933,6 +933,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor /// We will modify only some of the columns. Other columns and key values can be copied as-is. /// TODO: check that we modify only non-key columns in this case. + /// TODO: just recalc index on part for (const auto& col : in_header.getNames()) { for (const auto& index_part : source_part->index_parts) { const auto index_cols = index_part->index->sample.getNames(); @@ -940,7 +941,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor if (it != cend(index_cols)) { throw Exception("You can not modify columns used in index. Index name: '" + index_part->index->name - + "' bad column:" + *it, ErrorCodes::ILLEGAL_COLUMN); + + "' bad column: '" + *it + "'", ErrorCodes::ILLEGAL_COLUMN); } } } diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h index a20bc2f7b28..44685c3d914 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h @@ -39,7 +39,7 @@ class IndexCondition { public: virtual ~IndexCondition() = default; - virtual IndexType indexType() const; + IndexType indexType() const; /// Checks if this index is useful for query. 
 virtual bool alwaysUnknownOrTrue() const = 0;
@@ -58,6 +58,7 @@ using IndexConditionPtr = std::shared_ptr;
 
 /// Data structure for operations with index data for each MergeTreeDataPart.
+/// Stores information specific for DataPart.
 struct MergeTreeIndexPart
 {
 friend MergeTreeIndex;
@@ -66,7 +67,7 @@ public:
 MergeTreeIndexPart() = default;
 virtual ~MergeTreeIndexPart() = default;
 
- virtual IndexType indexType() const;
+ IndexType indexType() const;
 
 virtual MergeTreeIndexPartPtr cloneEmpty() const = 0;

From 69052b3fc0309e34f0d52e9aa86cf4ef2b0127c3 Mon Sep 17 00:00:00 2001
From: Nikita Vasilev
Date: Thu, 3 Jan 2019 11:10:25 +0300
Subject: [PATCH 020/586] fix

---
 dbms/src/Parsers/ASTIndexDeclaration.h | 1 -
 dbms/src/Parsers/ParserCreateQuery.cpp | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/dbms/src/Parsers/ASTIndexDeclaration.h b/dbms/src/Parsers/ASTIndexDeclaration.h
index eeeec0755a3..be894d52960 100644
--- a/dbms/src/Parsers/ASTIndexDeclaration.h
+++ b/dbms/src/Parsers/ASTIndexDeclaration.h
@@ -22,7 +22,6 @@ public:
 IAST * expr;
 ASTFunction * type;
 Field granularity;
- //TODO: params (GRANULARITY number or SETTINGS a=b, c=d, ..)?
 
 /** Get the text that identifies this element. */
 String getID(char) const override { return "Index"; }
diff --git a/dbms/src/Parsers/ParserCreateQuery.cpp b/dbms/src/Parsers/ParserCreateQuery.cpp
index 90427bccc0d..5577cd381b5 100644
--- a/dbms/src/Parsers/ParserCreateQuery.cpp
+++ b/dbms/src/Parsers/ParserCreateQuery.cpp
@@ -100,7 +100,7 @@ bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
 ParserIdentifier name_p;
 ParserIdentifierWithOptionalParameters ident_with_optional_params_p;
 ParserExpression expression_p;
- ParserLiteral granularity_p;
+ ParserUnsignedInteger granularity_p;
 
 ASTPtr name;
 ASTPtr expr;
 ASTPtr type;

From 35dbb94312b0da5c6e5a7ec433eccfd161b350f2 Mon Sep 17 00:00:00 2001
From: Nikita Vasilev
Date: Thu, 3 Jan 2019 19:47:42 +0300
Subject: [PATCH 021/586] fix

---
 .../MergeTree/MergeTreeDataMergerMutator.cpp | 13 ++++++++----
 .../src/Storages/MergeTree/MergeTreeIndexes.h | 20 ++++++++++++++++++-
 2 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
index 014977f8bc3..92236207628 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
@@ -620,6 +620,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor
 }
 
+ ExpressionActionsPtr stream_expr = data.sorting_key_expr;
+ /// TODO: add the columns used by the indexes
 for (const auto & part : parts)
 {
 auto input = std::make_unique(
@@ -628,11 +630,14 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor
 input->setProgressCallback(MergeProgressCallback(
 merge_entry, sum_input_rows_upper_bound, column_sizes, watch_prev_elapsed, merge_alg));
 
- if (data.hasPrimaryKey())
- src_streams.emplace_back(std::make_shared(
- std::make_shared(BlockInputStreamPtr(std::move(input)), data.sorting_key_expr)));
- else
+ if (data.hasPrimaryKey()) {
+ auto stream = std::make_shared(
+ std::make_shared(
+ BlockInputStreamPtr(std::move(input)), stream_expr));
+ src_streams.emplace_back(stream);
+ } else {
 src_streams.emplace_back(std::move(input));
+ }
 }
 
 Names sort_columns = data.sorting_key_columns;
diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h
index 44685c3d914..25521a5e471 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h
@@ -57,8 +57,25 @@ public:
 using IndexConditionPtr = std::shared_ptr;
 
+struct MergeTreeIndexGranule
+{
+ friend MergeTreeIndexPart;
+
+public:
+ virtual ~MergeTreeIndexGranule();
+
+ virtual void serializeBinary() const = 0;
+ virtual void deserializeBinary() const = 0;
+
+ virtual void update(const Block & block, size_t first, size_t last) = 0;
+};
+
+using MergeTreeIndexGranulePtr = std::shared_ptr;
+
+
 /// Data structure for operations with index data for each MergeTreeDataPart.
 /// Stores information specific for DataPart.
+/// Probably will be removed later.
 struct MergeTreeIndexPart
 {
 friend MergeTreeIndex;
@@ -71,10 +88,11 @@ public:
 
 virtual MergeTreeIndexPartPtr cloneEmpty() const = 0;
 
+ virtual MergeTreeIndexGranulePtr createIndexGranule() const = 0;
 virtual IndexConditionPtr createIndexConditionOnPart(
 const SelectQueryInfo & query_info, const Context & context) const = 0;
 
- MergeTreeIndexPtr index; // if parts can migrate to another tables it can be bad
+ MergeTreeIndexPtr index;
 };

From 92a850ceb04c7e955417e6a5f7429d3799f1ffc0 Mon Sep 17 00:00:00 2001
From: Nikita Vasilev
Date: Fri, 4 Jan 2019 17:33:38 +0300
Subject: [PATCH 022/586] added writing indexes

---
 .../MergeTree/MergeTreeDataMergerMutator.cpp | 44 ++++---
 .../MergeTree/MergeTreeDataMergerMutator.h | 2 +-
 .../src/Storages/MergeTree/MergeTreeIndexes.h | 14 ++-
 .../MergeTree/MergedBlockOutputStream.cpp | 109 ++++++++++++++++++
 .../MergeTree/MergedBlockOutputStream.h | 4 +
 5 files changed, 155 insertions(+), 18 deletions(-)

diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
index 92236207628..1eb70920756 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
@@ -336,12 +336,18 @@ MergeTreeData::DataPartsVector MergeTreeDataMergerMutator::selectAllPartsFromPar
 static void extractMergingAndGatheringColumns(
 const NamesAndTypesList & all_columns,
 const ExpressionActionsPtr & sorting_key_expr,
+ const MergeTreeIndexes & indexes,
 const MergeTreeData::MergingParams & merging_params,
 NamesAndTypesList & gathering_columns, Names & gathering_column_names,
 NamesAndTypesList & merging_columns, Names & merging_column_names)
 {
 Names sort_key_columns_vec = sorting_key_expr->getRequiredColumns();
 std::set key_columns(sort_key_columns_vec.cbegin(), sort_key_columns_vec.cend());
+ for (const auto & index : indexes) {
+ Names index_columns_vec = index->expr->getRequiredColumns();
+ std::copy(index_columns_vec.cbegin(), index_columns_vec.cend(),
+ std::inserter(key_columns, key_columns.end()));
+ }
 
 /// Force sign column for Collapsing mode
 if (merging_params.mode == MergeTreeData::MergingParams::Collapsing)
@@ -549,7 +555,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor
 NamesAndTypesList gathering_columns, merging_columns;
 Names gathering_column_names, merging_column_names;
 extractMergingAndGatheringColumns(
- all_columns, data.sorting_key_expr,
+ all_columns, data.sorting_key_expr, data.indexes,
 data.merging_params, gathering_columns, gathering_column_names, merging_columns, merging_column_names);
 
 MergeTreeData::MutableDataPartPtr new_data_part = std::make_shared(
@@ -620,8 +626,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor
 }
 
- ExpressionActionsPtr stream_expr = data.sorting_key_expr;
- /// TODO: add the columns used by the indexes
 for (const auto & part : parts)
 {
 auto input = std::make_unique(
@@ -630,13 +634,20 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor
 input->setProgressCallback(MergeProgressCallback(
 merge_entry, sum_input_rows_upper_bound, column_sizes, watch_prev_elapsed, merge_alg));
 
+ BlockInputStreamPtr stream = std::move(input);
+ for (const auto & index : data.indexes) {
+ stream = std::make_shared(stream, index->expr);
+ }
+
 if (data.hasPrimaryKey()) {
- auto stream = std::make_shared(
- std::make_shared(
- BlockInputStreamPtr(std::move(input)), stream_expr));
- src_streams.emplace_back(stream);
+ stream = std::make_shared(
+ BlockInputStreamPtr(std::move(stream)), data.sorting_key_expr);
+ }
+
+ if (!data.indexes.empty() || data.hasPrimaryKey()) {
+ src_streams.emplace_back(std::make_shared(stream));
 } else {
- src_streams.emplace_back(std::move(input));
+ src_streams.emplace_back(stream);
 }
 }
 
 Names sort_columns = data.sorting_key_columns;
@@ -898,7 +909,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor
 
 Poco::File(new_part_tmp_path).createDirectories();
 
- auto in = mutations_interpreter.execute();
+ BlockInputStreamPtr in = mutations_interpreter.execute();
 
 NamesAndTypesList all_columns = data.getColumns().getAllPhysical();
 Block in_header = in->getHeader();
@@ -907,9 +918,16 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor
 {
 /// All columns are modified, proceed to write a new part from scratch.
 
+ for (const auto & index : data.indexes) {
+ in = std::make_shared(in, index->expr);
+ }
+
 if (data.hasPrimaryKey())
 in = std::make_shared(
 std::make_shared(in, data.primary_key_expr));
+ else if (!data.indexes.empty()) {
+ in = std::make_shared(in);
+ }
 
 MergeTreeDataPart::MinMaxIndex minmax_idx;
 
@@ -939,13 +957,13 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor
 /// We will modify only some of the columns. Other columns and key values can be copied as-is.
 /// TODO: check that we modify only non-key columns in this case.
 
 /// TODO: just recalc index on part
- for (const auto& col : in_header.getNames()) {
- for (const auto& index_part : source_part->index_parts) {
- const auto index_cols = index_part->index->sample.getNames();
+ for (const auto & col : in_header.getNames()) {
+ for (const auto index : data.indexes) {
+ const auto & index_cols = index->expr->getRequiredColumns();
 auto it = find(cbegin(index_cols), cend(index_cols), col);
 if (it != cend(index_cols)) {
 throw Exception("You can not modify columns used in index.
Index name: '" - + index_part->index->name + + index->name + "' bad column: '" + *it + "'", ErrorCodes::ILLEGAL_COLUMN); } } diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index 0d6cdd3f557..dae2aefa086 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -120,7 +120,7 @@ public: enum class MergeAlgorithm { Horizontal, /// per-row merge of all columns - Vertical /// per-row merge of PK columns, per-column gather for non-PK columns + Vertical /// per-row merge of PK and secondary indexes columns, per-column gather for non-PK columns }; private: diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h index 25521a5e471..30fe6fb1cda 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h @@ -64,13 +64,16 @@ struct MergeTreeIndexGranule public: virtual ~MergeTreeIndexGranule(); - virtual void serializeBinary() const = 0; - virtual void deserializeBinary() const = 0; + virtual void serializeBinary(WriteBuffer & ostr) const = 0; + virtual void deserializeBinary(ReadBuffer & istr) const = 0; - virtual void update(const Block & block, size_t first, size_t last) = 0; + virtual bool empty() const = 0; + + virtual void update(const Block & block, size_t * pos, size_t limit) = 0; }; using MergeTreeIndexGranulePtr = std::shared_ptr; +using MergeTreeIndexGranules = std::vector; /// Data structure for operations with index data for each MergeTreeDataPart. @@ -88,7 +91,6 @@ public: virtual MergeTreeIndexPartPtr cloneEmpty() const = 0; - virtual MergeTreeIndexGranulePtr createIndexGranule() const = 0; virtual IndexConditionPtr createIndexConditionOnPart( const SelectQueryInfo & query_info, const Context & context) const = 0; @@ -107,9 +109,13 @@ public: virtual IndexType indexType() const = 0; + virtual MergeTreeIndexGranulePtr createIndexGranule() const = 0; + String name; ExpressionActionsPtr expr; size_t granularity; + Names columns; + DataTypes data_types; Block sample; }; diff --git a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp index f7ac0eb8e25..cb89087d86d 100644 --- a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -16,6 +16,8 @@ namespace constexpr auto DATA_FILE_EXTENSION = ".bin"; constexpr auto MARKS_FILE_EXTENSION = ".mrk"; +constexpr auto INDEX_FILE_EXTENSION = ".idx"; +constexpr auto INDEX_FILE_PREFIX = "skp_idx_"; } @@ -318,6 +320,17 @@ void MergedBlockOutputStream::writeSuffixAndFinalizePart( } } + /// Finish skip index serialization + for (size_t i = 0; i < storage.indexes.size(); ++i) + { + auto & stream = *skip_indexes_streams[i]; + if (skip_indexes_granules[i] && !skip_indexes_granules[i]->empty()) { + skip_indexes_granules[i]->serializeBinary(stream.compressed); + skip_indexes_granules[i].reset(); + } + } + + if (!total_column_list) total_column_list = &columns_list; @@ -335,6 +348,14 @@ void MergedBlockOutputStream::writeSuffixAndFinalizePart( index_stream = nullptr; } + for (auto & stream : skip_indexes_streams) + { + stream->finalize(); + stream->addToChecksums(checksums); + } + + skip_indexes_streams.clear(); + for (ColumnStreams::iterator it = column_streams.begin(); it != column_streams.end(); ++it) { it->second->finalize(); @@ -391,6 +412,20 @@ void 
MergedBlockOutputStream::init() part_path + "primary.idx", DBMS_DEFAULT_BUFFER_SIZE, O_TRUNC | O_CREAT | O_WRONLY); index_stream = std::make_unique(*index_file_stream); } + + for (const auto index : storage.indexes) { + String stream_name = INDEX_FILE_PREFIX + index->name; + skip_indexes_streams.emplace_back( + std::move(std::make_unique( + stream_name, + part_path + stream_name, INDEX_FILE_EXTENSION, + part_path + stream_name, MARKS_FILE_EXTENSION, + max_compress_block_size, compression_settings, + 0, aio_threshold))); + + skip_indexes_granules.emplace_back(nullptr); + skip_index_filling.push_back(0); + } } @@ -403,6 +438,10 @@ void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Perm WrittenOffsetColumns offset_columns; auto primary_key_column_names = storage.primary_key_columns; + Names skip_indexes_column_names; + for (const auto index : storage.indexes) { + std::copy(index->columns.cbegin(), index->columns.cend(), std::back_inserter(skip_indexes_column_names)); + } /// Here we will add the columns related to the Primary Key, then write the index. std::vector primary_key_columns(primary_key_column_names.size()); @@ -422,6 +461,23 @@ void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Perm primary_key_columns[i].column = primary_key_columns[i].column->permute(*permutation, 0); } + /// The same for skip indexes columns + std::vector skip_indexes_columns(skip_indexes_column_names.size()); + std::map skip_indexes_column_name_to_position; + + for (size_t i = 0, size = skip_indexes_column_names.size(); i < size; ++i) + { + const auto & name = skip_indexes_column_names[i]; + + skip_indexes_column_name_to_position.emplace(name, i); + + skip_indexes_columns[i] = block.getByName(name); + + /// Reorder index columns in advance. + if (permutation) + skip_indexes_columns[i].column = skip_indexes_columns[i].column->permute(*permutation, 0); + } + if (index_columns.empty()) { index_columns.resize(primary_key_column_names.size()); @@ -452,11 +508,17 @@ void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Perm if (permutation) { auto primary_column_it = primary_key_column_name_to_position.find(it->name); + auto skip_index_column_it = skip_indexes_column_name_to_position.find(it->name); if (primary_key_column_name_to_position.end() != primary_column_it) { auto & primary_column = *primary_key_columns[primary_column_it->second].column; writeData(column.name, *column.type, primary_column, offset_columns, false, serialization_states[i]); } + else if (skip_indexes_column_name_to_position.end() != skip_index_column_it) + { + auto & index_column = *skip_indexes_columns[skip_index_column_it->second].column; + writeData(column.name, *column.type, index_column, offset_columns, false, serialization_states[i]); + } else { /// We rearrange the columns that are not included in the primary key here; Then the result is released - to save RAM. 
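The hunk below fills one granule per index->granularity marks. To make the arithmetic concrete, a small sketch with assumed numbers (none of them come from the patch itself):

    // Rows per mark come from the MergeTree setting; marks per granule come
    // from the GRANULARITY clause of the INDEX declaration.
    size_t rows_per_mark = 8192;   // storage.index_granularity (typical default)
    size_t marks_per_granule = 4;  // INDEX ... GRANULARITY 4
    size_t marks_count = 10;       // marks in this part

    size_t max_rows_per_granule = rows_per_mark * marks_per_granule;              // 32768
    size_t granules = (marks_count + marks_per_granule - 1) / marks_per_granule;  // 3; the last granule is partial

The granule count matches the expression later used by MergeTreeIndexReader when the index is read back.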
@@ -472,6 +534,53 @@ void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Perm rows_count += rows; + { + /// Filling and writing skip indexes like in IMergedBlockOutputStream::writeData + for (size_t i = 0; i < storage.indexes.size(); ++i) + { + const auto index = storage.indexes[i]; + auto & stream = *skip_indexes_streams[i]; + size_t prev_mark = 0; + + while (prev_mark < rows) { + size_t limit = 0; + if (prev_mark == 0 && index_offset != 0) + { + limit = index_offset; + } + else + { + limit = storage.index_granularity; + if (!skip_indexes_granules[i]) { + skip_indexes_granules[i] = index->createIndexGranule(); + skip_index_filling[i] = 0; + + if (stream.compressed.offset() >= min_compress_block_size) + stream.compressed.next(); + + writeIntBinary(stream.plain_hashing.count(), stream.marks); + writeIntBinary(stream.compressed.offset(), stream.marks); + } + } + + size_t pos = prev_mark; + skip_indexes_granules[i]->update(block, &pos, limit); + + if (pos == prev_mark + limit) { + ++skip_index_filling[i]; + + /// write index if it is filled + if (skip_index_filling[i] == index->granularity) { + skip_indexes_granules[i]->serializeBinary(stream.compressed); + skip_indexes_granules[i].reset(); + skip_index_filling[i] = 0; + } + } + prev_mark = pos; + } + } + } + { /** While filling index (index_columns), disable memory tracker. * Because memory is allocated here (maybe in context of INSERT query), diff --git a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.h b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.h index a3f6a025c31..f4d7cb60a20 100644 --- a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.h +++ b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.h @@ -148,6 +148,10 @@ private: std::unique_ptr index_file_stream; std::unique_ptr index_stream; MutableColumns index_columns; + + std::vector> skip_indexes_streams; + MergeTreeIndexGranules skip_indexes_granules; + std::vector skip_index_filling; }; From f92750223ff67a3e22c7122d8c042e718c28210d Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Fri, 4 Jan 2019 18:54:34 +0300 Subject: [PATCH 023/586] removed indexpart class --- .../MergeTree/MergeTreeDataMergerMutator.cpp | 1 - .../Storages/MergeTree/MergeTreeDataPart.h | 2 -- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 4 +-- .../Storages/MergeTree/MergeTreeIndexes.cpp | 5 --- .../src/Storages/MergeTree/MergeTreeIndexes.h | 35 ++++--------------- 5 files changed, 8 insertions(+), 39 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 1eb70920756..86b81b9c856 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -1039,7 +1039,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor new_data_part->index = source_part->index; new_data_part->partition.assign(source_part->partition); new_data_part->minmax_idx = source_part->minmax_idx; - new_data_part->index_parts = source_part->index_parts; new_data_part->modification_time = time(nullptr); new_data_part->bytes_on_disk = MergeTreeData::DataPart::calculateTotalSizeOnDisk(new_data_part->getFullPath()); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h index 1ae6aa602d0..90ea11e4141 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h @@ -208,8 +208,6 @@ struct 
MergeTreeDataPart
 MinMaxIndex minmax_idx;
 
- MergeTreeIndexParts index_parts;
-
 Checksums checksums;
 
 /// Columns description.
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
index 4eff75e567d..e38b92e1807 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@@ -530,8 +530,8 @@ namespace DB
 
 /// It can be done in multiple threads (one thread for each part).
 /// Maybe it should be moved to BlockInputStream, but it can cause some problems.
- for (auto index_part : part->index_parts) {
- auto condition = index_part->createIndexConditionOnPart(query_info, context);
+ for (auto index : data.indexes) {
+ auto condition = index->createIndexConditionOnPart(query_info, context);
 if (!condition->alwaysUnknownOrTrue()) {
 ranges.ranges = condition->filterRanges(ranges.ranges);
 }
diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp
index f1ba281a747..c73999c2e3e 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp
@@ -12,11 +12,6 @@ namespace ErrorCodes
 }
 
 IndexType IndexCondition::indexType() const {
- return part->indexType();
-}
-
-
-IndexType MergeTreeIndexPart::indexType() const {
 return index->indexType();
 }
 
diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h
index 30fe6fb1cda..c0262794fa0 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h
@@ -21,20 +21,16 @@ enum class IndexType {
 
 class MergeTreeIndex;
-struct MergeTreeIndexPart;
 
 using MergeTreeIndexPtr = std::shared_ptr;
 using MutableMergeTreeIndexPtr = std::shared_ptr;
 using MergeTreeIndexes = std::vector;
 
-using MergeTreeIndexPartPtr = std::shared_ptr;
-using MergeTreeIndexParts = std::vector;
-
 
 /// Condition on the index.
 /// It works only with one indexPart (MergeTreeDataPart).
 class IndexCondition {
- friend MergeTreeIndexPart;
+ friend MergeTreeIndex;
 
 public:
 virtual ~IndexCondition() = default;
@@ -51,7 +47,7 @@ protected:
 IndexCondition() = default;
 
 public:
- MergeTreeIndexPartPtr part;
+ MergeTreeIndexPtr index;
 };
 
 using IndexConditionPtr = std::shared_ptr;
@@ -59,7 +55,7 @@ using IndexConditionPtr = std::shared_ptr;
 
 struct MergeTreeIndexGranule
 {
- friend MergeTreeIndexPart;
+ friend MergeTreeIndex;
 
 public:
 virtual ~MergeTreeIndexGranule();
@@ -76,28 +72,6 @@ using MergeTreeIndexGranulePtr = std::shared_ptr;
 using MergeTreeIndexGranules = std::vector;
 
-/// Data structure for operations with index data for each MergeTreeDataPart.
-/// Stores information specific for DataPart.
-/// Probably will be removed later.
-struct MergeTreeIndexPart
-{
- friend MergeTreeIndex;
-
-public:
- MergeTreeIndexPart() = default;
- virtual ~MergeTreeIndexPart() = default;
-
- IndexType indexType() const;
-
- virtual MergeTreeIndexPartPtr cloneEmpty() const = 0;
-
- virtual IndexConditionPtr createIndexConditionOnPart(
- const SelectQueryInfo & query_info, const Context & context) const = 0;
-
- MergeTreeIndexPtr index;
-};
-
-
 /// Structure for storing basic index info like columns, expression, arguments, ...
class MergeTreeIndex { @@ -111,6 +85,9 @@ public: virtual MergeTreeIndexGranulePtr createIndexGranule() const = 0; + virtual IndexConditionPtr createIndexConditionOnPart( + const SelectQueryInfo & query_info, const Context & context) const = 0; + String name; ExpressionActionsPtr expr; size_t granularity; From 1c806288bf4edbd581e197a4dc6cf14380f17c4c Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 5 Jan 2019 12:26:02 +0300 Subject: [PATCH 024/586] fix --- .../src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp | 5 +++++ dbms/src/Storages/MergeTree/MergeTreeIndexes.h | 8 +++++++- dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp | 5 +++-- dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp | 1 + dbms/src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 5 files changed, 17 insertions(+), 4 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 86b81b9c856..721f58c4b0d 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -970,6 +970,11 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor } NameSet files_to_skip = {"checksums.txt", "columns.txt"}; + + for (auto index : data.indexes) { + files_to_skip.insert(index->getFileName() + ".idx"); + } + for (const auto & entry : in_header) { IDataType::StreamCallback callback = [&](const IDataType::SubstreamPath & substream_path) diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h index c0262794fa0..178c23ca373 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h @@ -12,6 +12,8 @@ #include #include +constexpr auto INDEX_FILE_PREFIX = "skp_idx_"; + namespace DB { @@ -57,7 +59,6 @@ struct MergeTreeIndexGranule { friend MergeTreeIndex; -public: virtual ~MergeTreeIndexGranule(); virtual void serializeBinary(WriteBuffer & ostr) const = 0; @@ -83,6 +84,11 @@ public: virtual IndexType indexType() const = 0; + /// gets filename without extension + virtual String getFileName() const = 0; + + String getFileExt() const { return ".idx"; }; + virtual MergeTreeIndexGranulePtr createIndexGranule() const = 0; virtual IndexConditionPtr createIndexConditionOnPart( diff --git a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp index cb89087d86d..25328e96519 100644 --- a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -17,7 +17,6 @@ namespace constexpr auto DATA_FILE_EXTENSION = ".bin"; constexpr auto MARKS_FILE_EXTENSION = ".mrk"; constexpr auto INDEX_FILE_EXTENSION = ".idx"; -constexpr auto INDEX_FILE_PREFIX = "skp_idx_"; } @@ -355,6 +354,8 @@ void MergedBlockOutputStream::writeSuffixAndFinalizePart( } skip_indexes_streams.clear(); + skip_indexes_granules.clear(); + skip_index_filling.clear(); for (ColumnStreams::iterator it = column_streams.begin(); it != column_streams.end(); ++it) { @@ -414,7 +415,7 @@ void MergedBlockOutputStream::init() } for (const auto index : storage.indexes) { - String stream_name = INDEX_FILE_PREFIX + index->name; + String stream_name = index->getFileName(); skip_indexes_streams.emplace_back( std::move(std::make_unique( stream_name, diff --git a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp 
index a3da1885771..1030b6c8cac 100644 --- a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -332,6 +332,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) * - Sorting key in the ORDER BY clause; * - Primary key (if it is different from the sorting key) in the PRIMARY KEY clause; * - Sampling expression in the SAMPLE BY clause; + * - Secondary indexes * - Additional MergeTreeSettings in the SETTINGS clause; */ diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 0743cfce5e3..33b1407d949 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -227,7 +227,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( if (path_.empty()) throw Exception("ReplicatedMergeTree storages require data path", ErrorCodes::INCORRECT_FILE_NAME); if (!indexes_ast_.empty()) { - throw Exception("ReplicatedMergeTree storages do not support indexes", ErrorCodes::INCORRECT_QUERY); + throw Exception("check indexes support for ReplicatedMergeTree", ErrorCodes::INCORRECT_QUERY); } if (!zookeeper_path.empty() && zookeeper_path.back() == '/') From 82cc39d44105ec4f466ca9163258b068790d9767 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 5 Jan 2019 15:35:13 +0300 Subject: [PATCH 025/586] added setSkipIndexes --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 18 +++++++++++++----- dbms/src/Storages/MergeTree/MergeTreeData.h | 4 +++- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 9ddb17f7039..131b1df96ba 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -187,11 +187,7 @@ MergeTreeData::MergeTreeData( "MergeTree data format version on disk doesn't support custom partitioning", ErrorCodes::METADATA_MISMATCH); - for (const auto & index_ast : indexes_ast_) { - indexes.push_back( - std::move(MergeTreeIndexFactory::instance().get( - std::dynamic_pointer_cast(index_ast)))); - } + setSkipIndexes(indexes_ast_); } @@ -351,6 +347,18 @@ void MergeTreeData::setPrimaryKeyAndColumns( } +void MergeTreeData::setSkipIndexes(const ASTs & indexes_asts, bool only_check) +{ + if (!only_check) { + for (const auto &index_ast : indexes_asts) { + indexes.push_back( + std::move(MergeTreeIndexFactory::instance().get( + std::dynamic_pointer_cast(index_ast)))); + } + } +} + + ASTPtr MergeTreeData::extractKeyExpressionList(const ASTPtr & node) { if (!node) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index a49648ad682..7e89085b400 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -580,7 +580,7 @@ public: Int64 minmax_idx_date_column_pos = -1; /// In a common case minmax index includes a date column. Int64 minmax_idx_time_column_pos = -1; /// In other cases, minmax index often includes a dateTime column. - /// Secondary indexes for MergeTree + /// Secondary (data skipping) indexes for MergeTree MergeTreeIndexes indexes; /// Names of columns for primary key + secondary sorting columns. 
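setSkipIndexes above resolves every ASTIndexDeclaration through MergeTreeIndexFactory. For orientation, a hypothetical registration of an index type could look like the sketch below; the "minmax" name and the MergeTreeMinMaxIndex class are placeholders, not something this series has introduced yet:

    // Hypothetical sketch only: plugging a new index type into the factory.
    auto & factory = MergeTreeIndexFactory::instance();
    factory.registerIndex("minmax", [](std::shared_ptr<ASTIndexDeclaration> node)
    {
        // A real creator would build the index expression and sample block
        // from node->expr before constructing the index object.
        return std::make_unique<MergeTreeMinMaxIndex>(
            node->name, nullptr /* expr */, node->granularity.get<UInt64>(), Block{});
    });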
@@ -725,6 +725,8 @@ private: void setPrimaryKeyAndColumns(const ASTPtr & new_order_by_ast, ASTPtr new_primary_key_ast, const ColumnsDescription & new_columns, bool only_check = false); + void setSkipIndexes(const ASTs & indexes_asts, bool only_check = false); + void initPartitionKey(); /// Expression for column type conversion. From 61b9c7735bd1bc7d0eb857fa971f2948aa251cf7 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 5 Jan 2019 21:33:30 +0300 Subject: [PATCH 026/586] add rw for MergeTreeIndexes --- .../Storages/MergeTree/MergeTreeIndexes.cpp | 47 +++++++++++++++++-- .../src/Storages/MergeTree/MergeTreeIndexes.h | 21 +++++---- dbms/src/Storages/MergeTree/checkDataPart.cpp | 10 ++-- 3 files changed, 60 insertions(+), 18 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp index c73999c2e3e..10256e76181 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp @@ -1,4 +1,9 @@ #include +#include +#include +#include +#include + namespace DB { @@ -11,18 +16,45 @@ namespace ErrorCodes } -IndexType IndexCondition::indexType() const { - return index->indexType(); +void MergeTreeIndexes::writeText(DB::WriteBuffer &ostr) const +{ + writeString("indexes format version: 1\n", ostr); + DB::writeText(size(), ostr); + writeString(" indexes:\n", ostr); + for (auto index : *this) { + index->writeText(ostr); + writeChar('\n', ostr); + } } -void MergeTreeIndexFactory::registerIndex(const std::string &name, Creator creator) { +void MergeTreeIndexes::readText(DB::ReadBuffer &istr) +{ + const MergeTreeIndexFactory & factory = MergeTreeIndexFactory::instance(); + + assertString("indexes format version: 1\n", istr); + size_t count; + DB::readText(count, istr); + assertString(" indexes:\n", istr); + reserve(count); + for (size_t i = 0; i < count; ++i) { + String index_descr; + readString(index_descr, istr); + emplace_back(factory.get(index_descr)); + assertChar('\n', istr); + } +} + + +void MergeTreeIndexFactory::registerIndex(const std::string &name, Creator creator) +{ if (!indexes.emplace(name, std::move(creator)).second) throw Exception("MergeTreeIndexFactory: the Index creator name '" + name + "' is not unique", ErrorCodes::LOGICAL_ERROR); } -std::unique_ptr MergeTreeIndexFactory::get(std::shared_ptr node) const { +std::unique_ptr MergeTreeIndexFactory::get(std::shared_ptr node) const +{ if (!node->type) throw Exception( "for INDEX TYPE is required", @@ -35,4 +67,11 @@ std::unique_ptr MergeTreeIndexFactory::get(std::shared_ptrsecond(node); } +std::unique_ptr MergeTreeIndexFactory::get(const String & description) const +{ + ParserIndexDeclaration parser; + ASTPtr ast = parseQuery(parser, description.data(), description.data() + description.size(), "index factory", 0); + return get(std::dynamic_pointer_cast(ast)); +} + } \ No newline at end of file diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h index 178c23ca373..bdd7bee17d4 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h @@ -17,16 +17,11 @@ constexpr auto INDEX_FILE_PREFIX = "skp_idx_"; namespace DB { -enum class IndexType { - NONE = 0 -}; - class MergeTreeIndex; using MergeTreeIndexPtr = std::shared_ptr; using MutableMergeTreeIndexPtr = std::shared_ptr; -using MergeTreeIndexes = std::vector; /// Condition on the index. 
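For a concrete picture of the format that writeText above emits: the stream starts with a version header and a count, then one description per line. Assuming each concrete index's writeText prints its declaration in the name BY expr TYPE type(...) GRANULARITY n grammar that readText parses back through the factory, a table with two indexes would serialize roughly as:

    indexes format version: 1
    2 indexes:
    minmax1 BY (lower(URL), EventTime) TYPE MINMAX(1,2,3) GRANULARITY 10
    idx2 BY (EventTime) TYPE MINMAX() GRANULARITY 4

The first of the two descriptions mirrors the declaration from the create_parser.cpp test earlier; the second is made up for the example.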
@@ -37,8 +32,6 @@ class IndexCondition { public: virtual ~IndexCondition() = default; - IndexType indexType() const; - /// Checks if this index is useful for query. virtual bool alwaysUnknownOrTrue() const = 0; @@ -82,18 +75,18 @@ public: virtual ~MergeTreeIndex() {}; - virtual IndexType indexType() const = 0; + virtual String indexType() const { return "UNKNOWN"; }; /// gets filename without extension virtual String getFileName() const = 0; - String getFileExt() const { return ".idx"; }; - virtual MergeTreeIndexGranulePtr createIndexGranule() const = 0; virtual IndexConditionPtr createIndexConditionOnPart( const SelectQueryInfo & query_info, const Context & context) const = 0; + virtual void writeText(WriteBuffer & ostr) const = 0; + String name; ExpressionActionsPtr expr; size_t granularity; @@ -103,6 +96,13 @@ public: }; +class MergeTreeIndexes : public std::vector +{ + void writeText(WriteBuffer & ostr) const; + void readText(ReadBuffer & istr); +}; + + class MergeTreeIndexFactory : public ext::singleton { friend class ext::singleton; @@ -111,6 +111,7 @@ public: using Creator = std::function(std::shared_ptr node)>; std::unique_ptr get(std::shared_ptr node) const; + std::unique_ptr get(const String & description) const; void registerIndex(const std::string & name, Creator creator); diff --git a/dbms/src/Storages/MergeTree/checkDataPart.cpp b/dbms/src/Storages/MergeTree/checkDataPart.cpp index eac9145692b..60ef117f0d7 100644 --- a/dbms/src/Storages/MergeTree/checkDataPart.cpp +++ b/dbms/src/Storages/MergeTree/checkDataPart.cpp @@ -30,12 +30,13 @@ namespace ErrorCodes namespace { -/** To read and checksum single stream (a pair of .bin, .mrk files) for a single column. +/** To read and checksum single stream (a pair of .bin, .mrk files) for a single column or secondary index. 
*/ class Stream { public: String base_name; + String bin_file_ext; String bin_file_path; String mrk_file_path; private: @@ -50,10 +51,11 @@ private: public: HashingReadBuffer mrk_hashing_buf; - Stream(const String & path, const String & base_name) + Stream(const String & path, const String & base_name, const String & bin_file_ext = ".bin") : base_name(base_name), - bin_file_path(path + base_name + ".bin"), + bin_file_ext(bin_file_ext), + bin_file_path(path + base_name + bin_file_ext), mrk_file_path(path + base_name + ".mrk"), file_buf(bin_file_path), compressed_hashing_buf(file_buf), @@ -118,7 +120,7 @@ public: void saveChecksums(MergeTreeData::DataPart::Checksums & checksums) { - checksums.files[base_name + ".bin"] = MergeTreeData::DataPart::Checksums::Checksum( + checksums.files[base_name + bin_file_ext] = MergeTreeData::DataPart::Checksums::Checksum( compressed_hashing_buf.count(), compressed_hashing_buf.getHash(), uncompressed_hashing_buf.count(), uncompressed_hashing_buf.getHash()); From c3f1784dbcf7b99266fad99ec8b870c54687d6bd Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 5 Jan 2019 22:27:31 +0300 Subject: [PATCH 027/586] fixes --- dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp | 6 +----- dbms/src/Storages/MergeTree/MergeTreeIndexes.h | 1 + 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 721f58c4b0d..bc5dab7ebd3 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -956,7 +956,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor /// We will modify only some of the columns. Other columns and key values can be copied as-is. /// TODO: check that we modify only non-key columns in this case. 
- /// TODO: just recalc index on part + /// TODO: more effective check for (const auto & col : in_header.getNames()) { for (const auto index : data.indexes) { const auto & index_cols = index->expr->getRequiredColumns(); @@ -971,10 +971,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor NameSet files_to_skip = {"checksums.txt", "columns.txt"}; - for (auto index : data.indexes) { - files_to_skip.insert(index->getFileName() + ".idx"); - } - for (const auto & entry : in_header) { IDataType::StreamCallback callback = [&](const IDataType::SubstreamPath & substream_path) diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h index bdd7bee17d4..2be40d1b3cc 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h @@ -98,6 +98,7 @@ public: class MergeTreeIndexes : public std::vector { +public: void writeText(WriteBuffer & ostr) const; void readText(ReadBuffer & istr); }; From 83368a48667a988f03ab28797dbcabf6791b4e0d Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 6 Jan 2019 15:10:22 +0300 Subject: [PATCH 028/586] upd error --- dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp index 10256e76181..5558811268d 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp @@ -4,6 +4,8 @@ #include #include +#include + namespace DB { @@ -62,7 +64,15 @@ std::unique_ptr MergeTreeIndexFactory::get(std::shared_ptrtype->name); if (it == indexes.end()) throw Exception( - "Unknown Index type '" + node->type->name + "'", + "Unknown Index type '" + node->type->name + "'. 
Available index types: " + + std::accumulate(indexes.cbegin(), indexes.cend(), std::string{}, + [] (auto && lft, const auto & rht) -> std::string { + if (lft == "") { + return rht.first; + } else { + return lft + ", " + rht.first; + } + }) + ".", ErrorCodes::INCORRECT_QUERY); return it->second(node); } From 7e0e301067787a14ab418115c197e3b13a87b1fa Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 6 Jan 2019 15:12:42 +0300 Subject: [PATCH 029/586] fix --- dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp index 5558811268d..2163fe151ef 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp @@ -72,7 +72,7 @@ std::unique_ptr MergeTreeIndexFactory::get(std::shared_ptrsecond(node); } From 17f6618fa351eeefc584dcd77fc53836d32ad548 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 6 Jan 2019 18:22:04 +0300 Subject: [PATCH 030/586] fix --- dbms/src/Storages/MergeTree/MergeTreeIndexes.h | 6 ++++++ dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h index 2be40d1b3cc..221893d81a3 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h @@ -66,6 +66,12 @@ using MergeTreeIndexGranulePtr = std::shared_ptr; using MergeTreeIndexGranules = std::vector; +class MergeTreeIndexReader { +public: + MergeTreeIndexPtr index; +}; + + /// Structure for storing basic index info like columns, expression, arguments, ... class MergeTreeIndex { diff --git a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 097bc632c03..42d19e6afe8 100644 --- a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -428,7 +428,7 @@ void MergedBlockOutputStream::init() stream_name, part_path + stream_name, INDEX_FILE_EXTENSION, part_path + stream_name, MARKS_FILE_EXTENSION, - max_compress_block_size, compression_settings, + codec, max_compress_block_size, 0, aio_threshold))); skip_indexes_granules.emplace_back(nullptr); From 11d37efa74fb23bd5759ce04e90b506292303d48 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 7 Jan 2019 15:51:14 +0300 Subject: [PATCH 031/586] reading --- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 68 +++++++++++++++++-- .../MergeTree/MergeTreeDataSelectExecutor.h | 7 ++ .../MergeTree/MergeTreeIndexReader.cpp | 26 +++++++ .../Storages/MergeTree/MergeTreeIndexReader.h | 27 ++++++++ .../src/Storages/MergeTree/MergeTreeIndexes.h | 47 ++++++------- dbms/src/Storages/MergeTree/MergeTreeReader.h | 3 +- .../MergeTree/registerStorageMergeTree.cpp | 6 ++ 7 files changed, 151 insertions(+), 33 deletions(-) create mode 100644 dbms/src/Storages/MergeTree/MergeTreeIndexReader.cpp create mode 100644 dbms/src/Storages/MergeTree/MergeTreeIndexReader.h diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index e38b92e1807..c951ccd951a 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -1,11 +1,15 @@ #include /// For calculations related to sampling coefficients. 
#include +#include + #include #include #include #include #include +#include +#include #include #include #include @@ -531,10 +535,12 @@ namespace DB /// It can be done in multiple threads (one thread for each part). /// Maybe it should be moved to BlockInputStream, but it can cause some problems. for (auto index : data.indexes) { - auto condition = index->createIndexConditionOnPart(query_info, context); - if (!condition->alwaysUnknownOrTrue()) { - ranges.ranges = condition->filterRanges(ranges.ranges); + auto condition = index->createIndexCondition(query_info, context); + if (condition->alwaysUnknownOrTrue()) { + continue; } + + ranges.ranges = filterMarksUsingIndex(index, condition, part, ranges.ranges, settings); } if (!ranges.ranges.empty()) @@ -930,7 +936,7 @@ namespace DB if (range.end == range.begin + 1) { /// We saw a useful gap between neighboring marks. Either add it to the last range, or start a new range. - if (res.empty() || range.begin - res.back().end > min_marks_for_seek) + if (res.empty() || range.begin - res.back().end > min_marks_for_seek) // is it a bug?? res.push_back(range); else res.back().end = range.end; @@ -952,4 +958,58 @@ namespace DB return res; } + MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( + MergeTreeIndexPtr index, + IndexConditionPtr condition, + MergeTreeData::DataPartPtr part, + const MarkRanges & ranges, + const Settings & settings) const + { + if (!Poco::File(part->getFullPath() + index->getFileName() + ".idx").exists()) { + return ranges; + } + + const size_t min_marks_for_seek = (settings.merge_tree_min_rows_for_seek + data.index_granularity - 1) / data.index_granularity; + + MergeTreeIndexReader reader( + index, part, + ((part->marks_count + index->granularity - 1) / index->granularity), + ranges); + + MarkRanges res; + + MergeTreeIndexGranulePtr granule = nullptr; + size_t last_index_mark = 0; + for (const auto & range : ranges) + { + MarkRange index_range( + range.begin / index->granularity, range.end / index->granularity); + + if (last_index_mark != index_range.begin || !granule) { + reader.seek(index_range.begin); + } + + for (size_t index_mark = index_range.begin; index_mark < index_range.end; ++index_mark) + { + if (index_mark != index_range.begin || !granule || last_index_mark != index_range.begin) + granule = reader.read(); + + MarkRange data_range( + std::max(range.begin, index_mark * index->granularity), + std::min(range.end, (index_mark + 1) * index->granularity)); + + if (!condition->mayBeTrueOnGranule(*granule)) + continue; + + if (res.empty() || res.back().end - data_range.begin >= min_marks_for_seek) + res.push_back(data_range); + else + res.back().end = data_range.end; + } + + last_index_mark = index_range.end - 1; + } + return res; + } + } \ No newline at end of file diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index 576b88f2e41..8010cc9c889 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -81,6 +81,13 @@ private: const MergeTreeData::DataPart::Index & index, const KeyCondition & key_condition, const Settings & settings) const; + + MarkRanges filterMarksUsingIndex( + MergeTreeIndexPtr index, + IndexConditionPtr condition, + MergeTreeData::DataPartPtr part, + const MarkRanges & ranges, + const Settings & settings) const; }; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexReader.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexReader.cpp 
new file mode 100644 index 00000000000..f81f325b065 --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexReader.cpp @@ -0,0 +1,26 @@ +#include + + +namespace DB { + +MergeTreeIndexReader::MergeTreeIndexReader( + MergeTreeIndexPtr index, MergeTreeData::DataPartPtr part, size_t marks_count, const MarkRanges & all_mark_ranges) + : index(index), stream( + part->getFullPath() + index->getFileName(), ".idx", marks_count, + all_mark_ranges, nullptr, false, nullptr, 0, DBMS_DEFAULT_BUFFER_SIZE, + ReadBufferFromFileBase::ProfileCallback{}, CLOCK_MONOTONIC_COARSE) { + stream.seekToStart(); +} + +void MergeTreeIndexReader::seek(size_t mark) +{ + stream.seekToMark(mark); +} + +MergeTreeIndexGranulePtr MergeTreeIndexReader::read() { + auto granule = index->createIndexGranule(); + granule->deserializeBinary(*stream.data_buffer); + return granule; +} + +} \ No newline at end of file diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexReader.h b/dbms/src/Storages/MergeTree/MergeTreeIndexReader.h new file mode 100644 index 00000000000..32275f7f3b2 --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexReader.h @@ -0,0 +1,27 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB { + +class MergeTreeIndexReader { +public: + MergeTreeIndexReader( + MergeTreeIndexPtr index, + MergeTreeData::DataPartPtr part, + size_t marks_count, + const MarkRanges & all_mark_ranges); + + void seek(size_t mark); + + MergeTreeIndexGranulePtr read(); + +private: + MergeTreeIndexPtr index; + MergeTreeReader::Stream stream; +}; + +} \ No newline at end of file diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h index 221893d81a3..9475ffe2b5e 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h @@ -17,37 +17,12 @@ constexpr auto INDEX_FILE_PREFIX = "skp_idx_"; namespace DB { - class MergeTreeIndex; using MergeTreeIndexPtr = std::shared_ptr; using MutableMergeTreeIndexPtr = std::shared_ptr; -/// Condition on the index. -/// It works only with one indexPart (MergeTreeDataPart). -class IndexCondition { - friend MergeTreeIndex; - -public: - virtual ~IndexCondition() = default; - - /// Checks if this index is useful for query. - virtual bool alwaysUnknownOrTrue() const = 0; - - /// Drops out ranges where query is false - virtual MarkRanges filterRanges(const MarkRanges & ranges) const = 0; - -protected: - IndexCondition() = default; - -public: - MergeTreeIndexPtr index; -}; - -using IndexConditionPtr = std::shared_ptr; - - struct MergeTreeIndexGranule { friend MergeTreeIndex; @@ -62,15 +37,31 @@ struct MergeTreeIndexGranule virtual void update(const Block & block, size_t * pos, size_t limit) = 0; }; + using MergeTreeIndexGranulePtr = std::shared_ptr; using MergeTreeIndexGranules = std::vector; +/// Condition on the index. +class IndexCondition { + friend MergeTreeIndex; + +public: + virtual ~IndexCondition() = default; + + /// Checks if this index is useful for query. + virtual bool alwaysUnknownOrTrue() const = 0; + + virtual bool mayBeTrueOnGranule(const MergeTreeIndexGranule & granule); + +protected: + IndexCondition() = default; -class MergeTreeIndexReader { public: MergeTreeIndexPtr index; }; +using IndexConditionPtr = std::shared_ptr; + /// Structure for storing basic index info like columns, expression, arguments, ... 
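/// A skip index is split into three cooperating parts: MergeTreeIndex holds the static
/// description (name, expression, granularity), MergeTreeIndexGranule aggregates and
/// (de)serializes the state for one run of `granularity` data marks, and IndexCondition
/// decides per query whether a granule may still contain matching rows.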
class MergeTreeIndex @@ -84,11 +75,11 @@ public: virtual String indexType() const { return "UNKNOWN"; }; /// gets filename without extension - virtual String getFileName() const = 0; + virtual String getFileName() const { return INDEX_FILE_PREFIX + name; }; virtual MergeTreeIndexGranulePtr createIndexGranule() const = 0; - virtual IndexConditionPtr createIndexConditionOnPart( + virtual IndexConditionPtr createIndexCondition( const SelectQueryInfo & query_info, const Context & context) const = 0; virtual void writeText(WriteBuffer & ostr) const = 0; diff --git a/dbms/src/Storages/MergeTree/MergeTreeReader.h b/dbms/src/Storages/MergeTree/MergeTreeReader.h index ac5d46fb664..744f1c0dbe4 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeReader.h +++ b/dbms/src/Storages/MergeTree/MergeTreeReader.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -56,7 +57,6 @@ public: /// If continue_reading is true, continue reading from last state, otherwise seek to from_mark size_t readRows(size_t from_mark, bool continue_reading, size_t max_rows_to_read, Block & res); -private: class Stream { public: @@ -94,6 +94,7 @@ private: std::unique_ptr non_cached_buffer; }; +private: using FileStreams = std::map>; /// avg_value_size_hints are used to reduce the number of reallocations when creating columns of variable size. diff --git a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp index 1030b6c8cac..837f90396bf 100644 --- a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -181,6 +182,11 @@ static void setGraphitePatternsFromConfig(const Context & context, } +static void registerMergeTreeSkipIndexes() { + +} + + static String getMergeTreeVerboseHelp(bool is_extended_syntax) { using namespace std::string_literals; From 6871665231975f169d4fe1fe05d43c9dcb159f4c Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 7 Jan 2019 19:49:34 +0300 Subject: [PATCH 032/586] test index --- .../src/Storages/MergeTree/MergeTreeIndexes.h | 17 ++-- .../Storages/MergeTree/MergeTreeTestIndex.cpp | 2 + .../Storages/MergeTree/MergeTreeTestIndex.h | 83 +++++++++++++++++++ .../MergeTree/registerStorageMergeTree.cpp | 16 ++-- 4 files changed, 100 insertions(+), 18 deletions(-) create mode 100644 dbms/src/Storages/MergeTree/MergeTreeTestIndex.cpp create mode 100644 dbms/src/Storages/MergeTree/MergeTreeTestIndex.h diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h index 9475ffe2b5e..1a9efb8445f 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h @@ -25,12 +25,10 @@ using MutableMergeTreeIndexPtr = std::shared_ptr; struct MergeTreeIndexGranule { - friend MergeTreeIndex; - - virtual ~MergeTreeIndexGranule(); + virtual ~MergeTreeIndexGranule() = default; virtual void serializeBinary(WriteBuffer & ostr) const = 0; - virtual void deserializeBinary(ReadBuffer & istr) const = 0; + virtual void deserializeBinary(ReadBuffer & istr) = 0; virtual bool empty() const = 0; @@ -43,20 +41,15 @@ using MergeTreeIndexGranules = std::vector; /// Condition on the index. class IndexCondition { - friend MergeTreeIndex; - public: + IndexCondition() = default; virtual ~IndexCondition() = default; /// Checks if this index is useful for query. 
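    /// Returning true means the condition can not prune anything for this query,
    /// so the caller skips reading this index altogether.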
virtual bool alwaysUnknownOrTrue() const = 0; - virtual bool mayBeTrueOnGranule(const MergeTreeIndexGranule & granule); + virtual bool mayBeTrueOnGranule(const MergeTreeIndexGranule & granule) const = 0; -protected: - IndexCondition() = default; - -public: MergeTreeIndexPtr index; }; @@ -75,7 +68,7 @@ public: virtual String indexType() const { return "UNKNOWN"; }; /// gets filename without extension - virtual String getFileName() const { return INDEX_FILE_PREFIX + name; }; + String getFileName() const { return INDEX_FILE_PREFIX + name; }; virtual MergeTreeIndexGranulePtr createIndexGranule() const = 0; diff --git a/dbms/src/Storages/MergeTree/MergeTreeTestIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeTestIndex.cpp new file mode 100644 index 00000000000..29e15b66503 --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeTestIndex.cpp @@ -0,0 +1,2 @@ +#include + diff --git a/dbms/src/Storages/MergeTree/MergeTreeTestIndex.h b/dbms/src/Storages/MergeTree/MergeTreeTestIndex.h new file mode 100644 index 00000000000..538e45221e5 --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeTestIndex.h @@ -0,0 +1,83 @@ +#pragma once + +#include +#include +#include + +#include +#include + +namespace DB { + +class MergeTreeTestIndex; + +struct MergeTreeTestGranule : public MergeTreeIndexGranule { + ~MergeTreeTestGranule() override {}; + + void serializeBinary(WriteBuffer &ostr) const override { + writeIntBinary(emp, ostr); + } + + void deserializeBinary(ReadBuffer &istr) override { + readIntBinary(emp, istr); + } + + bool empty() const override { + return static_cast(emp); + } + + void update(const Block &block, size_t *pos, size_t limit) override { + *pos += std::min(limit, block.rows() - *pos); + emp = false; + }; + + Int32 emp = true; +}; + +class IndexTestCondition : public IndexCondition{ +public: + IndexTestCondition() = default; + ~IndexTestCondition() override {}; + + /// Checks if this index is useful for query. 
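+    /// The test condition never opts out: the index is always consulted and every
+    /// granule passes, so each query exercises the full write/serialize/read path.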
+ bool alwaysUnknownOrTrue() const override { return false; }; + + bool mayBeTrueOnGranule(const MergeTreeIndexGranule &) const override { + return true; + } + +}; + + +class MergeTreeTestIndex : public MergeTreeIndex +{ +public: + MergeTreeTestIndex(String name, ExpressionActionsPtr expr, size_t granularity, Block key) + : MergeTreeIndex(name, expr, granularity, key) {} + + ~MergeTreeTestIndex() override {} + + String indexType() const override { return "TEST"; } + + /// gets filename without extension + + MergeTreeIndexGranulePtr createIndexGranule() const override { + return std::make_shared(); + } + + IndexConditionPtr createIndexCondition( + const SelectQueryInfo & , const Context & ) const override { + return std::make_shared(); + }; + + void writeText(WriteBuffer & ostr) const override { + DB::writeText(10, ostr); + }; +}; + +std::unique_ptr MTItestCreator(std::shared_ptr node) { + return std::make_unique( + node->name, nullptr, node->granularity.get(), Block{}); +} + +} \ No newline at end of file diff --git a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp index 837f90396bf..fdb87b77ef4 100644 --- a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -1,7 +1,8 @@ #include #include -#include #include +#include +#include #include #include @@ -182,11 +183,6 @@ static void setGraphitePatternsFromConfig(const Context & context, } -static void registerMergeTreeSkipIndexes() { - -} - - static String getMergeTreeVerboseHelp(bool is_extended_syntax) { using namespace std::string_literals; @@ -639,6 +635,12 @@ static StoragePtr create(const StorageFactory::Arguments & args) } +static void registerMergeTreeSkipIndexes() { + auto & factory = MergeTreeIndexFactory::instance(); + factory.registerIndex("test", MTItestCreator); +} + + void registerStorageMergeTree(StorageFactory & factory) { factory.registerStorage("MergeTree", create); @@ -656,6 +658,8 @@ void registerStorageMergeTree(StorageFactory & factory) factory.registerStorage("ReplicatedSummingMergeTree", create); factory.registerStorage("ReplicatedGraphiteMergeTree", create); factory.registerStorage("ReplicatedVersionedCollapsingMergeTree", create); + + registerMergeTreeSkipIndexes(); } } From 69c6e77d53e61d92fb9c2292931a8ffb513bdfbc Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 7 Jan 2019 21:53:51 +0300 Subject: [PATCH 033/586] fixed nullptr error --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 4 +- .../MergeTree/MergeTreeDataMergerMutator.cpp | 5 +++ .../MergeTree/MergeTreeDataWriter.cpp | 11 +++++ .../Storages/MergeTree/MergeTreeIndexes.cpp | 44 +++---------------- .../src/Storages/MergeTree/MergeTreeIndexes.h | 20 +++++---- .../Storages/MergeTree/MergeTreeTestIndex.h | 5 ++- 6 files changed, 38 insertions(+), 51 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index ea38f11454d..3063161852d 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -355,7 +355,9 @@ void MergeTreeData::setSkipIndexes(const ASTs & indexes_asts, bool only_check) for (const auto &index_ast : indexes_asts) { indexes.push_back( std::move(MergeTreeIndexFactory::instance().get( - std::dynamic_pointer_cast(index_ast)))); + *this, + std::dynamic_pointer_cast(index_ast), + global_context))); } } } diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp 
b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 232146ea417..19b81833cbf 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -553,18 +553,23 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor Names all_column_names = data.getColumns().getNamesOfPhysical(); NamesAndTypesList all_columns = data.getColumns().getAllPhysical(); + LOG_DEBUG(log, "Before extract"); NamesAndTypesList gathering_columns, merging_columns; Names gathering_column_names, merging_column_names; extractMergingAndGatheringColumns( all_columns, data.sorting_key_expr, data.indexes, data.merging_params, gathering_columns, gathering_column_names, merging_columns, merging_column_names); + LOG_DEBUG(log, "After extract"); + MergeTreeData::MutableDataPartPtr new_data_part = std::make_shared( data, future_part.name, future_part.part_info); new_data_part->partition.assign(future_part.getPartition()); new_data_part->relative_path = TMP_PREFIX + future_part.name; new_data_part->is_temp = true; + LOG_DEBUG(log, "New Part"); + size_t sum_input_rows_upper_bound = merge_entry->total_size_marks * data.index_granularity; MergeAlgorithm merge_alg = chooseMergeAlgorithm(parts, sum_input_rows_upper_bound, gathering_columns, deduplicate); diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 2b7ede696ad..5e5a7fecd21 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -214,6 +214,17 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa NamesAndTypesList columns = data.getColumns().getAllPhysical().filter(block.getNames()); MergedBlockOutputStream out(data, new_data_part->getFullPath(), columns, compression_codec); + for (auto index : data.indexes) + { + auto index_columns = index->expr->getRequiredColumnsWithTypes(); + for (const auto & column : index_columns) + { + if (!block.has(column.name)) + block.insert(ColumnWithTypeAndName(column.type, column.name)); + } + index->expr->execute(block); + } + out.writePrefix(); out.writeWithPermutation(block, perm_ptr); out.writeSuffixAndFinalizePart(new_data_part); diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp index 2163fe151ef..2620dc6cc38 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp @@ -18,36 +18,6 @@ namespace ErrorCodes } -void MergeTreeIndexes::writeText(DB::WriteBuffer &ostr) const -{ - writeString("indexes format version: 1\n", ostr); - DB::writeText(size(), ostr); - writeString(" indexes:\n", ostr); - for (auto index : *this) { - index->writeText(ostr); - writeChar('\n', ostr); - } -} - - -void MergeTreeIndexes::readText(DB::ReadBuffer &istr) -{ - const MergeTreeIndexFactory & factory = MergeTreeIndexFactory::instance(); - - assertString("indexes format version: 1\n", istr); - size_t count; - DB::readText(count, istr); - assertString(" indexes:\n", istr); - reserve(count); - for (size_t i = 0; i < count; ++i) { - String index_descr; - readString(index_descr, istr); - emplace_back(factory.get(index_descr)); - assertChar('\n', istr); - } -} - - void MergeTreeIndexFactory::registerIndex(const std::string &name, Creator creator) { if (!indexes.emplace(name, std::move(creator)).second) @@ -55,7 +25,10 @@ void MergeTreeIndexFactory::registerIndex(const 
std::string &name, Creator creat ErrorCodes::LOGICAL_ERROR); } -std::unique_ptr MergeTreeIndexFactory::get(std::shared_ptr node) const +std::unique_ptr MergeTreeIndexFactory::get( + const MergeTreeData & data, + std::shared_ptr node, + const Context & context) const { if (!node->type) throw Exception( @@ -74,14 +47,7 @@ std::unique_ptr MergeTreeIndexFactory::get(std::shared_ptrsecond(node); -} - -std::unique_ptr MergeTreeIndexFactory::get(const String & description) const -{ - ParserIndexDeclaration parser; - ASTPtr ast = parseQuery(parser, description.data(), description.data() + description.size(), "index factory", 0); - return get(std::dynamic_pointer_cast(ast)); + return it->second(data, node, context); } } \ No newline at end of file diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h index 1a9efb8445f..cc97a82f3dc 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h @@ -17,6 +17,7 @@ constexpr auto INDEX_FILE_PREFIX = "skp_idx_"; namespace DB { +class MergeTreeData; class MergeTreeIndex; using MergeTreeIndexPtr = std::shared_ptr; @@ -86,12 +87,7 @@ public: }; -class MergeTreeIndexes : public std::vector -{ -public: - void writeText(WriteBuffer & ostr) const; - void readText(ReadBuffer & istr); -}; +using MergeTreeIndexes = std::vector; class MergeTreeIndexFactory : public ext::singleton @@ -99,10 +95,16 @@ class MergeTreeIndexFactory : public ext::singleton friend class ext::singleton; public: - using Creator = std::function(std::shared_ptr node)>; + using Creator = std::function< + std::unique_ptr( + const MergeTreeData & data, + std::shared_ptr node, + const Context & context)>; - std::unique_ptr get(std::shared_ptr node) const; - std::unique_ptr get(const String & description) const; + std::unique_ptr get( + const MergeTreeData & data, + std::shared_ptr node, + const Context & context) const; void registerIndex(const std::string & name, Creator creator); diff --git a/dbms/src/Storages/MergeTree/MergeTreeTestIndex.h b/dbms/src/Storages/MergeTree/MergeTreeTestIndex.h index 538e45221e5..784c2b18f8c 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeTestIndex.h +++ b/dbms/src/Storages/MergeTree/MergeTreeTestIndex.h @@ -75,9 +75,10 @@ public: }; }; -std::unique_ptr MTItestCreator(std::shared_ptr node) { +std::unique_ptr MTItestCreator( + const MergeTreeData & data, std::shared_ptr node, const Context & ) { return std::make_unique( - node->name, nullptr, node->granularity.get(), Block{}); + node->name, data.primary_key_expr, node->granularity.get(), Block{}); } } \ No newline at end of file From 76c25c20244a46859d26401b17b56d64944c702b Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 7 Jan 2019 22:21:51 +0300 Subject: [PATCH 034/586] fixed --- .../Storages/MergeTree/MergeTreeDataSelectExecutor.cpp | 9 ++++----- dbms/src/Storages/MergeTree/MergeTreeTestIndex.h | 7 ++++++- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index c951ccd951a..87846e491bc 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -536,11 +536,9 @@ namespace DB /// Maybe it should be moved to BlockInputStream, but it can cause some problems. 
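        /// Each skip-index granule covers `index->granularity` data marks, so a data mark
        /// range maps to index marks by dividing with outward rounding: e.g. with
        /// granularity 4, data marks [5, 11) need index marks [1, 3), since 5 / 4 = 1
        /// and (11 + 4 - 1) / 4 = 3.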
for (auto index : data.indexes) { auto condition = index->createIndexCondition(query_info, context); - if (condition->alwaysUnknownOrTrue()) { - continue; + if (!condition->alwaysUnknownOrTrue()) { + ranges.ranges = filterMarksUsingIndex(index, condition, part, ranges.ranges, settings); } - - ranges.ranges = filterMarksUsingIndex(index, condition, part, ranges.ranges, settings); } if (!ranges.ranges.empty()) @@ -983,7 +981,8 @@ namespace DB for (const auto & range : ranges) { MarkRange index_range( - range.begin / index->granularity, range.end / index->granularity); + range.begin / index->granularity, + (range.end + index->granularity - 1) / index->granularity); if (last_index_mark != index_range.begin || !granule) { reader.seek(index_range.begin); diff --git a/dbms/src/Storages/MergeTree/MergeTreeTestIndex.h b/dbms/src/Storages/MergeTree/MergeTreeTestIndex.h index 784c2b18f8c..f1253f8e906 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeTestIndex.h +++ b/dbms/src/Storages/MergeTree/MergeTreeTestIndex.h @@ -7,6 +7,9 @@ #include #include +#include +#include + namespace DB { class MergeTreeTestIndex; @@ -15,11 +18,13 @@ struct MergeTreeTestGranule : public MergeTreeIndexGranule { ~MergeTreeTestGranule() override {}; void serializeBinary(WriteBuffer &ostr) const override { + //std::cerr << "TESTINDEX: written " << emp << "\n"; writeIntBinary(emp, ostr); } void deserializeBinary(ReadBuffer &istr) override { readIntBinary(emp, istr); + //std::cerr << "TESTINDEX: read " << emp << "\n"; } bool empty() const override { @@ -28,7 +33,7 @@ struct MergeTreeTestGranule : public MergeTreeIndexGranule { void update(const Block &block, size_t *pos, size_t limit) override { *pos += std::min(limit, block.rows() - *pos); - emp = false; + emp = rand(); }; Int32 emp = true; From 541c641b2442ff2032d8bc679a73bd6d2dab6ca3 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 8 Jan 2019 00:06:08 +0300 Subject: [PATCH 035/586] fix --- dbms/src/Storages/MergeTree/MergeTreeIndexes.h | 2 -- dbms/src/Storages/MergeTree/MergeTreeTestIndex.h | 9 +++------ .../Storages/MergeTree/MergedBlockOutputStream.cpp | 12 ++++++------ 3 files changed, 9 insertions(+), 14 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h index cc97a82f3dc..239678e1d21 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h @@ -76,8 +76,6 @@ public: virtual IndexConditionPtr createIndexCondition( const SelectQueryInfo & query_info, const Context & context) const = 0; - virtual void writeText(WriteBuffer & ostr) const = 0; - String name; ExpressionActionsPtr expr; size_t granularity; diff --git a/dbms/src/Storages/MergeTree/MergeTreeTestIndex.h b/dbms/src/Storages/MergeTree/MergeTreeTestIndex.h index f1253f8e906..b501df59017 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeTestIndex.h +++ b/dbms/src/Storages/MergeTree/MergeTreeTestIndex.h @@ -28,15 +28,15 @@ struct MergeTreeTestGranule : public MergeTreeIndexGranule { } bool empty() const override { - return static_cast(emp); + return emp == 0; } void update(const Block &block, size_t *pos, size_t limit) override { *pos += std::min(limit, block.rows() - *pos); - emp = rand(); + emp = 10; }; - Int32 emp = true; + Int32 emp = 0; }; class IndexTestCondition : public IndexCondition{ @@ -75,9 +75,6 @@ public: return std::make_shared(); }; - void writeText(WriteBuffer & ostr) const override { - DB::writeText(10, ostr); - }; }; std::unique_ptr MTItestCreator( diff --git 
a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 42d19e6afe8..dad1c8c7a06 100644 --- a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -548,11 +548,11 @@ void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Perm { const auto index = storage.indexes[i]; auto & stream = *skip_indexes_streams[i]; - size_t prev_mark = 0; + size_t prev_pos = 0; - while (prev_mark < rows) { + while (prev_pos < rows) { size_t limit = 0; - if (prev_mark == 0 && index_offset != 0) + if (prev_pos == 0 && index_offset != 0) { limit = index_offset; } @@ -571,10 +571,10 @@ void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Perm } } - size_t pos = prev_mark; + size_t pos = prev_pos; skip_indexes_granules[i]->update(block, &pos, limit); - if (pos == prev_mark + limit) { + if (pos == prev_pos + limit) { ++skip_index_filling[i]; /// write index if it is filled @@ -584,7 +584,7 @@ void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Perm skip_index_filling[i] = 0; } } - prev_mark = pos; + prev_pos = pos; } } } From 48e136f02990f60b6416199f1964fdef03c90121 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 8 Jan 2019 12:38:46 +0300 Subject: [PATCH 036/586] unique names --- dbms/src/Parsers/ASTIndexDeclaration.h | 6 ++++-- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 17 +++++++++++++++-- .../src/Storages/MergeTree/MergeTreeTestIndex.h | 9 +++++++++ 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/dbms/src/Parsers/ASTIndexDeclaration.h b/dbms/src/Parsers/ASTIndexDeclaration.h index be894d52960..cfcf84c2528 100644 --- a/dbms/src/Parsers/ASTIndexDeclaration.h +++ b/dbms/src/Parsers/ASTIndexDeclaration.h @@ -27,8 +27,10 @@ public: String getID(char) const override { return "Index"; } ASTPtr clone() const override { - auto res = std::make_shared(*this); - res->children.clear(); + auto res = std::make_shared(); + + res->name = name; + res->granularity = granularity; if (expr) res->set(res->expr, expr->clone()); diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 3063161852d..944dbaea0c7 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -47,6 +47,7 @@ #include #include +#include #include #include #include @@ -351,15 +352,27 @@ void MergeTreeData::setPrimaryKeyAndColumns( void MergeTreeData::setSkipIndexes(const ASTs & indexes_asts, bool only_check) { - if (!only_check) { - for (const auto &index_ast : indexes_asts) { + indexes.clear(); + std::set names; + if (!only_check) + { + for (const auto &index_ast : indexes_asts) + { indexes.push_back( std::move(MergeTreeIndexFactory::instance().get( *this, std::dynamic_pointer_cast(index_ast), global_context))); + if (names.find(indexes.back()->name) != names.end()) + { + throw Exception( + "Index with name `" + indexes.back()->name + "` already exsists", + ErrorCodes::LOGICAL_ERROR); + } + LOG_DEBUG(log, "new index init : " << indexes.back()->name); } } + LOG_DEBUG(log, "Indexes size: " << indexes.size()); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeTestIndex.h b/dbms/src/Storages/MergeTree/MergeTreeTestIndex.h index b501df59017..920c4bdb4b6 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeTestIndex.h +++ b/dbms/src/Storages/MergeTree/MergeTreeTestIndex.h @@ -12,6 +12,12 @@ namespace DB { +namespace ErrorCodes +{ + extern 
const int FILE_DOESNT_EXIST; +} + + class MergeTreeTestIndex; struct MergeTreeTestGranule : public MergeTreeIndexGranule { @@ -24,6 +30,9 @@ struct MergeTreeTestGranule : public MergeTreeIndexGranule { void deserializeBinary(ReadBuffer &istr) override { readIntBinary(emp, istr); + if (emp != 10) { + throw Exception("kek bad read", ErrorCodes::FILE_DOESNT_EXIST); + } //std::cerr << "TESTINDEX: read " << emp << "\n"; } From d1e2ab85e33a1982461cbd39eed85a7af9efadf6 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 8 Jan 2019 14:04:25 +0300 Subject: [PATCH 037/586] asts -> exprlist --- .../Interpreters/InterpreterCreateQuery.cpp | 2 +- dbms/src/Parsers/ASTCreateQuery.h | 16 ++++++------- dbms/src/Parsers/ASTIndexDeclaration.h | 2 +- dbms/src/Parsers/ParserCreateQuery.cpp | 23 ++++++++++--------- dbms/src/Parsers/ParserCreateQuery.h | 13 ++++++++--- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 19 +++++++++------ dbms/src/Storages/MergeTree/MergeTreeData.h | 4 ++-- .../MergeTree/MergeTreeDataMergerMutator.cpp | 5 ---- .../MergeTree/registerStorageMergeTree.cpp | 8 +++---- dbms/src/Storages/StorageFactory.cpp | 2 +- dbms/src/Storages/StorageMergeTree.cpp | 2 +- dbms/src/Storages/StorageMergeTree.h | 2 +- .../Storages/StorageReplicatedMergeTree.cpp | 5 +--- .../src/Storages/StorageReplicatedMergeTree.h | 2 +- 14 files changed, 54 insertions(+), 51 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index 16fe4ab67f1..8c01b62a592 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -106,7 +106,7 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) const ASTFunction & engine = *storage.engine; /// Currently, there are no database engines, that support any arguments. if (engine.arguments || engine.parameters || storage.partition_by || storage.primary_key - || storage.order_by || storage.sample_by || !storage.indexes.empty() || storage.settings) + || storage.order_by || storage.sample_by || (storage.indexes && !storage.indexes->children.empty()) || storage.settings) { std::stringstream ostr; formatAST(storage, ostr, false, false); diff --git a/dbms/src/Parsers/ASTCreateQuery.h b/dbms/src/Parsers/ASTCreateQuery.h index 82062fbab9d..8c1787f9bb4 100644 --- a/dbms/src/Parsers/ASTCreateQuery.h +++ b/dbms/src/Parsers/ASTCreateQuery.h @@ -19,7 +19,7 @@ public: IAST * primary_key = nullptr; IAST * order_by = nullptr; IAST * sample_by = nullptr; - std::vector indexes; + ASTExpressionList * indexes = nullptr; ASTSetQuery * settings = nullptr; String getID(char) const override { return "Storage definition"; } @@ -39,11 +39,8 @@ public: res->set(res->order_by, order_by->clone()); if (sample_by) res->set(res->sample_by, sample_by->clone()); - - for (const auto& index : indexes) { - res->indexes.emplace_back(nullptr); - res->set(res->indexes.back(), index->clone()); - } + if (indexes) + res->set(res->indexes, indexes->clone()); if (settings) res->set(res->settings, settings->clone()); @@ -78,9 +75,10 @@ public: s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << "SAMPLE BY " << (s.hilite ? hilite_none : ""); sample_by->formatImpl(s, state, frame); } - for (const auto& index : indexes) { - s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << "INDEX " << (s.hilite ? hilite_none : ""); - index->formatImpl(s, state, frame); + if (indexes) + { + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << "INDEXES " << (s.hilite ? 
hilite_none : ""); + indexes->formatImpl(s, state, frame); } if (settings) { diff --git a/dbms/src/Parsers/ASTIndexDeclaration.h b/dbms/src/Parsers/ASTIndexDeclaration.h index cfcf84c2528..968fb3f3d01 100644 --- a/dbms/src/Parsers/ASTIndexDeclaration.h +++ b/dbms/src/Parsers/ASTIndexDeclaration.h @@ -13,7 +13,7 @@ namespace DB { -/** Index name(expr) TYPE typename(args) in create query +/** name BY expr TYPE typename(args) GRANULARITY int in create query */ class ASTIndexDeclaration : public IAST { diff --git a/dbms/src/Parsers/ParserCreateQuery.cpp b/dbms/src/Parsers/ParserCreateQuery.cpp index 5577cd381b5..e021adec391 100644 --- a/dbms/src/Parsers/ParserCreateQuery.cpp +++ b/dbms/src/Parsers/ParserCreateQuery.cpp @@ -138,6 +138,12 @@ bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expe return true; } +bool ParserIndexDeclarationList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + return ParserList(std::make_unique(), std::make_unique(TokenType::Comma), false) + .parse(pos, node, expected); +} + bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { @@ -147,20 +153,20 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserKeyword s_primary_key("PRIMARY KEY"); ParserKeyword s_order_by("ORDER BY"); ParserKeyword s_sample_by("SAMPLE BY"); - ParserKeyword s_index("INDEX"); + ParserKeyword s_indexes("INDEXES"); ParserKeyword s_settings("SETTINGS"); ParserIdentifierWithOptionalParameters ident_with_optional_params_p; ParserExpression expression_p; ParserSetQuery settings_p(/* parse_only_internals_ = */ true); - ParserIndexDeclaration index_p; + ParserIndexDeclarationList indexes_p; ASTPtr engine; ASTPtr partition_by; ASTPtr primary_key; ASTPtr order_by; ASTPtr sample_by; - ASTs indexes; + ASTPtr indexes; ASTPtr settings; if (!s_engine.ignore(pos, expected)) @@ -205,9 +211,8 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; } - if (s_index.ignore(pos, expected)) { - indexes.emplace_back(nullptr); - if (index_p.parse(pos, indexes.back(), expected)) + if (s_indexes.ignore(pos, expected)) { + if (indexes_p.parse(pos, indexes, expected)) continue; else return false; @@ -228,11 +233,7 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) storage->set(storage->primary_key, primary_key); storage->set(storage->order_by, order_by); storage->set(storage->sample_by, sample_by); - - for (const auto& index : indexes) { - storage->indexes.emplace_back(nullptr); - storage->set(storage->indexes.back(), index); - } + storage->set(storage->indexes, indexes); storage->set(storage->settings, settings); diff --git a/dbms/src/Parsers/ParserCreateQuery.h b/dbms/src/Parsers/ParserCreateQuery.h index 3d117160872..99f1743fdab 100644 --- a/dbms/src/Parsers/ParserCreateQuery.h +++ b/dbms/src/Parsers/ParserCreateQuery.h @@ -220,7 +220,7 @@ protected: /** - * INDEX name BY expr TYPE typename(arg1, arg2, ...) GRANULARITY value + * name BY expr TYPE typename(arg1, arg2, ...) 
GRANULARITY value */ class ParserIndexDeclaration : public IParserBase { @@ -228,14 +228,21 @@ public: ParserIndexDeclaration() {} protected: - const char * getName() const override { return "INDEX"; } + const char * getName() const override { return "index declaration"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +class ParserIndexDeclarationList : public IParserBase +{ +protected: + const char * getName() const override { return "index declaration list"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; /** * ENGINE = name [PARTITION BY expr] [ORDER BY expr] [PRIMARY KEY expr] [SAMPLE BY expr] - * [INDEX name BY expr TYPE type(args) ... GRANULARITY value] [SETTINGS name = value, ...] + * [INDEXES name BY expr TYPE type(args) GRANULARITY value, ...] [SETTINGS name = value, ...] */ class ParserStorage : public IParserBase { diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 944dbaea0c7..f4fc9860b67 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -95,7 +95,7 @@ MergeTreeData::MergeTreeData( const ASTPtr & order_by_ast_, const ASTPtr & primary_key_ast_, const ASTPtr & sample_by_ast_, - const ASTs & indexes_ast_, + const ASTPtr & indexes_ast_, const MergingParams & merging_params_, const MergeTreeSettings & settings_, bool require_part_metadata_, @@ -350,13 +350,19 @@ void MergeTreeData::setPrimaryKeyAndColumns( } -void MergeTreeData::setSkipIndexes(const ASTs & indexes_asts, bool only_check) +void MergeTreeData::setSkipIndexes(const ASTPtr & indexes_asts, bool only_check) { - indexes.clear(); - std::set names; + if (!indexes_asts) + { + return; + } if (!only_check) { - for (const auto &index_ast : indexes_asts) + indexes.clear(); + std::set names; + auto index_list = std::dynamic_pointer_cast(indexes_asts); + + for (const auto &index_ast : index_list->children) { indexes.push_back( std::move(MergeTreeIndexFactory::instance().get( @@ -369,10 +375,9 @@ void MergeTreeData::setSkipIndexes(const ASTs & indexes_asts, bool only_check) "Index with name `" + indexes.back()->name + "` already exsists", ErrorCodes::LOGICAL_ERROR); } - LOG_DEBUG(log, "new index init : " << indexes.back()->name); + names.insert(indexes.back()->name); } } - LOG_DEBUG(log, "Indexes size: " << indexes.size()); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 7bc0ba84efd..e49a473afe1 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -309,7 +309,7 @@ public: const ASTPtr & order_by_ast_, const ASTPtr & primary_key_ast_, const ASTPtr & sample_by_ast_, /// nullptr, if sampling is not supported. 
- const ASTs & indexes_ast_, + const ASTPtr & indexes_ast_, const MergingParams & merging_params_, const MergeTreeSettings & settings_, bool require_part_metadata_, @@ -727,7 +727,7 @@ private: void setPrimaryKeyAndColumns(const ASTPtr & new_order_by_ast, ASTPtr new_primary_key_ast, const ColumnsDescription & new_columns, bool only_check = false); - void setSkipIndexes(const ASTs & indexes_asts, bool only_check = false); + void setSkipIndexes(const ASTPtr & indexes_asts, bool only_check = false); void initPartitionKey(); diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 19b81833cbf..232146ea417 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -553,23 +553,18 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor Names all_column_names = data.getColumns().getNamesOfPhysical(); NamesAndTypesList all_columns = data.getColumns().getAllPhysical(); - LOG_DEBUG(log, "Before extract"); NamesAndTypesList gathering_columns, merging_columns; Names gathering_column_names, merging_column_names; extractMergingAndGatheringColumns( all_columns, data.sorting_key_expr, data.indexes, data.merging_params, gathering_columns, gathering_column_names, merging_columns, merging_column_names); - LOG_DEBUG(log, "After extract"); - MergeTreeData::MutableDataPartPtr new_data_part = std::make_shared( data, future_part.name, future_part.part_info); new_data_part->partition.assign(future_part.getPartition()); new_data_part->relative_path = TMP_PREFIX + future_part.name; new_data_part->is_temp = true; - LOG_DEBUG(log, "New Part"); - size_t sum_input_rows_upper_bound = merge_entry->total_size_marks * data.index_granularity; MergeAlgorithm merge_alg = chooseMergeAlgorithm(parts, sum_input_rows_upper_bound, gathering_columns, deduplicate); diff --git a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp index fdb87b77ef4..095009db00d 100644 --- a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -340,7 +340,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) bool is_extended_storage_def = args.storage_def->partition_by || args.storage_def->primary_key || args.storage_def->order_by - || args.storage_def->sample_by || !args.storage_def->indexes.empty() || args.storage_def->settings; + || args.storage_def->sample_by || (args.storage_def->indexes && !args.storage_def->indexes->children.empty()) || args.storage_def->settings; String name_part = args.engine_name.substr(0, args.engine_name.size() - strlen("MergeTree")); @@ -563,7 +563,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) ASTPtr order_by_ast; ASTPtr primary_key_ast; ASTPtr sample_by_ast; - ASTs indexes_ast; + ASTPtr indexes_ast; MergeTreeSettings storage_settings = args.context.getMergeTreeSettings(); if (is_extended_storage_def) @@ -584,8 +584,8 @@ static StoragePtr create(const StorageFactory::Arguments & args) if (args.storage_def->sample_by) sample_by_ast = args.storage_def->sample_by->ptr(); - for (auto& index : args.storage_def->indexes) { - indexes_ast.push_back(index->ptr()); + if (args.storage_def->indexes) { + indexes_ast = args.storage_def->indexes->ptr(); } storage_settings.loadFromQuery(*args.storage_def); diff --git a/dbms/src/Storages/StorageFactory.cpp 
b/dbms/src/Storages/StorageFactory.cpp index b1072eb0b36..0830638b13d 100644 --- a/dbms/src/Storages/StorageFactory.cpp +++ b/dbms/src/Storages/StorageFactory.cpp @@ -95,7 +95,7 @@ StoragePtr StorageFactory::get( } if ((storage_def->partition_by || storage_def->primary_key || storage_def->order_by - || storage_def->sample_by || !storage_def->indexes.empty()) + || storage_def->sample_by || (storage_def->indexes && !storage_def->indexes->children.empty())) && !endsWith(name, "MergeTree")) { throw Exception( diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 6753303e459..833b20ab05b 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -58,7 +58,7 @@ StorageMergeTree::StorageMergeTree( const ASTPtr & order_by_ast_, const ASTPtr & primary_key_ast_, const ASTPtr & sample_by_ast_, /// nullptr, if sampling is not supported. - const ASTs & indexes_ast_, + const ASTPtr & indexes_ast_, const MergeTreeData::MergingParams & merging_params_, const MergeTreeSettings & settings_, bool has_force_restore_data_flag) diff --git a/dbms/src/Storages/StorageMergeTree.h b/dbms/src/Storages/StorageMergeTree.h index 764d78c6821..1a824be54bc 100644 --- a/dbms/src/Storages/StorageMergeTree.h +++ b/dbms/src/Storages/StorageMergeTree.h @@ -174,7 +174,7 @@ protected: const ASTPtr & order_by_ast_, const ASTPtr & primary_key_ast_, const ASTPtr & sample_by_ast_, /// nullptr, if sampling is not supported. - const ASTs & indexes_ast_, + const ASTPtr & indexes_ast_, const MergeTreeData::MergingParams & merging_params_, const MergeTreeSettings & settings_, bool has_force_restore_data_flag); diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 7c2114158c3..f60250d1be5 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -206,7 +206,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( const ASTPtr & order_by_ast_, const ASTPtr & primary_key_ast_, const ASTPtr & sample_by_ast_, - const ASTs & indexes_ast_, + const ASTPtr & indexes_ast_, const MergeTreeData::MergingParams & merging_params_, const MergeTreeSettings & settings_, bool has_force_restore_data_flag) @@ -228,9 +228,6 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( { if (path_.empty()) throw Exception("ReplicatedMergeTree storages require data path", ErrorCodes::INCORRECT_FILE_NAME); - if (!indexes_ast_.empty()) { - throw Exception("check indexes support for ReplicatedMergeTree", ErrorCodes::INCORRECT_QUERY); - } if (!zookeeper_path.empty() && zookeeper_path.back() == '/') zookeeper_path.resize(zookeeper_path.size() - 1); diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h index 9538ed2887d..86bdf2d8af9 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.h +++ b/dbms/src/Storages/StorageReplicatedMergeTree.h @@ -552,7 +552,7 @@ protected: const ASTPtr & order_by_ast_, const ASTPtr & primary_key_ast_, const ASTPtr & sample_by_ast_, - const ASTs & indexes_ast_, + const ASTPtr & indexes_ast_, const MergeTreeData::MergingParams & merging_params_, const MergeTreeSettings & settings_, bool has_force_restore_data_flag); From 49148ac3e901eb4b3a7300440c65646a4b474d4f Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 8 Jan 2019 20:27:44 +0300 Subject: [PATCH 038/586] minmax index --- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 2 +- 
.../Storages/MergeTree/MergeTreeIndexes.cpp | 11 +- .../src/Storages/MergeTree/MergeTreeIndexes.h | 15 +- .../MergeTree/MergeTreeMinMaxIndex.cpp | 140 ++++++++++++++++++ .../Storages/MergeTree/MergeTreeMinMaxIndex.h | 80 ++++++++++ .../Storages/MergeTree/MergeTreeTestIndex.h | 20 ++- .../MergeTree/registerStorageMergeTree.cpp | 2 + 7 files changed, 244 insertions(+), 26 deletions(-) create mode 100644 dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp create mode 100644 dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 87846e491bc..c8d92f32cac 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -997,7 +997,7 @@ namespace DB std::max(range.begin, index_mark * index->granularity), std::min(range.end, (index_mark + 1) * index->granularity)); - if (!condition->mayBeTrueOnGranule(*granule)) + if (!condition->mayBeTrueOnGranule(granule)) continue; if (res.empty() || res.back().end - data_range.begin >= min_marks_for_seek) diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp index 2620dc6cc38..02f58fe6275 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp @@ -6,6 +6,8 @@ #include +#include + namespace DB { @@ -17,7 +19,6 @@ namespace ErrorCodes extern const int UNKNOWN_EXCEPTION; } - void MergeTreeIndexFactory::registerIndex(const std::string &name, Creator creator) { if (!indexes.emplace(name, std::move(creator)).second) @@ -32,8 +33,12 @@ std::unique_ptr MergeTreeIndexFactory::get( { if (!node->type) throw Exception( - "for INDEX TYPE is required", - ErrorCodes::INCORRECT_QUERY); + "for index TYPE is required", ErrorCodes::INCORRECT_QUERY); + if (node->type->parameters && !node->type->parameters->children.empty()) + throw Exception( + "Index type can not have parameters", ErrorCodes::INCORRECT_QUERY); + + boost::algorithm::to_lower(node->type->name); auto it = indexes.find(node->type->name); if (it == indexes.end()) throw Exception( diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h index 239678e1d21..515d2843548 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h @@ -43,15 +43,11 @@ using MergeTreeIndexGranules = std::vector; /// Condition on the index. class IndexCondition { public: - IndexCondition() = default; virtual ~IndexCondition() = default; - /// Checks if this index is useful for query. 
virtual bool alwaysUnknownOrTrue() const = 0; - virtual bool mayBeTrueOnGranule(const MergeTreeIndexGranule & granule) const = 0; - - MergeTreeIndexPtr index; + virtual bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr granule) const = 0; }; using IndexConditionPtr = std::shared_ptr; @@ -61,12 +57,10 @@ using IndexConditionPtr = std::shared_ptr; class MergeTreeIndex { public: - MergeTreeIndex(String name, ExpressionActionsPtr expr, size_t granularity, Block key) - : name(name), expr(expr), granularity(granularity), sample(key) {} + MergeTreeIndex(String name, ExpressionActionsPtr expr, size_t granularity) + : name(name), expr(expr), granularity(granularity) {} - virtual ~MergeTreeIndex() {}; - - virtual String indexType() const { return "UNKNOWN"; }; + virtual ~MergeTreeIndex() = default; /// gets filename without extension String getFileName() const { return INDEX_FILE_PREFIX + name; }; @@ -81,7 +75,6 @@ public: size_t granularity; Names columns; DataTypes data_types; - Block sample; }; diff --git a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp new file mode 100644 index 00000000000..0cdea36621a --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp @@ -0,0 +1,140 @@ +#include + + +namespace DB +{ + +MergeTreeMinMaxGranule::MergeTreeMinMaxGranule(const MergeTreeMinMaxIndex & index) + : MergeTreeIndexGranule(), emp(true), index(index) +{ + parallelogram.reserve(index.columns.size()); +} + +void MergeTreeMinMaxGranule::serializeBinary(WriteBuffer & ostr) const +{ + if (empty()) + throw Exception( + "Attempt to write empty minmax index `" + index.name + "`", ErrorCodes::LOGICAL_ERROR); + + for (size_t i = 0; i < index.columns.size(); ++i) + { + const DataTypePtr & type = index.data_types[i]; + + type->serializeBinary(parallelogram[i].left, ostr); + type->serializeBinary(parallelogram[i].right, ostr); + } +} + +void MergeTreeMinMaxGranule::deserializeBinary(ReadBuffer & istr) +{ + for (size_t i = 0; i < index.columns.size(); ++i) + { + const DataTypePtr & type = index.data_types[i]; + + Field min_val; + type->deserializeBinary(min_val, istr); + Field max_val; + type->deserializeBinary(max_val, istr); + } + emp = true; +} + +void MergeTreeMinMaxGranule::update(const Block & block, size_t * pos, size_t limit) +{ + size_t rows_read = 0; + for (size_t i = 0; i < index.columns.size(); ++i) + { + auto column = block.getByName(index.columns[i]).column; + size_t cur; + /// TODO: more effective (index + getExtremes??) 
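+            /// A sketch of the TODO above (not part of this patch): once parallelogram[i]
+            /// exists, take the extremes of the whole slice in one pass instead of
+            /// materializing a Field per row:
+            ///     Field block_min, block_max;
+            ///     column->cut(*pos, std::min(limit, column->size() - *pos))
+            ///           ->getExtremes(block_min, block_max);
+            ///     parallelogram[i].left = std::min(parallelogram[i].left, block_min);
+            ///     parallelogram[i].right = std::max(parallelogram[i].right, block_max);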
+ for (cur = 0; cur < limit && cur + *pos < column->size(); ++cur) + { + Field field; + column->get(i, field); + if (parallelogram.size() < i) + { + parallelogram.emplace_back(field, true, field, true); + } + else + { + parallelogram[i].left = std::min(parallelogram[i].left, field); + parallelogram[i].right = std::max(parallelogram[i].right, field); + } + } + rows_read = cur; + } + + *pos += rows_read; + if (rows_read > 0) + emp = false; +}; + + +MinMaxCondition::MinMaxCondition( + const SelectQueryInfo &query, + const Context &context, + const MergeTreeMinMaxIndex &index) + : IndexCondition(), index(index), condition(query, context, index.columns, index.expr) {}; + +bool MinMaxCondition::alwaysUnknownOrTrue() const +{ + return condition.alwaysUnknownOrTrue(); +} + +bool MinMaxCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const +{ + std::shared_ptr granule + = std::dynamic_pointer_cast(idx_granule); + if (!granule) { + throw Exception( + "Minmax index condition got wrong granule", ErrorCodes::LOGICAL_ERROR); + } + + return condition.mayBeTrueInParallelogram(granule->parallelogram, index.data_types); +} + + +MergeTreeIndexGranulePtr MergeTreeMinMaxIndex::createIndexGranule() const +{ + return std::make_shared(*this); +} + +IndexConditionPtr MergeTreeMinMaxIndex::createIndexCondition( + const SelectQueryInfo & query, const Context & context) const +{ +return std::make_shared(query, context, *this); +}; + + +std::unique_ptr MergeTreeMinMaxIndexCreator( + const MergeTreeData & data, + std::shared_ptr node, + const Context & context) +{ + if (node->name.empty()) + throw Exception("Index must have unique name", ErrorCodes::INCORRECT_QUERY); + + if (node->type->arguments) + throw Exception("Minmax index have not any arguments", ErrorCodes::INCORRECT_QUERY); + + ASTPtr expr_list = MergeTreeData::extractKeyExpressionList(node->expr->clone()); + auto syntax = SyntaxAnalyzer(context, {}).analyze( + expr_list, data.getColumns().getAllPhysical()); + auto minmax_expr = ExpressionAnalyzer(expr_list, syntax, context).getActions(false); + + + auto minmax = std::make_unique( + node->name, std::move(minmax_expr), node->granularity.get()); + + const auto & columns_with_types = minmax->expr->getRequiredColumnsWithTypes(); + + for (const auto & column : columns_with_types) + { + minmax->columns.emplace_back(column.name); + minmax->data_types.emplace_back(column.type); + } + + return minmax; +} + +} \ No newline at end of file diff --git a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h new file mode 100644 index 00000000000..266cfbf04bc --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h @@ -0,0 +1,80 @@ +#pragma once + +#include +#include +#include +#include + +#include +#include +#include + +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int INCORRECT_QUERY; +} + +class MergeTreeMinMaxIndex; + +struct MergeTreeMinMaxGranule : public MergeTreeIndexGranule +{ + explicit MergeTreeMinMaxGranule(const MergeTreeMinMaxIndex & index); + + void serializeBinary(WriteBuffer & ostr) const override; + void deserializeBinary(ReadBuffer & istr) override; + bool empty() const override { return emp; } + + void update(const Block & block, size_t * pos, size_t limit) override; + + ~MergeTreeMinMaxGranule() override = default; + + bool emp; + const MergeTreeMinMaxIndex & index; + std::vector parallelogram; +}; + +class MinMaxCondition : public IndexCondition +{ +public: + 
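+    /// Reuses KeyCondition over the index expression's columns: a granule's min/max
+    /// parallelogram is checked the same way primary-key ranges are.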
MinMaxCondition( + const SelectQueryInfo & query, + const Context & context, + const MergeTreeMinMaxIndex & index); + + bool alwaysUnknownOrTrue() const override; + + bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const override; + + ~MinMaxCondition() override = default; +private: + const MergeTreeMinMaxIndex & index; + KeyCondition condition; +}; + + +class MergeTreeMinMaxIndex : public MergeTreeIndex +{ +public: + MergeTreeMinMaxIndex(String name, ExpressionActionsPtr expr, size_t granularity) + : MergeTreeIndex(name, expr, granularity) {} + + ~MergeTreeMinMaxIndex() override = default; + + MergeTreeIndexGranulePtr createIndexGranule() const override; + + IndexConditionPtr createIndexCondition( + const SelectQueryInfo & query, const Context & context) const override; + +}; + +std::unique_ptr MergeTreeMinMaxIndexCreator( + const MergeTreeData & data, std::shared_ptr node, const Context & context); + +} \ No newline at end of file diff --git a/dbms/src/Storages/MergeTree/MergeTreeTestIndex.h b/dbms/src/Storages/MergeTree/MergeTreeTestIndex.h index 920c4bdb4b6..64d298661b9 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeTestIndex.h +++ b/dbms/src/Storages/MergeTree/MergeTreeTestIndex.h @@ -21,7 +21,7 @@ namespace ErrorCodes class MergeTreeTestIndex; struct MergeTreeTestGranule : public MergeTreeIndexGranule { - ~MergeTreeTestGranule() override {}; + ~MergeTreeTestGranule() override = default;; void serializeBinary(WriteBuffer &ostr) const override { //std::cerr << "TESTINDEX: written " << emp << "\n"; @@ -50,13 +50,13 @@ struct MergeTreeTestGranule : public MergeTreeIndexGranule { class IndexTestCondition : public IndexCondition{ public: - IndexTestCondition() = default; - ~IndexTestCondition() override {}; + IndexTestCondition(int) {}; + ~IndexTestCondition() override = default; /// Checks if this index is useful for query. 
bool alwaysUnknownOrTrue() const override { return false; };
 
-    bool mayBeTrueOnGranule(const MergeTreeIndexGranule &) const override {
+    bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr) const override {
         return true;
     }
 
@@ -66,12 +66,10 @@ public:
 class MergeTreeTestIndex : public MergeTreeIndex
 {
 public:
-    MergeTreeTestIndex(String name, ExpressionActionsPtr expr, size_t granularity, Block key)
-            : MergeTreeIndex(name, expr, granularity, key) {}
+    MergeTreeTestIndex(String name, ExpressionActionsPtr expr, size_t granularity)
+            : MergeTreeIndex(name, expr, granularity) {}
 
-    ~MergeTreeTestIndex() override {}
-
-    String indexType() const override { return "TEST"; }
+    ~MergeTreeTestIndex() override = default;
 
     /// gets filename without extension
 
@@ -81,7 +79,7 @@ public:
     IndexConditionPtr createIndexCondition(
             const SelectQueryInfo & , const Context & ) const override {
-        return std::make_shared<IndexTestCondition>();
+        return std::make_shared<IndexTestCondition>(4);
     };
 };
 
@@ -89,7 +87,7 @@ public:
 std::unique_ptr<MergeTreeIndex> MTItestCreator(
         const MergeTreeData & data, std::shared_ptr<ASTIndexDeclaration> node, const Context & ) {
     return std::make_unique<MergeTreeTestIndex>(
-        node->name, data.primary_key_expr, node->granularity.get(), Block{});
+        node->name, data.primary_key_expr, node->granularity.get());
 }
 
 }
\ No newline at end of file
diff --git a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp
index 095009db00d..8e964c80357 100644
--- a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp
+++ b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp
@@ -3,6 +3,7 @@
 #include
 #include
 #include
+#include <Storages/MergeTree/MergeTreeMinMaxIndex.h>
 #include
 #include
 
@@ -638,6 +639,7 @@ static StoragePtr create(const StorageFactory::Arguments & args)
 static void registerMergeTreeSkipIndexes()
 {
     auto & factory = MergeTreeIndexFactory::instance();
     factory.registerIndex("test", MTItestCreator);
+    factory.registerIndex("minmax", MergeTreeMinMaxIndexCreator);
 }
 
From ad4df16899a66f055550318326c93f4eeeebce43 Mon Sep 17 00:00:00 2001
From: Nikita Vasilev
Date: Tue, 8 Jan 2019 22:41:36 +0300
Subject: [PATCH 039/586] fix

---
 .../MergeTree/MergeTreeDataSelectExecutor.cpp | 1726 ++++++++---------
 1 file changed, 863 insertions(+), 863 deletions(-)

diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
index c8d92f32cac..efa18a7d5d5 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@@ -62,953 +62,953 @@ namespace ProfileEvents
 
 namespace DB
 {
-    namespace ErrorCodes
-    {
-        extern const int INDEX_NOT_USED;
-        extern const int SAMPLING_NOT_SUPPORTED;
-        extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER;
-        extern const int ILLEGAL_COLUMN;
-        extern const int ARGUMENT_OUT_OF_BOUND;
-    }
+namespace ErrorCodes
+{
+    extern const int INDEX_NOT_USED;
+    extern const int SAMPLING_NOT_SUPPORTED;
+    extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER;
+    extern const int ILLEGAL_COLUMN;
+    extern const int ARGUMENT_OUT_OF_BOUND;
+}
 
-    MergeTreeDataSelectExecutor::MergeTreeDataSelectExecutor(const MergeTreeData & data_)
-        : data(data_), log(&Logger::get(data.getLogName() + " (SelectExecutor)"))
-    {
-    }
+MergeTreeDataSelectExecutor::MergeTreeDataSelectExecutor(const MergeTreeData & data_)
+    : data(data_), log(&Logger::get(data.getLogName() + " (SelectExecutor)"))
+{
+}
 
 /// Construct a block consisting only of possible values of virtual columns
-    static Block getBlockWithPartColumn(const MergeTreeData::DataPartsVector & parts)
+static Block getBlockWithPartColumn(const MergeTreeData::DataPartsVector & parts)
+{
+    auto column = ColumnString::create();
+
+    for (const auto & part : parts)
+        column->insert(part->name);
+
+    return Block{ColumnWithTypeAndName(std::move(column), std::make_shared<DataTypeString>(), "_part")};
+}
+
+
+size_t MergeTreeDataSelectExecutor::getApproximateTotalRowsToRead(
+    const MergeTreeData::DataPartsVector & parts, const KeyCondition & key_condition, const Settings & settings) const
+{
+    size_t full_marks_count = 0;
+
+    /// We will find out how many rows we would have read without sampling.
+    LOG_DEBUG(log, "Preliminary index scan with condition: " << key_condition.toString());
+
+    for (size_t i = 0; i < parts.size(); ++i)
+    {
+        const MergeTreeData::DataPartPtr & part = parts[i];
+        MarkRanges ranges = markRangesFromPKRange(part->index, key_condition, settings);
+
+        /** In order to get a lower bound on the number of rows that match the condition on PK,
+          * consider only guaranteed full marks.
+          * That is, do not take into account the first and last marks, which may be incomplete.
+          */
+        for (size_t j = 0; j < ranges.size(); ++j)
+            if (ranges[j].end - ranges[j].begin > 2)
+                full_marks_count += ranges[j].end - ranges[j].begin - 2;
+    }
+
+    return full_marks_count * data.index_granularity;
+}
+
+
+using RelativeSize = boost::rational<ASTSampleRatio::BigNum>;
+
+std::string toString(const RelativeSize & x)
+{
+    return ASTSampleRatio::toString(x.numerator()) + "/" + ASTSampleRatio::toString(x.denominator());
+}
+
+/// Converts a sample size from an approximate number of rows (ex. `SAMPLE 1000000`) to a relative value (ex. `SAMPLE 0.1`).
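+/// Worked example (illustrative, not from the source): if the preliminary index scan
+/// estimates approx_total_rows = 10000000, then `SAMPLE 1000000` is converted to
+/// min(1, 1000000 / 10000000) = 1/10, i.e. it behaves exactly like `SAMPLE 0.1`.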
- static RelativeSize convertAbsoluteSampleSizeToRelative(const ASTPtr & node, size_t approx_total_rows) +static RelativeSize convertAbsoluteSampleSizeToRelative(const ASTPtr & node, size_t approx_total_rows) +{ + if (approx_total_rows == 0) + return 1; + + const ASTSampleRatio & node_sample = typeid_cast(*node); + + auto absolute_sample_size = node_sample.ratio.numerator / node_sample.ratio.denominator; + return std::min(RelativeSize(1), RelativeSize(absolute_sample_size) / RelativeSize(approx_total_rows)); +} + + +BlockInputStreams MergeTreeDataSelectExecutor::read( + const Names & column_names_to_return, + const SelectQueryInfo & query_info, + const Context & context, + const size_t max_block_size, + const unsigned num_streams, + const PartitionIdToMaxBlock * max_block_numbers_to_read) const +{ + return readFromParts( + data.getDataPartsVector(), column_names_to_return, query_info, context, + max_block_size, num_streams, max_block_numbers_to_read); +} + +BlockInputStreams MergeTreeDataSelectExecutor::readFromParts( + MergeTreeData::DataPartsVector parts, + const Names & column_names_to_return, + const SelectQueryInfo & query_info, + const Context & context, + const size_t max_block_size, + const unsigned num_streams, + const PartitionIdToMaxBlock * max_block_numbers_to_read) const +{ + size_t part_index = 0; + + /// If query contains restrictions on the virtual column `_part` or `_part_index`, select only parts suitable for it. + /// The virtual column `_sample_factor` (which is equal to 1 / used sample rate) can be requested in the query. + Names virt_column_names; + Names real_column_names; + + bool part_column_queried = false; + + bool sample_factor_column_queried = false; + Float64 used_sample_factor = 1; + + for (const String & name : column_names_to_return) { - if (approx_total_rows == 0) - return 1; - - const ASTSampleRatio & node_sample = typeid_cast(*node); - - auto absolute_sample_size = node_sample.ratio.numerator / node_sample.ratio.denominator; - return std::min(RelativeSize(1), RelativeSize(absolute_sample_size) / RelativeSize(approx_total_rows)); - } - - - BlockInputStreams MergeTreeDataSelectExecutor::read( - const Names & column_names_to_return, - const SelectQueryInfo & query_info, - const Context & context, - const size_t max_block_size, - const unsigned num_streams, - const PartitionIdToMaxBlock * max_block_numbers_to_read) const - { - return readFromParts( - data.getDataPartsVector(), column_names_to_return, query_info, context, - max_block_size, num_streams, max_block_numbers_to_read); - } - - BlockInputStreams MergeTreeDataSelectExecutor::readFromParts( - MergeTreeData::DataPartsVector parts, - const Names & column_names_to_return, - const SelectQueryInfo & query_info, - const Context & context, - const size_t max_block_size, - const unsigned num_streams, - const PartitionIdToMaxBlock * max_block_numbers_to_read) const - { - size_t part_index = 0; - - /// If query contains restrictions on the virtual column `_part` or `_part_index`, select only parts suitable for it. - /// The virtual column `_sample_factor` (which is equal to 1 / used sample rate) can be requested in the query. 
- Names virt_column_names; - Names real_column_names; - - bool part_column_queried = false; - - bool sample_factor_column_queried = false; - Float64 used_sample_factor = 1; - - for (const String & name : column_names_to_return) + if (name == "_part") { - if (name == "_part") - { - part_column_queried = true; - virt_column_names.push_back(name); - } - else if (name == "_part_index") - { - virt_column_names.push_back(name); - } - else if (name == "_partition_id") - { - virt_column_names.push_back(name); - } - else if (name == "_sample_factor") - { - sample_factor_column_queried = true; - virt_column_names.push_back(name); - } - else - { - real_column_names.push_back(name); - } + part_column_queried = true; + virt_column_names.push_back(name); } - - NamesAndTypesList available_real_columns = data.getColumns().getAllPhysical(); - - /// If there are only virtual columns in the query, you must request at least one non-virtual one. - if (real_column_names.empty()) - real_column_names.push_back(ExpressionActions::getSmallestColumn(available_real_columns)); - - /// If `_part` virtual column is requested, we try to use it as an index. - Block virtual_columns_block = getBlockWithPartColumn(parts); - if (part_column_queried) - VirtualColumnUtils::filterBlockWithQuery(query_info.query, virtual_columns_block, context); - - std::multiset part_values = VirtualColumnUtils::extractSingleValueFromBlock(virtual_columns_block, "_part"); - - data.check(real_column_names); - - const Settings & settings = context.getSettingsRef(); - Names primary_key_columns = data.primary_key_columns; - - KeyCondition key_condition(query_info, context, primary_key_columns, data.primary_key_expr); - - if (settings.force_primary_key && key_condition.alwaysUnknownOrTrue()) + else if (name == "_part_index") { - std::stringstream exception_message; - exception_message << "Primary key ("; - for (size_t i = 0, size = primary_key_columns.size(); i < size; ++i) - exception_message << (i == 0 ? "" : ", ") << primary_key_columns[i]; - exception_message << ") is not used and setting 'force_primary_key' is set."; - - throw Exception(exception_message.str(), ErrorCodes::INDEX_NOT_USED); + virt_column_names.push_back(name); } - - std::optional minmax_idx_condition; - if (data.minmax_idx_expr) + else if (name == "_partition_id") { - minmax_idx_condition.emplace(query_info, context, data.minmax_idx_columns, data.minmax_idx_expr); - - if (settings.force_index_by_date && minmax_idx_condition->alwaysUnknownOrTrue()) - { - String msg = "MinMax index by columns ("; - bool first = true; - for (const String & col : data.minmax_idx_columns) - { - if (first) - first = false; - else - msg += ", "; - msg += col; - } - msg += ") is not used and setting 'force_index_by_date' is set"; - - throw Exception(msg, ErrorCodes::INDEX_NOT_USED); - } + virt_column_names.push_back(name); } - - /// Select the parts in which there can be data that satisfy `minmax_idx_condition` and that match the condition on `_part`, - /// as well as `max_block_number_to_read`. 
+ else if (name == "_sample_factor") { - auto prev_parts = parts; - parts.clear(); - - for (const auto & part : prev_parts) - { - if (part_values.find(part->name) == part_values.end()) - continue; - - if (part->isEmpty()) - continue; - - if (minmax_idx_condition && !minmax_idx_condition->mayBeTrueInParallelogram( - part->minmax_idx.parallelogram, data.minmax_idx_column_types)) - continue; - - if (max_block_numbers_to_read) - { - auto blocks_iterator = max_block_numbers_to_read->find(part->info.partition_id); - if (blocks_iterator == max_block_numbers_to_read->end() || part->info.max_block > blocks_iterator->second) - continue; - } - - parts.push_back(part); - } - } - - /// Sampling. - Names column_names_to_read = real_column_names; - std::shared_ptr filter_function; - ExpressionActionsPtr filter_expression; - - RelativeSize relative_sample_size = 0; - RelativeSize relative_sample_offset = 0; - - ASTSelectQuery & select = typeid_cast(*query_info.query); - - auto select_sample_size = select.sample_size(); - auto select_sample_offset = select.sample_offset(); - - if (select_sample_size) - { - relative_sample_size.assign( - typeid_cast(*select_sample_size).ratio.numerator, - typeid_cast(*select_sample_size).ratio.denominator); - - if (relative_sample_size < 0) - throw Exception("Negative sample size", ErrorCodes::ARGUMENT_OUT_OF_BOUND); - - relative_sample_offset = 0; - if (select_sample_offset) - relative_sample_offset.assign( - typeid_cast(*select_sample_offset).ratio.numerator, - typeid_cast(*select_sample_offset).ratio.denominator); - - if (relative_sample_offset < 0) - throw Exception("Negative sample offset", ErrorCodes::ARGUMENT_OUT_OF_BOUND); - - /// Convert absolute value of the sampling (in form `SAMPLE 1000000` - how many rows to read) into the relative `SAMPLE 0.1` (how much data to read). - size_t approx_total_rows = 0; - if (relative_sample_size > 1 || relative_sample_offset > 1) - approx_total_rows = getApproximateTotalRowsToRead(parts, key_condition, settings); - - if (relative_sample_size > 1) - { - relative_sample_size = convertAbsoluteSampleSizeToRelative(select_sample_size, approx_total_rows); - LOG_DEBUG(log, "Selected relative sample size: " << toString(relative_sample_size)); - } - - /// SAMPLE 1 is the same as the absence of SAMPLE. - if (relative_sample_size == RelativeSize(1)) - relative_sample_size = 0; - - if (relative_sample_offset > 0 && RelativeSize(0) == relative_sample_size) - throw Exception("Sampling offset is incorrect because no sampling", ErrorCodes::ARGUMENT_OUT_OF_BOUND); - - if (relative_sample_offset > 1) - { - relative_sample_offset = convertAbsoluteSampleSizeToRelative(select_sample_offset, approx_total_rows); - LOG_DEBUG(log, "Selected relative sample offset: " << toString(relative_sample_offset)); - } - } - - /** Which range of sampling key values do I need to read? - * First, in the whole range ("universe") we select the interval - * of relative `relative_sample_size` size, offset from the beginning by `relative_sample_offset`. - * - * Example: SAMPLE 0.4 OFFSET 0.3 - * - * [------********------] - * ^ - offset - * <------> - size - * - * If the interval passes through the end of the universe, then cut its right side. 
- * - * Example: SAMPLE 0.4 OFFSET 0.8 - * - * [----------------****] - * ^ - offset - * <------> - size - * - * Next, if the `parallel_replicas_count`, `parallel_replica_offset` settings are set, - * then it is necessary to break the received interval into pieces of the number `parallel_replicas_count`, - * and select a piece with the number `parallel_replica_offset` (from zero). - * - * Example: SAMPLE 0.4 OFFSET 0.3, parallel_replicas_count = 2, parallel_replica_offset = 1 - * - * [----------****------] - * ^ - offset - * <------> - size - * <--><--> - pieces for different `parallel_replica_offset`, select the second one. - * - * It is very important that the intervals for different `parallel_replica_offset` cover the entire range without gaps and overlaps. - * It is also important that the entire universe can be covered using SAMPLE 0.1 OFFSET 0, ... OFFSET 0.9 and similar decimals. - */ - - bool use_sampling = relative_sample_size > 0 || settings.parallel_replicas_count > 1; - bool no_data = false; /// There is nothing left after sampling. - - if (use_sampling) - { - if (!data.supportsSampling()) - throw Exception("Illegal SAMPLE: table doesn't support sampling", ErrorCodes::SAMPLING_NOT_SUPPORTED); - - if (sample_factor_column_queried && relative_sample_size != RelativeSize(0)) - used_sample_factor = 1.0 / boost::rational_cast(relative_sample_size); - - RelativeSize size_of_universum = 0; - DataTypePtr type = data.primary_key_sample.getByName(data.sampling_expr_column_name).type; - - if (typeid_cast(type.get())) - size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); - else if (typeid_cast(type.get())) - size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); - else if (typeid_cast(type.get())) - size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); - else if (typeid_cast(type.get())) - size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); - else - throw Exception("Invalid sampling column type in storage parameters: " + type->getName() + ". Must be unsigned integer type.", - ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); - - if (settings.parallel_replicas_count > 1) - { - if (relative_sample_size == RelativeSize(0)) - relative_sample_size = 1; - - relative_sample_size /= settings.parallel_replicas_count.value; - relative_sample_offset += relative_sample_size * RelativeSize(settings.parallel_replica_offset.value); - } - - if (relative_sample_offset >= RelativeSize(1)) - no_data = true; - - /// Calculate the half-interval of `[lower, upper)` column values. 
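        /// Worked example (illustrative, not from the source): for SAMPLE 0.4 OFFSET 0.3
        /// over a UInt32 sampling key, size_of_universum = 2^32 = 4294967296, so
        ///     lower = floor(3/10 * 2^32) = 1288490188,
        ///     upper = floor(7/10 * 2^32) = 3006477107,
        /// i.e. roughly 40% of the hash space is kept (boost::rational_cast truncates).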
- bool has_lower_limit = false; - bool has_upper_limit = false; - - RelativeSize lower_limit_rational = relative_sample_offset * size_of_universum; - RelativeSize upper_limit_rational = (relative_sample_offset + relative_sample_size) * size_of_universum; - - UInt64 lower = boost::rational_cast(lower_limit_rational); - UInt64 upper = boost::rational_cast(upper_limit_rational); - - if (lower > 0) - has_lower_limit = true; - - if (upper_limit_rational < size_of_universum) - has_upper_limit = true; - - /*std::cerr << std::fixed << std::setprecision(100) - << "relative_sample_size: " << relative_sample_size << "\n" - << "relative_sample_offset: " << relative_sample_offset << "\n" - << "lower_limit_float: " << lower_limit_rational << "\n" - << "upper_limit_float: " << upper_limit_rational << "\n" - << "lower: " << lower << "\n" - << "upper: " << upper << "\n";*/ - - if ((has_upper_limit && upper == 0) - || (has_lower_limit && has_upper_limit && lower == upper)) - no_data = true; - - if (no_data || (!has_lower_limit && !has_upper_limit)) - { - use_sampling = false; - } - else - { - /// Let's add the conditions to cut off something else when the index is scanned again and when the request is processed. - - std::shared_ptr lower_function; - std::shared_ptr upper_function; - - if (has_lower_limit) - { - if (!key_condition.addCondition(data.sampling_expr_column_name, Range::createLeftBounded(lower, true))) - throw Exception("Sampling column not in primary key", ErrorCodes::ILLEGAL_COLUMN); - - ASTPtr args = std::make_shared(); - args->children.push_back(data.getSamplingExpression()); - args->children.push_back(std::make_shared(lower)); - - lower_function = std::make_shared(); - lower_function->name = "greaterOrEquals"; - lower_function->arguments = args; - lower_function->children.push_back(lower_function->arguments); - - filter_function = lower_function; - } - - if (has_upper_limit) - { - if (!key_condition.addCondition(data.sampling_expr_column_name, Range::createRightBounded(upper, false))) - throw Exception("Sampling column not in primary key", ErrorCodes::ILLEGAL_COLUMN); - - ASTPtr args = std::make_shared(); - args->children.push_back(data.getSamplingExpression()); - args->children.push_back(std::make_shared(upper)); - - upper_function = std::make_shared(); - upper_function->name = "less"; - upper_function->arguments = args; - upper_function->children.push_back(upper_function->arguments); - - filter_function = upper_function; - } - - if (has_lower_limit && has_upper_limit) - { - ASTPtr args = std::make_shared(); - args->children.push_back(lower_function); - args->children.push_back(upper_function); - - filter_function = std::make_shared(); - filter_function->name = "and"; - filter_function->arguments = args; - filter_function->children.push_back(filter_function->arguments); - } - - ASTPtr query = filter_function; - auto syntax_result = SyntaxAnalyzer(context, {}).analyze(query, available_real_columns); - filter_expression = ExpressionAnalyzer(filter_function, syntax_result, context).getActions(false); - - /// Add columns needed for `sample_by_ast` to `column_names_to_read`. 
- std::vector add_columns = filter_expression->getRequiredColumns(); - column_names_to_read.insert(column_names_to_read.end(), add_columns.begin(), add_columns.end()); - std::sort(column_names_to_read.begin(), column_names_to_read.end()); - column_names_to_read.erase(std::unique(column_names_to_read.begin(), column_names_to_read.end()), column_names_to_read.end()); - } - } - - if (no_data) - { - LOG_DEBUG(log, "Sampling yields no data."); - return {}; - } - - LOG_DEBUG(log, "Key condition: " << key_condition.toString()); - if (minmax_idx_condition) - LOG_DEBUG(log, "MinMax index condition: " << minmax_idx_condition->toString()); - - /// PREWHERE - String prewhere_column; - if (select.prewhere_expression) - prewhere_column = select.prewhere_expression->getColumnName(); - - RangesInDataParts parts_with_ranges; - - /// Let's find what range to read from each part. - size_t sum_marks = 0; - size_t sum_ranges = 0; - for (auto & part : parts) - { - RangesInDataPart ranges(part, part_index++); - - if (data.hasPrimaryKey()) - ranges.ranges = markRangesFromPKRange(part->index, key_condition, settings); - else - ranges.ranges = MarkRanges{MarkRange{0, part->marks_count}}; - - /// It can be done in multiple threads (one thread for each part). - /// Maybe it should be moved to BlockInputStream, but it can cause some problems. - for (auto index : data.indexes) { - auto condition = index->createIndexCondition(query_info, context); - if (!condition->alwaysUnknownOrTrue()) { - ranges.ranges = filterMarksUsingIndex(index, condition, part, ranges.ranges, settings); - } - } - - if (!ranges.ranges.empty()) - { - parts_with_ranges.push_back(ranges); - - sum_ranges += ranges.ranges.size(); - for (const auto & range : ranges.ranges) - sum_marks += range.end - range.begin; - } - } - - LOG_DEBUG(log, "Selected " << parts.size() << " parts by date, " << parts_with_ranges.size() << " parts by key, " - << sum_marks << " marks to read from " << sum_ranges << " ranges"); - - if (parts_with_ranges.empty()) - return {}; - - ProfileEvents::increment(ProfileEvents::SelectedParts, parts_with_ranges.size()); - ProfileEvents::increment(ProfileEvents::SelectedRanges, sum_ranges); - ProfileEvents::increment(ProfileEvents::SelectedMarks, sum_marks); - - BlockInputStreams res; - - if (select.final()) - { - /// Add columns needed to calculate the sorting expression and the sign. 
- std::vector add_columns = data.sorting_key_expr->getRequiredColumns(); - column_names_to_read.insert(column_names_to_read.end(), add_columns.begin(), add_columns.end()); - - if (!data.merging_params.sign_column.empty()) - column_names_to_read.push_back(data.merging_params.sign_column); - if (!data.merging_params.version_column.empty()) - column_names_to_read.push_back(data.merging_params.version_column); - - std::sort(column_names_to_read.begin(), column_names_to_read.end()); - column_names_to_read.erase(std::unique(column_names_to_read.begin(), column_names_to_read.end()), column_names_to_read.end()); - - res = spreadMarkRangesAmongStreamsFinal( - std::move(parts_with_ranges), - column_names_to_read, - max_block_size, - settings.use_uncompressed_cache, - query_info.prewhere_info, - virt_column_names, - settings); + sample_factor_column_queried = true; + virt_column_names.push_back(name); } else { - res = spreadMarkRangesAmongStreams( - std::move(parts_with_ranges), - num_streams, - column_names_to_read, - max_block_size, - settings.use_uncompressed_cache, - query_info.prewhere_info, - virt_column_names, - settings); + real_column_names.push_back(name); } - - if (use_sampling) - for (auto & stream : res) - stream = std::make_shared(stream, filter_expression, filter_function->getColumnName()); - - /// By the way, if a distributed query or query to a Merge table is made, then the `_sample_factor` column can have different values. - if (sample_factor_column_queried) - for (auto & stream : res) - stream = std::make_shared>( - stream, std::make_shared(), used_sample_factor, "_sample_factor"); - - if (query_info.prewhere_info && query_info.prewhere_info->remove_columns_actions) - for (auto & stream : res) - stream = std::make_shared(stream, query_info.prewhere_info->remove_columns_actions); - - return res; } + NamesAndTypesList available_real_columns = data.getColumns().getAllPhysical(); - BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( - RangesInDataParts && parts, - size_t num_streams, - const Names & column_names, - size_t max_block_size, - bool use_uncompressed_cache, - const PrewhereInfoPtr & prewhere_info, - const Names & virt_columns, - const Settings & settings) const + /// If there are only virtual columns in the query, you must request at least one non-virtual one. + if (real_column_names.empty()) + real_column_names.push_back(ExpressionActions::getSmallestColumn(available_real_columns)); + + /// If `_part` virtual column is requested, we try to use it as an index. 
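+    /// For instance (illustrative, not from the source), a query such as
+    ///     SELECT count() FROM t WHERE _part = '201901_1_1_0'
+    /// narrows part_values down to that single name, so every other data part is
+    /// discarded below before any column data is read.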
+ Block virtual_columns_block = getBlockWithPartColumn(parts); + if (part_column_queried) + VirtualColumnUtils::filterBlockWithQuery(query_info.query, virtual_columns_block, context); + + std::multiset part_values = VirtualColumnUtils::extractSingleValueFromBlock(virtual_columns_block, "_part"); + + data.check(real_column_names); + + const Settings & settings = context.getSettingsRef(); + Names primary_key_columns = data.primary_key_columns; + + KeyCondition key_condition(query_info, context, primary_key_columns, data.primary_key_expr); + + if (settings.force_primary_key && key_condition.alwaysUnknownOrTrue()) { - const size_t min_marks_for_concurrent_read = - (settings.merge_tree_min_rows_for_concurrent_read + data.index_granularity - 1) / data.index_granularity; - const size_t max_marks_to_use_cache = - (settings.merge_tree_max_rows_to_use_cache + data.index_granularity - 1) / data.index_granularity; + std::stringstream exception_message; + exception_message << "Primary key ("; + for (size_t i = 0, size = primary_key_columns.size(); i < size; ++i) + exception_message << (i == 0 ? "" : ", ") << primary_key_columns[i]; + exception_message << ") is not used and setting 'force_primary_key' is set."; - /// Count marks for each part. - std::vector sum_marks_in_parts(parts.size()); - size_t sum_marks = 0; - for (size_t i = 0; i < parts.size(); ++i) + throw Exception(exception_message.str(), ErrorCodes::INDEX_NOT_USED); + } + + std::optional minmax_idx_condition; + if (data.minmax_idx_expr) + { + minmax_idx_condition.emplace(query_info, context, data.minmax_idx_columns, data.minmax_idx_expr); + + if (settings.force_index_by_date && minmax_idx_condition->alwaysUnknownOrTrue()) { - /// Let the ranges be listed from right to left so that the leftmost range can be dropped using `pop_back()`. - std::reverse(parts[i].ranges.begin(), parts[i].ranges.end()); - - for (const auto & range : parts[i].ranges) - sum_marks_in_parts[i] += range.end - range.begin; - - sum_marks += sum_marks_in_parts[i]; - } - - if (sum_marks > max_marks_to_use_cache) - use_uncompressed_cache = false; - - BlockInputStreams res; - - if (sum_marks > 0 && settings.merge_tree_uniform_read_distribution == 1) - { - /// Reduce the number of num_streams if the data is small. - if (sum_marks < num_streams * min_marks_for_concurrent_read && parts.size() < num_streams) - num_streams = std::max((sum_marks + min_marks_for_concurrent_read - 1) / min_marks_for_concurrent_read, parts.size()); - - MergeTreeReadPoolPtr pool = std::make_shared( - num_streams, sum_marks, min_marks_for_concurrent_read, parts, data, prewhere_info, true, - column_names, MergeTreeReadPool::BackoffSettings(settings), settings.preferred_block_size_bytes, false); - - /// Let's estimate total number of rows for progress bar. - const size_t total_rows = data.index_granularity * sum_marks; - LOG_TRACE(log, "Reading approx. 
" << total_rows << " rows with " << num_streams << " streams"); - - for (size_t i = 0; i < num_streams; ++i) + String msg = "MinMax index by columns ("; + bool first = true; + for (const String & col : data.minmax_idx_columns) { - res.emplace_back(std::make_shared( - i, pool, min_marks_for_concurrent_read, max_block_size, settings.preferred_block_size_bytes, - settings.preferred_max_column_in_block_size_bytes, data, use_uncompressed_cache, - prewhere_info, settings, virt_columns)); - - if (i == 0) - { - /// Set the approximate number of rows for the first source only - static_cast(*res.front()).addTotalRowsApprox(total_rows); - } + if (first) + first = false; + else + msg += ", "; + msg += col; } + msg += ") is not used and setting 'force_index_by_date' is set"; + + throw Exception(msg, ErrorCodes::INDEX_NOT_USED); } - else if (sum_marks > 0) + } + + /// Select the parts in which there can be data that satisfy `minmax_idx_condition` and that match the condition on `_part`, + /// as well as `max_block_number_to_read`. + { + auto prev_parts = parts; + parts.clear(); + + for (const auto & part : prev_parts) { - const size_t min_marks_per_stream = (sum_marks - 1) / num_streams + 1; + if (part_values.find(part->name) == part_values.end()) + continue; - for (size_t i = 0; i < num_streams && !parts.empty(); ++i) + if (part->isEmpty()) + continue; + + if (minmax_idx_condition && !minmax_idx_condition->mayBeTrueInParallelogram( + part->minmax_idx.parallelogram, data.minmax_idx_column_types)) + continue; + + if (max_block_numbers_to_read) { - size_t need_marks = min_marks_per_stream; - - /// Loop over parts. - /// We will iteratively take part or some subrange of a part from the back - /// and assign a stream to read from it. - while (need_marks > 0 && !parts.empty()) - { - RangesInDataPart part = parts.back(); - parts.pop_back(); - - size_t & marks_in_part = sum_marks_in_parts.back(); - - /// We will not take too few rows from a part. - if (marks_in_part >= min_marks_for_concurrent_read && - need_marks < min_marks_for_concurrent_read) - need_marks = min_marks_for_concurrent_read; - - /// Do not leave too few rows in the part. - if (marks_in_part > need_marks && - marks_in_part - need_marks < min_marks_for_concurrent_read) - need_marks = marks_in_part; - - MarkRanges ranges_to_get_from_part; - - /// We take the whole part if it is small enough. - if (marks_in_part <= need_marks) - { - /// Restore the order of segments. - std::reverse(part.ranges.begin(), part.ranges.end()); - - ranges_to_get_from_part = part.ranges; - - need_marks -= marks_in_part; - sum_marks_in_parts.pop_back(); - } - else - { - /// Loop through ranges in part. Take enough ranges to cover "need_marks". 
- while (need_marks > 0) - { - if (part.ranges.empty()) - throw Exception("Unexpected end of ranges while spreading marks among streams", ErrorCodes::LOGICAL_ERROR); - - MarkRange & range = part.ranges.back(); - - const size_t marks_in_range = range.end - range.begin; - const size_t marks_to_get_from_range = std::min(marks_in_range, need_marks); - - ranges_to_get_from_part.emplace_back(range.begin, range.begin + marks_to_get_from_range); - range.begin += marks_to_get_from_range; - marks_in_part -= marks_to_get_from_range; - need_marks -= marks_to_get_from_range; - if (range.begin == range.end) - part.ranges.pop_back(); - } - parts.emplace_back(part); - } - - BlockInputStreamPtr source_stream = std::make_shared( - data, part.data_part, max_block_size, settings.preferred_block_size_bytes, - settings.preferred_max_column_in_block_size_bytes, column_names, ranges_to_get_from_part, - use_uncompressed_cache, prewhere_info, true, settings.min_bytes_to_use_direct_io, - settings.max_read_buffer_size, true, virt_columns, part.part_index_in_query); - - res.push_back(source_stream); - } + auto blocks_iterator = max_block_numbers_to_read->find(part->info.partition_id); + if (blocks_iterator == max_block_numbers_to_read->end() || part->info.max_block > blocks_iterator->second) + continue; } - if (!parts.empty()) - throw Exception("Couldn't spread marks among streams", ErrorCodes::LOGICAL_ERROR); + parts.push_back(part); } - - return res; } - BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( - RangesInDataParts && parts, - const Names & column_names, - size_t max_block_size, - bool use_uncompressed_cache, - const PrewhereInfoPtr & prewhere_info, - const Names & virt_columns, - const Settings & settings) const + /// Sampling. + Names column_names_to_read = real_column_names; + std::shared_ptr filter_function; + ExpressionActionsPtr filter_expression; + + RelativeSize relative_sample_size = 0; + RelativeSize relative_sample_offset = 0; + + ASTSelectQuery & select = typeid_cast(*query_info.query); + + auto select_sample_size = select.sample_size(); + auto select_sample_offset = select.sample_offset(); + + if (select_sample_size) { - const size_t max_marks_to_use_cache = - (settings.merge_tree_max_rows_to_use_cache + data.index_granularity - 1) / data.index_granularity; + relative_sample_size.assign( + typeid_cast(*select_sample_size).ratio.numerator, + typeid_cast(*select_sample_size).ratio.denominator); - size_t sum_marks = 0; - for (size_t i = 0; i < parts.size(); ++i) - for (size_t j = 0; j < parts[i].ranges.size(); ++j) - sum_marks += parts[i].ranges[j].end - parts[i].ranges[j].begin; + if (relative_sample_size < 0) + throw Exception("Negative sample size", ErrorCodes::ARGUMENT_OUT_OF_BOUND); - if (sum_marks > max_marks_to_use_cache) - use_uncompressed_cache = false; + relative_sample_offset = 0; + if (select_sample_offset) + relative_sample_offset.assign( + typeid_cast(*select_sample_offset).ratio.numerator, + typeid_cast(*select_sample_offset).ratio.denominator); - BlockInputStreams to_merge; + if (relative_sample_offset < 0) + throw Exception("Negative sample offset", ErrorCodes::ARGUMENT_OUT_OF_BOUND); - /// NOTE `merge_tree_uniform_read_distribution` is not used for FINAL + /// Convert absolute value of the sampling (in form `SAMPLE 1000000` - how many rows to read) into the relative `SAMPLE 0.1` (how much data to read). 
+ size_t approx_total_rows = 0; + if (relative_sample_size > 1 || relative_sample_offset > 1) + approx_total_rows = getApproximateTotalRowsToRead(parts, key_condition, settings); - for (size_t part_index = 0; part_index < parts.size(); ++part_index) + if (relative_sample_size > 1) { - RangesInDataPart & part = parts[part_index]; - - BlockInputStreamPtr source_stream = std::make_shared( - data, part.data_part, max_block_size, settings.preferred_block_size_bytes, - settings.preferred_max_column_in_block_size_bytes, column_names, part.ranges, use_uncompressed_cache, - prewhere_info, true, settings.min_bytes_to_use_direct_io, settings.max_read_buffer_size, true, - virt_columns, part.part_index_in_query); - - to_merge.emplace_back(std::make_shared(source_stream, data.sorting_key_expr)); + relative_sample_size = convertAbsoluteSampleSizeToRelative(select_sample_size, approx_total_rows); + LOG_DEBUG(log, "Selected relative sample size: " << toString(relative_sample_size)); } - Names sort_columns = data.sorting_key_columns; - SortDescription sort_description; - size_t sort_columns_size = sort_columns.size(); - sort_description.reserve(sort_columns_size); + /// SAMPLE 1 is the same as the absence of SAMPLE. + if (relative_sample_size == RelativeSize(1)) + relative_sample_size = 0; - Block header = to_merge.at(0)->getHeader(); - for (size_t i = 0; i < sort_columns_size; ++i) - sort_description.emplace_back(header.getPositionByName(sort_columns[i]), 1, 1); + if (relative_sample_offset > 0 && RelativeSize(0) == relative_sample_size) + throw Exception("Sampling offset is incorrect because no sampling", ErrorCodes::ARGUMENT_OUT_OF_BOUND); - BlockInputStreamPtr merged; - switch (data.merging_params.mode) + if (relative_sample_offset > 1) { - case MergeTreeData::MergingParams::Ordinary: - merged = std::make_shared(to_merge, sort_description, max_block_size); - break; - - case MergeTreeData::MergingParams::Collapsing: - merged = std::make_shared( - to_merge, sort_description, data.merging_params.sign_column); - break; - - case MergeTreeData::MergingParams::Summing: - merged = std::make_shared(to_merge, - sort_description, data.merging_params.columns_to_sum, max_block_size); - break; - - case MergeTreeData::MergingParams::Aggregating: - merged = std::make_shared(to_merge, sort_description, max_block_size); - break; - - case MergeTreeData::MergingParams::Replacing: /// TODO Make ReplacingFinalBlockInputStream - merged = std::make_shared(to_merge, - sort_description, data.merging_params.version_column, max_block_size); - break; - - case MergeTreeData::MergingParams::VersionedCollapsing: /// TODO Make VersionedCollapsingFinalBlockInputStream - merged = std::make_shared( - to_merge, sort_description, data.merging_params.sign_column, max_block_size); - break; - - case MergeTreeData::MergingParams::Graphite: - throw Exception("GraphiteMergeTree doesn't support FINAL", ErrorCodes::LOGICAL_ERROR); + relative_sample_offset = convertAbsoluteSampleSizeToRelative(select_sample_offset, approx_total_rows); + LOG_DEBUG(log, "Selected relative sample offset: " << toString(relative_sample_offset)); } - - return {merged}; } + /** Which range of sampling key values do I need to read? + * First, in the whole range ("universe") we select the interval + * of relative `relative_sample_size` size, offset from the beginning by `relative_sample_offset`. 
+ * + * Example: SAMPLE 0.4 OFFSET 0.3 + * + * [------********------] + * ^ - offset + * <------> - size + * + * If the interval passes through the end of the universe, then cut its right side. + * + * Example: SAMPLE 0.4 OFFSET 0.8 + * + * [----------------****] + * ^ - offset + * <------> - size + * + * Next, if the `parallel_replicas_count`, `parallel_replica_offset` settings are set, + * then it is necessary to break the received interval into pieces of the number `parallel_replicas_count`, + * and select a piece with the number `parallel_replica_offset` (from zero). + * + * Example: SAMPLE 0.4 OFFSET 0.3, parallel_replicas_count = 2, parallel_replica_offset = 1 + * + * [----------****------] + * ^ - offset + * <------> - size + * <--><--> - pieces for different `parallel_replica_offset`, select the second one. + * + * It is very important that the intervals for different `parallel_replica_offset` cover the entire range without gaps and overlaps. + * It is also important that the entire universe can be covered using SAMPLE 0.1 OFFSET 0, ... OFFSET 0.9 and similar decimals. + */ - void MergeTreeDataSelectExecutor::createPositiveSignCondition( - ExpressionActionsPtr & out_expression, String & out_column, const Context & context) const + bool use_sampling = relative_sample_size > 0 || settings.parallel_replicas_count > 1; + bool no_data = false; /// There is nothing left after sampling. + + if (use_sampling) { - auto function = std::make_shared(); - auto arguments = std::make_shared(); - auto sign = std::make_shared(data.merging_params.sign_column); - auto one = std::make_shared(1); + if (!data.supportsSampling()) + throw Exception("Illegal SAMPLE: table doesn't support sampling", ErrorCodes::SAMPLING_NOT_SUPPORTED); - function->name = "equals"; - function->arguments = arguments; - function->children.push_back(arguments); + if (sample_factor_column_queried && relative_sample_size != RelativeSize(0)) + used_sample_factor = 1.0 / boost::rational_cast(relative_sample_size); - arguments->children.push_back(sign); - arguments->children.push_back(one); + RelativeSize size_of_universum = 0; + DataTypePtr type = data.primary_key_sample.getByName(data.sampling_expr_column_name).type; - ASTPtr query = function; - auto syntax_result = SyntaxAnalyzer(context, {}).analyze(query, data.getColumns().getAllPhysical()); - out_expression = ExpressionAnalyzer(query, syntax_result, context).getActions(false); - out_column = function->getColumnName(); + if (typeid_cast(type.get())) + size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); + else if (typeid_cast(type.get())) + size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); + else if (typeid_cast(type.get())) + size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); + else if (typeid_cast(type.get())) + size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); + else + throw Exception("Invalid sampling column type in storage parameters: " + type->getName() + ". 
Must be unsigned integer type.", + ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); + + if (settings.parallel_replicas_count > 1) + { + if (relative_sample_size == RelativeSize(0)) + relative_sample_size = 1; + + relative_sample_size /= settings.parallel_replicas_count.value; + relative_sample_offset += relative_sample_size * RelativeSize(settings.parallel_replica_offset.value); + } + + if (relative_sample_offset >= RelativeSize(1)) + no_data = true; + + /// Calculate the half-interval of `[lower, upper)` column values. + bool has_lower_limit = false; + bool has_upper_limit = false; + + RelativeSize lower_limit_rational = relative_sample_offset * size_of_universum; + RelativeSize upper_limit_rational = (relative_sample_offset + relative_sample_size) * size_of_universum; + + UInt64 lower = boost::rational_cast(lower_limit_rational); + UInt64 upper = boost::rational_cast(upper_limit_rational); + + if (lower > 0) + has_lower_limit = true; + + if (upper_limit_rational < size_of_universum) + has_upper_limit = true; + + /*std::cerr << std::fixed << std::setprecision(100) + << "relative_sample_size: " << relative_sample_size << "\n" + << "relative_sample_offset: " << relative_sample_offset << "\n" + << "lower_limit_float: " << lower_limit_rational << "\n" + << "upper_limit_float: " << upper_limit_rational << "\n" + << "lower: " << lower << "\n" + << "upper: " << upper << "\n";*/ + + if ((has_upper_limit && upper == 0) + || (has_lower_limit && has_upper_limit && lower == upper)) + no_data = true; + + if (no_data || (!has_lower_limit && !has_upper_limit)) + { + use_sampling = false; + } + else + { + /// Let's add the conditions to cut off something else when the index is scanned again and when the request is processed. + + std::shared_ptr lower_function; + std::shared_ptr upper_function; + + if (has_lower_limit) + { + if (!key_condition.addCondition(data.sampling_expr_column_name, Range::createLeftBounded(lower, true))) + throw Exception("Sampling column not in primary key", ErrorCodes::ILLEGAL_COLUMN); + + ASTPtr args = std::make_shared(); + args->children.push_back(data.getSamplingExpression()); + args->children.push_back(std::make_shared(lower)); + + lower_function = std::make_shared(); + lower_function->name = "greaterOrEquals"; + lower_function->arguments = args; + lower_function->children.push_back(lower_function->arguments); + + filter_function = lower_function; + } + + if (has_upper_limit) + { + if (!key_condition.addCondition(data.sampling_expr_column_name, Range::createRightBounded(upper, false))) + throw Exception("Sampling column not in primary key", ErrorCodes::ILLEGAL_COLUMN); + + ASTPtr args = std::make_shared(); + args->children.push_back(data.getSamplingExpression()); + args->children.push_back(std::make_shared(upper)); + + upper_function = std::make_shared(); + upper_function->name = "less"; + upper_function->arguments = args; + upper_function->children.push_back(upper_function->arguments); + + filter_function = upper_function; + } + + if (has_lower_limit && has_upper_limit) + { + ASTPtr args = std::make_shared(); + args->children.push_back(lower_function); + args->children.push_back(upper_function); + + filter_function = std::make_shared(); + filter_function->name = "and"; + filter_function->arguments = args; + filter_function->children.push_back(filter_function->arguments); + } + + ASTPtr query = filter_function; + auto syntax_result = SyntaxAnalyzer(context, {}).analyze(query, available_real_columns); + filter_expression = ExpressionAnalyzer(filter_function, syntax_result, 
context).getActions(false); + + /// Add columns needed for `sample_by_ast` to `column_names_to_read`. + std::vector add_columns = filter_expression->getRequiredColumns(); + column_names_to_read.insert(column_names_to_read.end(), add_columns.begin(), add_columns.end()); + std::sort(column_names_to_read.begin(), column_names_to_read.end()); + column_names_to_read.erase(std::unique(column_names_to_read.begin(), column_names_to_read.end()), column_names_to_read.end()); + } } + if (no_data) + { + LOG_DEBUG(log, "Sampling yields no data."); + return {}; + } + + LOG_DEBUG(log, "Key condition: " << key_condition.toString()); + if (minmax_idx_condition) + LOG_DEBUG(log, "MinMax index condition: " << minmax_idx_condition->toString()); + + /// PREWHERE + String prewhere_column; + if (select.prewhere_expression) + prewhere_column = select.prewhere_expression->getColumnName(); + + RangesInDataParts parts_with_ranges; + + /// Let's find what range to read from each part. + size_t sum_marks = 0; + size_t sum_ranges = 0; + for (auto & part : parts) + { + RangesInDataPart ranges(part, part_index++); + + if (data.hasPrimaryKey()) + ranges.ranges = markRangesFromPKRange(part->index, key_condition, settings); + else + ranges.ranges = MarkRanges{MarkRange{0, part->marks_count}}; + + /// It can be done in multiple threads (one thread for each part). + /// Maybe it should be moved to BlockInputStream, but it can cause some problems. + for (auto index : data.indexes) { + auto condition = index->createIndexCondition(query_info, context); + if (!condition->alwaysUnknownOrTrue()) { + ranges.ranges = filterMarksUsingIndex(index, condition, part, ranges.ranges, settings); + } + } + + if (!ranges.ranges.empty()) + { + parts_with_ranges.push_back(ranges); + + sum_ranges += ranges.ranges.size(); + for (const auto & range : ranges.ranges) + sum_marks += range.end - range.begin; + } + } + + LOG_DEBUG(log, "Selected " << parts.size() << " parts by date, " << parts_with_ranges.size() << " parts by key, " + << sum_marks << " marks to read from " << sum_ranges << " ranges"); + + if (parts_with_ranges.empty()) + return {}; + + ProfileEvents::increment(ProfileEvents::SelectedParts, parts_with_ranges.size()); + ProfileEvents::increment(ProfileEvents::SelectedRanges, sum_ranges); + ProfileEvents::increment(ProfileEvents::SelectedMarks, sum_marks); + + BlockInputStreams res; + + if (select.final()) + { + /// Add columns needed to calculate the sorting expression and the sign. 
+ std::vector add_columns = data.sorting_key_expr->getRequiredColumns(); + column_names_to_read.insert(column_names_to_read.end(), add_columns.begin(), add_columns.end()); + + if (!data.merging_params.sign_column.empty()) + column_names_to_read.push_back(data.merging_params.sign_column); + if (!data.merging_params.version_column.empty()) + column_names_to_read.push_back(data.merging_params.version_column); + + std::sort(column_names_to_read.begin(), column_names_to_read.end()); + column_names_to_read.erase(std::unique(column_names_to_read.begin(), column_names_to_read.end()), column_names_to_read.end()); + + res = spreadMarkRangesAmongStreamsFinal( + std::move(parts_with_ranges), + column_names_to_read, + max_block_size, + settings.use_uncompressed_cache, + query_info.prewhere_info, + virt_column_names, + settings); + } + else + { + res = spreadMarkRangesAmongStreams( + std::move(parts_with_ranges), + num_streams, + column_names_to_read, + max_block_size, + settings.use_uncompressed_cache, + query_info.prewhere_info, + virt_column_names, + settings); + } + + if (use_sampling) + for (auto & stream : res) + stream = std::make_shared(stream, filter_expression, filter_function->getColumnName()); + + /// By the way, if a distributed query or query to a Merge table is made, then the `_sample_factor` column can have different values. + if (sample_factor_column_queried) + for (auto & stream : res) + stream = std::make_shared>( + stream, std::make_shared(), used_sample_factor, "_sample_factor"); + + if (query_info.prewhere_info && query_info.prewhere_info->remove_columns_actions) + for (auto & stream : res) + stream = std::make_shared(stream, query_info.prewhere_info->remove_columns_actions); + + return res; +} + + +BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( + RangesInDataParts && parts, + size_t num_streams, + const Names & column_names, + size_t max_block_size, + bool use_uncompressed_cache, + const PrewhereInfoPtr & prewhere_info, + const Names & virt_columns, + const Settings & settings) const +{ + const size_t min_marks_for_concurrent_read = + (settings.merge_tree_min_rows_for_concurrent_read + data.index_granularity - 1) / data.index_granularity; + const size_t max_marks_to_use_cache = + (settings.merge_tree_max_rows_to_use_cache + data.index_granularity - 1) / data.index_granularity; + + /// Count marks for each part. + std::vector sum_marks_in_parts(parts.size()); + size_t sum_marks = 0; + for (size_t i = 0; i < parts.size(); ++i) + { + /// Let the ranges be listed from right to left so that the leftmost range can be dropped using `pop_back()`. + std::reverse(parts[i].ranges.begin(), parts[i].ranges.end()); + + for (const auto & range : parts[i].ranges) + sum_marks_in_parts[i] += range.end - range.begin; + + sum_marks += sum_marks_in_parts[i]; + } + + if (sum_marks > max_marks_to_use_cache) + use_uncompressed_cache = false; + + BlockInputStreams res; + + if (sum_marks > 0 && settings.merge_tree_uniform_read_distribution == 1) + { + /// Reduce the number of num_streams if the data is small. 
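+        /// Worked example (illustrative): with sum_marks = 50, min_marks_for_concurrent_read = 20,
+        /// num_streams = 8 and 3 parts, 50 < 8 * 20 holds, so num_streams becomes
+        /// max((50 + 20 - 1) / 20, 3) = max(3, 3) = 3 streams.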
+ if (sum_marks < num_streams * min_marks_for_concurrent_read && parts.size() < num_streams) + num_streams = std::max((sum_marks + min_marks_for_concurrent_read - 1) / min_marks_for_concurrent_read, parts.size()); + + MergeTreeReadPoolPtr pool = std::make_shared( + num_streams, sum_marks, min_marks_for_concurrent_read, parts, data, prewhere_info, true, + column_names, MergeTreeReadPool::BackoffSettings(settings), settings.preferred_block_size_bytes, false); + + /// Let's estimate total number of rows for progress bar. + const size_t total_rows = data.index_granularity * sum_marks; + LOG_TRACE(log, "Reading approx. " << total_rows << " rows with " << num_streams << " streams"); + + for (size_t i = 0; i < num_streams; ++i) + { + res.emplace_back(std::make_shared( + i, pool, min_marks_for_concurrent_read, max_block_size, settings.preferred_block_size_bytes, + settings.preferred_max_column_in_block_size_bytes, data, use_uncompressed_cache, + prewhere_info, settings, virt_columns)); + + if (i == 0) + { + /// Set the approximate number of rows for the first source only + static_cast(*res.front()).addTotalRowsApprox(total_rows); + } + } + } + else if (sum_marks > 0) + { + const size_t min_marks_per_stream = (sum_marks - 1) / num_streams + 1; + + for (size_t i = 0; i < num_streams && !parts.empty(); ++i) + { + size_t need_marks = min_marks_per_stream; + + /// Loop over parts. + /// We will iteratively take part or some subrange of a part from the back + /// and assign a stream to read from it. + while (need_marks > 0 && !parts.empty()) + { + RangesInDataPart part = parts.back(); + parts.pop_back(); + + size_t & marks_in_part = sum_marks_in_parts.back(); + + /// We will not take too few rows from a part. + if (marks_in_part >= min_marks_for_concurrent_read && + need_marks < min_marks_for_concurrent_read) + need_marks = min_marks_for_concurrent_read; + + /// Do not leave too few rows in the part. + if (marks_in_part > need_marks && + marks_in_part - need_marks < min_marks_for_concurrent_read) + need_marks = marks_in_part; + + MarkRanges ranges_to_get_from_part; + + /// We take the whole part if it is small enough. + if (marks_in_part <= need_marks) + { + /// Restore the order of segments. + std::reverse(part.ranges.begin(), part.ranges.end()); + + ranges_to_get_from_part = part.ranges; + + need_marks -= marks_in_part; + sum_marks_in_parts.pop_back(); + } + else + { + /// Loop through ranges in part. Take enough ranges to cover "need_marks". 
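+                    /// Worked example (illustrative): ranges are stored right to left, so with
+                    /// part.ranges = { [4, 16), [0, 4) } and need_marks = 10, the loop first
+                    /// takes all of [0, 4) (4 marks), then [4, 10) from the next range,
+                    /// leaving [10, 16) in the part for a later stream.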
+ while (need_marks > 0)
+ {
+ if (part.ranges.empty())
+ throw Exception("Unexpected end of ranges while spreading marks among streams", ErrorCodes::LOGICAL_ERROR);
+
+ MarkRange & range = part.ranges.back();
+
+ const size_t marks_in_range = range.end - range.begin;
+ const size_t marks_to_get_from_range = std::min(marks_in_range, need_marks);
+
+ ranges_to_get_from_part.emplace_back(range.begin, range.begin + marks_to_get_from_range);
+ range.begin += marks_to_get_from_range;
+ marks_in_part -= marks_to_get_from_range;
+ need_marks -= marks_to_get_from_range;
+ if (range.begin == range.end)
+ part.ranges.pop_back();
+ }
+ parts.emplace_back(part);
+ }
+
+ BlockInputStreamPtr source_stream = std::make_shared<MergeTreeBlockInputStream>(
+ data, part.data_part, max_block_size, settings.preferred_block_size_bytes,
+ settings.preferred_max_column_in_block_size_bytes, column_names, ranges_to_get_from_part,
+ use_uncompressed_cache, prewhere_info, true, settings.min_bytes_to_use_direct_io,
+ settings.max_read_buffer_size, true, virt_columns, part.part_index_in_query);
+
+ res.push_back(source_stream);
+ }
+ }
+
+ if (!parts.empty())
+ throw Exception("Couldn't spread marks among streams", ErrorCodes::LOGICAL_ERROR);
+ }
+
+ return res;
+}
+
+BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
+ RangesInDataParts && parts,
+ const Names & column_names,
+ size_t max_block_size,
+ bool use_uncompressed_cache,
+ const PrewhereInfoPtr & prewhere_info,
+ const Names & virt_columns,
+ const Settings & settings) const
+{
+ const size_t max_marks_to_use_cache =
+ (settings.merge_tree_max_rows_to_use_cache + data.index_granularity - 1) / data.index_granularity;
+
+ size_t sum_marks = 0;
+ for (size_t i = 0; i < parts.size(); ++i)
+ for (size_t j = 0; j < parts[i].ranges.size(); ++j)
+ sum_marks += parts[i].ranges[j].end - parts[i].ranges[j].begin;
+
+ if (sum_marks > max_marks_to_use_cache)
+ use_uncompressed_cache = false;
+
+ BlockInputStreams to_merge;
+
+ /// NOTE `merge_tree_uniform_read_distribution` is not used for FINAL
+
+ for (size_t part_index = 0; part_index < parts.size(); ++part_index)
+ {
+ RangesInDataPart & part = parts[part_index];
+
+ BlockInputStreamPtr source_stream = std::make_shared<MergeTreeBlockInputStream>(
+ data, part.data_part, max_block_size, settings.preferred_block_size_bytes,
+ settings.preferred_max_column_in_block_size_bytes, column_names, part.ranges, use_uncompressed_cache,
+ prewhere_info, true, settings.min_bytes_to_use_direct_io, settings.max_read_buffer_size, true,
+ virt_columns, part.part_index_in_query);
+
+ to_merge.emplace_back(std::make_shared<ExpressionBlockInputStream>(source_stream, data.sorting_key_expr));
+ }
+
+ Names sort_columns = data.sorting_key_columns;
+ SortDescription sort_description;
+ size_t sort_columns_size = sort_columns.size();
+ sort_description.reserve(sort_columns_size);
+
+ Block header = to_merge.at(0)->getHeader();
+ for (size_t i = 0; i < sort_columns_size; ++i)
+ sort_description.emplace_back(header.getPositionByName(sort_columns[i]), 1, 1);
+
+ BlockInputStreamPtr merged;
+ switch (data.merging_params.mode)
+ {
+ case MergeTreeData::MergingParams::Ordinary:
+ merged = std::make_shared<MergingSortedBlockInputStream>(to_merge, sort_description, max_block_size);
+ break;
+
+ case MergeTreeData::MergingParams::Collapsing:
+ merged = std::make_shared<CollapsingFinalBlockInputStream>(
+ to_merge, sort_description, data.merging_params.sign_column);
+ break;
+
+ case MergeTreeData::MergingParams::Summing:
+ merged = std::make_shared<SummingSortedBlockInputStream>(to_merge,
+ sort_description, data.merging_params.columns_to_sum, max_block_size);
+ break;
+
+ case MergeTreeData::MergingParams::Aggregating:
+ merged = std::make_shared<AggregatingSortedBlockInputStream>(to_merge, sort_description, max_block_size);
+ break;
+
+ case MergeTreeData::MergingParams::Replacing: /// TODO Make ReplacingFinalBlockInputStream
+ merged = std::make_shared<ReplacingSortedBlockInputStream>(to_merge,
+ sort_description, data.merging_params.version_column, max_block_size);
+ break;
+
+ case MergeTreeData::MergingParams::VersionedCollapsing: /// TODO Make VersionedCollapsingFinalBlockInputStream
+ merged = std::make_shared<VersionedCollapsingSortedBlockInputStream>(
+ to_merge, sort_description, data.merging_params.sign_column, max_block_size);
+ break;
+
+ case MergeTreeData::MergingParams::Graphite:
+ throw Exception("GraphiteMergeTree doesn't support FINAL", ErrorCodes::LOGICAL_ERROR);
+ }
+
+ return {merged};
+}
+
+
+void MergeTreeDataSelectExecutor::createPositiveSignCondition(
+ ExpressionActionsPtr & out_expression, String & out_column, const Context & context) const
+{
+ auto function = std::make_shared<ASTFunction>();
+ auto arguments = std::make_shared<ASTExpressionList>();
+ auto sign = std::make_shared<ASTIdentifier>(data.merging_params.sign_column);
+ auto one = std::make_shared<ASTLiteral>(1);
+
+ function->name = "equals";
+ function->arguments = arguments;
+ function->children.push_back(arguments);
+
+ arguments->children.push_back(sign);
+ arguments->children.push_back(one);
+
+ ASTPtr query = function;
+ auto syntax_result = SyntaxAnalyzer(context, {}).analyze(query, data.getColumns().getAllPhysical());
+ out_expression = ExpressionAnalyzer(query, syntax_result, context).getActions(false);
+ out_column = function->getColumnName();
+}
+
 /// Calculates a set of mark ranges, that could possibly contain keys, required by condition.
 /// In other words, it removes subranges from whole range, that definitely could not contain required keys.
- MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange(
- const MergeTreeData::DataPart::Index & index, const KeyCondition & key_condition, const Settings & settings) const
+MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange(
+ const MergeTreeData::DataPart::Index & index, const KeyCondition & key_condition, const Settings & settings) const
+{
+ MarkRanges res;
+
+ size_t marks_count = index.at(0)->size();
+ if (marks_count == 0)
+ return res;
+
+ /// If index is not used.
+ if (key_condition.alwaysUnknownOrTrue())
 {
- MarkRanges res;
+ res.push_back(MarkRange(0, marks_count));
+ }
+ else
+ {
+ size_t used_key_size = key_condition.getMaxKeyColumn() + 1;
+ size_t min_marks_for_seek = (settings.merge_tree_min_rows_for_seek + data.index_granularity - 1) / data.index_granularity;
 
- size_t marks_count = index.at(0)->size();
- if (marks_count == 0)
- return res;
+ /** There will always be disjoint suspicious segments on the stack, the leftmost one at the top (back).
+ * At each step, take the left segment and check if it fits.
+ * If fits, split it into smaller ones and put them on the stack. If not, discard it.
+ * If the segment is already of one mark length, add it to response and discard it.
+ */
+ std::vector<MarkRange> ranges_stack{ {0, marks_count} };
+
+ /// NOTE Creating temporary Field objects to pass to KeyCondition.
+ Row index_left(used_key_size);
+ Row index_right(used_key_size);
+
+ while (!ranges_stack.empty())/// In other words, it removes subranges from whole range, that definitely could not contain required keys.
 
- /// If index is not used.
- if (key_condition.alwaysUnknownOrTrue()) { - res.push_back(MarkRange(0, marks_count)); - } - else - { - size_t used_key_size = key_condition.getMaxKeyColumn() + 1; - size_t min_marks_for_seek = (settings.merge_tree_min_rows_for_seek + data.index_granularity - 1) / data.index_granularity; - - /** There will always be disjoint suspicious segments on the stack, the leftmost one at the top (back). - * At each step, take the left segment and check if it fits. - * If fits, split it into smaller ones and put them on the stack. If not, discard it. - * If the segment is already of one mark length, add it to response and discard it. - */ - std::vector ranges_stack{ {0, marks_count} }; - - /// NOTE Creating temporary Field objects to pass to KeyCondition. - Row index_left(used_key_size); - Row index_right(used_key_size); - - while (!ranges_stack.empty())/// In other words, it removes subranges from whole range, that definitely could not contain required keys. + MarkRange range = ranges_stack.back(); + ranges_stack.pop_back(); + bool may_be_true; + if (range.end == marks_count) { - MarkRange range = ranges_stack.back(); - ranges_stack.pop_back(); - - bool may_be_true; - if (range.end == marks_count) + for (size_t i = 0; i < used_key_size; ++i) { - for (size_t i = 0; i < used_key_size; ++i) - { - index[i]->get(range.begin, index_left[i]); - } - - may_be_true = key_condition.mayBeTrueAfter( - used_key_size, index_left.data(), data.primary_key_data_types); + index[i]->get(range.begin, index_left[i]); } + + may_be_true = key_condition.mayBeTrueAfter( + used_key_size, index_left.data(), data.primary_key_data_types); + } + else + { + for (size_t i = 0; i < used_key_size; ++i) + { + index[i]->get(range.begin, index_left[i]); + index[i]->get(range.end, index_right[i]); + } + + may_be_true = key_condition.mayBeTrueInRange( + used_key_size, index_left.data(), index_right.data(), data.primary_key_data_types); + } + + if (!may_be_true) + continue; + + if (range.end == range.begin + 1) + { + /// We saw a useful gap between neighboring marks. Either add it to the last range, or start a new range. + if (res.empty() || range.begin - res.back().end > min_marks_for_seek) // is it a bug?? + res.push_back(range); else - { - for (size_t i = 0; i < used_key_size; ++i) - { - index[i]->get(range.begin, index_left[i]); - index[i]->get(range.end, index_right[i]); - } + res.back().end = range.end; + } + else + { + /// Break the segment and put the result on the stack from right to left. + size_t step = (range.end - range.begin - 1) / settings.merge_tree_coarse_index_granularity + 1; + size_t end; - may_be_true = key_condition.mayBeTrueInRange( - used_key_size, index_left.data(), index_right.data(), data.primary_key_data_types); - } + for (end = range.end; end > range.begin + step; end -= step) + ranges_stack.push_back(MarkRange(end - step, end)); - if (!may_be_true) - continue; - - if (range.end == range.begin + 1) - { - /// We saw a useful gap between neighboring marks. Either add it to the last range, or start a new range. - if (res.empty() || range.begin - res.back().end > min_marks_for_seek) // is it a bug?? - res.push_back(range); - else - res.back().end = range.end; - } - else - { - /// Break the segment and put the result on the stack from right to left. 
- size_t step = (range.end - range.begin - 1) / settings.merge_tree_coarse_index_granularity + 1; - size_t end; - - for (end = range.end; end > range.begin + step; end -= step) - ranges_stack.push_back(MarkRange(end - step, end)); - - ranges_stack.push_back(MarkRange(range.begin, end)); - } + ranges_stack.push_back(MarkRange(range.begin, end)); } } - - return res; } - MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( - MergeTreeIndexPtr index, - IndexConditionPtr condition, - MergeTreeData::DataPartPtr part, - const MarkRanges & ranges, - const Settings & settings) const - { - if (!Poco::File(part->getFullPath() + index->getFileName() + ".idx").exists()) { - return ranges; - } + return res; +} - const size_t min_marks_for_seek = (settings.merge_tree_min_rows_for_seek + data.index_granularity - 1) / data.index_granularity; - - MergeTreeIndexReader reader( - index, part, - ((part->marks_count + index->granularity - 1) / index->granularity), - ranges); - - MarkRanges res; - - MergeTreeIndexGranulePtr granule = nullptr; - size_t last_index_mark = 0; - for (const auto & range : ranges) - { - MarkRange index_range( - range.begin / index->granularity, - (range.end + index->granularity - 1) / index->granularity); - - if (last_index_mark != index_range.begin || !granule) { - reader.seek(index_range.begin); - } - - for (size_t index_mark = index_range.begin; index_mark < index_range.end; ++index_mark) - { - if (index_mark != index_range.begin || !granule || last_index_mark != index_range.begin) - granule = reader.read(); - - MarkRange data_range( - std::max(range.begin, index_mark * index->granularity), - std::min(range.end, (index_mark + 1) * index->granularity)); - - if (!condition->mayBeTrueOnGranule(granule)) - continue; - - if (res.empty() || res.back().end - data_range.begin >= min_marks_for_seek) - res.push_back(data_range); - else - res.back().end = data_range.end; - } - - last_index_mark = index_range.end - 1; - } - return res; +MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( + MergeTreeIndexPtr index, + IndexConditionPtr condition, + MergeTreeData::DataPartPtr part, + const MarkRanges & ranges, + const Settings & settings) const +{ + if (!Poco::File(part->getFullPath() + index->getFileName() + ".idx").exists()) { + return ranges; } + const size_t min_marks_for_seek = (settings.merge_tree_min_rows_for_seek + data.index_granularity - 1) / data.index_granularity; + + MergeTreeIndexReader reader( + index, part, + ((part->marks_count + index->granularity - 1) / index->granularity), + ranges); + + MarkRanges res; + + MergeTreeIndexGranulePtr granule = nullptr; + size_t last_index_mark = 0; + for (const auto & range : ranges) + { + MarkRange index_range( + range.begin / index->granularity, + (range.end + index->granularity - 1) / index->granularity); + + if (last_index_mark != index_range.begin || !granule) { + reader.seek(index_range.begin); + } + + for (size_t index_mark = index_range.begin; index_mark < index_range.end; ++index_mark) + { + if (index_mark != index_range.begin || !granule || last_index_mark != index_range.begin) + granule = reader.read(); + + MarkRange data_range( + std::max(range.begin, index_mark * index->granularity), + std::min(range.end, (index_mark + 1) * index->granularity)); + + if (!condition->mayBeTrueOnGranule(granule)) + continue; + + if (res.empty() || res.back().end - data_range.begin >= min_marks_for_seek) + res.push_back(data_range); + else + res.back().end = data_range.end; + } + + last_index_mark = index_range.end - 1; + } + return 
res;
+}
+
} \ No newline at end of file

From 6eeed48e862ec132fa413726f9e4ec3289447ea3 Mon Sep 17 00:00:00 2001
From: Nikita Vasilev
Date: Tue, 8 Jan 2019 23:17:45 +0300
Subject: [PATCH 040/586] fixed select

---
 dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp
index 0cdea36621a..0b9b990ba98 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp
@@ -35,6 +35,8 @@ void MergeTreeMinMaxGranule::deserializeBinary(ReadBuffer & istr)
 type->deserializeBinary(min_val, istr);
 Field max_val;
 type->deserializeBinary(max_val, istr);
+
+ parallelogram.emplace_back(min_val, true, max_val, true);
 }
 emp = true;
 }

From e580180efcb999385ee035202db0a95edf6a0bcf Mon Sep 17 00:00:00 2001
From: Nikita Vasilev
Date: Wed, 9 Jan 2019 12:54:18 +0300
Subject: [PATCH 041/586] fixed merging

---
 .../MergeTree/MergeTreeDataMergerMutator.cpp  | 29 ++++++++++++++-----
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
index 232146ea417..f2b2d9dbdaa 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
@@ -637,19 +637,16 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor
 BlockInputStreamPtr stream = std::move(input);
 for (const auto & index : data.indexes)
 {
- stream = std::make_shared<ExpressionBlockInputStream>(stream, index->expr);
+ stream = std::make_shared<MaterializingBlockInputStream>(
+ std::make_shared<ExpressionBlockInputStream>(stream, index->expr));
 }

 if (data.hasPrimaryKey())
 {
- stream = std::make_shared<ExpressionBlockInputStream>(
- BlockInputStreamPtr(std::move(stream)), data.sorting_key_expr);
+ stream = std::make_shared<MaterializingBlockInputStream>(
+ std::make_shared<ExpressionBlockInputStream>(stream, data.sorting_key_expr));
 }

- if (!data.indexes.empty() || data.hasPrimaryKey()) {
- src_streams.emplace_back(std::make_shared<MaterializingBlockInputStream>(stream));
- } else {
- src_streams.emplace_back(stream);
- }
+ src_streams.emplace_back(stream);
 }

 Names sort_columns = data.sorting_key_columns;
@@ -658,6 +655,15 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor
 sort_description.reserve(sort_columns_size);

 Block header = src_streams.at(0)->getHeader();
+
+ for (size_t i = 0; i < src_streams.size(); ++i) {
+ LOG_DEBUG(log, "merging header " << i << "\n");
+ auto tmp_h = src_streams.at(i)->getHeader();
+ for (auto column : tmp_h.getNames()) {
+ LOG_DEBUG(log, "column: " << column);
+ }
+ }
+
 for (size_t i = 0; i < sort_columns_size; ++i)
 sort_description.emplace_back(header.getPositionByName(sort_columns[i]), 1, 1);

@@ -720,6 +726,13 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor
 Block block;
 while (!actions_blocker.isCancelled() && (block = merged_stream->read()))
 {
+ LOG_DEBUG(log, "merging\n");
+ for (auto column : block.getNames())
+ {
+ LOG_DEBUG(log, "column: " << column);
+ }
+ LOG_DEBUG(log, ">>>>>> rows read:: " << block.rows());
+
 rows_written += block.rows();
 to.write(block);

From 91fb17f27f99e0bbd5d37f0aa3c30e50a9ceeed2 Mon Sep 17 00:00:00 2001
From: Nikita Vasilev
Date: Wed, 9 Jan 2019 12:55:28 +0300
Subject: [PATCH 042/586] fixed mutation

---
 dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp 
b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
index f2b2d9dbdaa..4e51c744a1c 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
@@ -930,14 +930,13 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor
 /// All columns are modified, proceed to write a new part from scratch.
 for (const auto & index : data.indexes)
 {
- in = std::make_shared<ExpressionBlockInputStream>(in, index->expr);
+ in = std::make_shared<MaterializingBlockInputStream>(
+ std::make_shared<ExpressionBlockInputStream>(in, index->expr));
 }

 if (data.hasPrimaryKey())
 in = std::make_shared<MaterializingBlockInputStream>(
 std::make_shared<ExpressionBlockInputStream>(in, data.primary_key_expr));
- else if (!data.indexes.empty()) {
- in = std::make_shared<MaterializingBlockInputStream>(in);
 }

 MergeTreeDataPart::MinMaxIndex minmax_idx;

From 1e8fa5d9ea774358d916d479a174296a22d10166 Mon Sep 17 00:00:00 2001
From: Nikita Vasilev
Date: Wed, 9 Jan 2019 17:15:23 +0300
Subject: [PATCH 043/586] working minmax

---
 .../MergeTree/MergeTreeDataMergerMutator.cpp  | 19 +-----
 .../MergeTree/MergeTreeDataSelectExecutor.cpp |  8 +++
 .../MergeTree/MergeTreeDataWriter.cpp         |  6 --
 .../src/Storages/MergeTree/MergeTreeIndexes.h |  1 +
 .../MergeTree/MergeTreeMinMaxIndex.cpp        | 64 ++++++++++++++++---
 .../Storages/MergeTree/MergeTreeMinMaxIndex.h |  5 +-
 .../Storages/MergeTree/MergeTreeTestIndex.h   |  4 ++
 7 files changed, 71 insertions(+), 36 deletions(-)

diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
index 4e51c744a1c..302ed852926 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
@@ -656,14 +656,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor
 sort_description.reserve(sort_columns_size);

 Block header = src_streams.at(0)->getHeader();
-
- for (size_t i = 0; i < src_streams.size(); ++i) {
- LOG_DEBUG(log, "merging header " << i << "\n");
- auto tmp_h = src_streams.at(i)->getHeader();
- for (auto column : tmp_h.getNames()) {
- LOG_DEBUG(log, "column: " << column);
- }
- }
-
 for (size_t i = 0; i < sort_columns_size; ++i)
 sort_description.emplace_back(header.getPositionByName(sort_columns[i]), 1, 1);

@@ -726,13 +718,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor
 Block block;
 while (!actions_blocker.isCancelled() && (block = merged_stream->read()))
 {
- LOG_DEBUG(log, "merging\n");
- for (auto column : block.getNames())
- {
- LOG_DEBUG(log, "column: " << column);
- }
- LOG_DEBUG(log, ">>>>>> rows read:: " << block.rows());
-
 rows_written += block.rows();
 to.write(block);

@@ -929,15 +914,13 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor
 {
 /// All columns are modified, proceed to write a new part from scratch.
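+ /// Each index expression is wrapped in MaterializingBlockInputStream below because the
+ /// expression may yield constant columns, which have to be converted to full columns
+ /// before they are written into the new part.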
- for (const auto & index : data.indexes) {
+ for (const auto & index : data.indexes)
 in = std::make_shared<MaterializingBlockInputStream>(
 std::make_shared<ExpressionBlockInputStream>(in, index->expr));
- }

 if (data.hasPrimaryKey())
 in = std::make_shared<MaterializingBlockInputStream>(
 std::make_shared<ExpressionBlockInputStream>(in, data.primary_key_expr));
- }

 MergeTreeDataPart::MinMaxIndex minmax_idx;

diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
index efa18a7d5d5..5d398bcbacd 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@@ -996,9 +996,17 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex(
 MarkRange data_range(
 std::max(range.begin, index_mark * index->granularity),
 std::min(range.end, (index_mark + 1) * index->granularity));
+ LOG_DEBUG(log, "drop out:: " << " data_range [" <<
+ data_range.begin << ", " << data_range.end << ") index_mark = " << index_mark <<
+ " granule data: ");
+
+ LOG_DEBUG(log, granule->toString());

 if (!condition->mayBeTrueOnGranule(granule))
+ {
+ LOG_DEBUG(log, "DROP");
 continue;
+ }

 if (res.empty() || res.back().end - data_range.begin >= min_marks_for_seek)
 res.push_back(data_range);
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
index 5e5a7fecd21..a0fca13f34a 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
@@ -216,12 +216,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa
 for (auto index : data.indexes)
 {
- auto index_columns = index->expr->getRequiredColumnsWithTypes();
- for (const auto & column : index_columns)
- {
- if (!block.has(column.name))
- block.insert(ColumnWithTypeAndName(column.type, column.name));
- }
 index->expr->execute(block);
 }

diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h
index 515d2843548..62049bed322 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h
@@ -31,6 +31,7 @@ struct MergeTreeIndexGranule

 virtual void serializeBinary(WriteBuffer & ostr) const = 0;
 virtual void deserializeBinary(ReadBuffer & istr) = 0;
+ virtual String toString() const = 0;

 virtual bool empty() const = 0;
 virtual void update(const Block & block, size_t * pos, size_t limit) = 0;
diff --git a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp
index 0b9b990ba98..c4b49e51b3b 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp
@@ -1,13 +1,12 @@
 #include 
-
+#include 
 namespace DB
 {

 MergeTreeMinMaxGranule::MergeTreeMinMaxGranule(const MergeTreeMinMaxIndex & index)
- : MergeTreeIndexGranule(), emp(true), index(index)
+ : MergeTreeIndexGranule(), index(index), parallelogram()
 {
- parallelogram.reserve(index.columns.size());
 }

 void MergeTreeMinMaxGranule::serializeBinary(WriteBuffer & ostr) const
@@ -15,11 +14,18 @@ void MergeTreeMinMaxGranule::serializeBinary(WriteBuffer & ostr) const
 if (empty())
 throw Exception(
 "Attempt to write empty minmax index `" + index.name + "`", ErrorCodes::LOGICAL_ERROR);
+ Poco::Logger * log = &Poco::Logger::get("minmax_idx");
+
+ LOG_DEBUG(log, "serializeBinary Granule");

 for (size_t i = 0; i < index.columns.size(); ++i)
 {
 const DataTypePtr & type = index.data_types[i];

+ LOG_DEBUG(log, "parallel " << i << " :: "
+ << 
applyVisitor(FieldVisitorToString(), parallelogram[i].left) << " " + << applyVisitor(FieldVisitorToString(), parallelogram[i].right)); + type->serializeBinary(parallelogram[i].left, ostr); type->serializeBinary(parallelogram[i].right, ostr); } @@ -27,6 +33,10 @@ void MergeTreeMinMaxGranule::serializeBinary(WriteBuffer & ostr) const void MergeTreeMinMaxGranule::deserializeBinary(ReadBuffer & istr) { + Poco::Logger * log = &Poco::Logger::get("minmax_idx"); + + LOG_DEBUG(log, "deserializeBinary Granule"); + parallelogram.clear(); for (size_t i = 0; i < index.columns.size(); ++i) { const DataTypePtr & type = index.data_types[i]; @@ -36,25 +46,51 @@ void MergeTreeMinMaxGranule::deserializeBinary(ReadBuffer & istr) Field max_val; type->deserializeBinary(max_val, istr); + LOG_DEBUG(log, "parallel " << i << " :: " + << applyVisitor(FieldVisitorToString(), min_val) << " " + << applyVisitor(FieldVisitorToString(), max_val)); + parallelogram.emplace_back(min_val, true, max_val, true); } - emp = true; +} + +String MergeTreeMinMaxGranule::toString() const +{ + String res = "minmax granule: "; + + for (size_t i = 0; i < parallelogram.size(); ++i) + { + res += "[" + + applyVisitor(FieldVisitorToString(), parallelogram[i].left) + ", " + + applyVisitor(FieldVisitorToString(), parallelogram[i].right) + "]"; + } + + return res; } void MergeTreeMinMaxGranule::update(const Block & block, size_t * pos, size_t limit) { + Poco::Logger * log = &Poco::Logger::get("minmax_idx"); + + LOG_DEBUG(log, "update Granule " << parallelogram.size() + << " pos: "<< *pos << " limit: " << limit << " rows: " << block.rows()); + size_t rows_read = 0; for (size_t i = 0; i < index.columns.size(); ++i) { + LOG_DEBUG(log, "granule column: " << index.columns[i]); + auto column = block.getByName(index.columns[i]).column; size_t cur; /// TODO: more effective (index + getExtremes??) 
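+ /// Note: the row index passed to column->get() below must be cur + *pos; the previous
+ /// revision passed the column index i, so granules were built from the wrong rows.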
for (cur = 0; cur < limit && cur + *pos < column->size(); ++cur) { Field field; - column->get(i, field); - if (parallelogram.size() < i) + column->get(cur + *pos, field); + LOG_DEBUG(log, "upd:: " << applyVisitor(FieldVisitorToString(), field)); + if (parallelogram.size() <= i) { + LOG_DEBUG(log, "emplaced"); parallelogram.emplace_back(field, true, field, true); } else @@ -63,12 +99,14 @@ void MergeTreeMinMaxGranule::update(const Block & block, size_t * pos, size_t li parallelogram[i].right = std::max(parallelogram[i].right, field); } } + LOG_DEBUG(log, "res:: [" + << applyVisitor(FieldVisitorToString(), parallelogram[i].left) << ", " + << applyVisitor(FieldVisitorToString(), parallelogram[i].right) << "]"); rows_read = cur; } + LOG_DEBUG(log, "updated rows_read: " << rows_read); *pos += rows_read; - if (rows_read > 0) - emp = false; }; @@ -128,12 +166,18 @@ std::unique_ptr MergeTreeMinMaxIndexCreator( auto minmax = std::make_unique( node->name, std::move(minmax_expr), node->granularity.get()); - const auto & columns_with_types = minmax->expr->getRequiredColumnsWithTypes(); + auto sample = ExpressionAnalyzer(expr_list, syntax, context) + .getActions(true)->getSampleBlock(); - for (const auto & column : columns_with_types) + Poco::Logger * log = &Poco::Logger::get("minmax_idx"); + LOG_DEBUG(log, "new minmax index"); + for (size_t i = 0; i < expr_list->children.size(); ++i) { + const auto & column = sample.getByPosition(i); + minmax->columns.emplace_back(column.name); minmax->data_types.emplace_back(column.type); + LOG_DEBUG(log, ">" << column.name << " " << column.type->getName()); } return minmax; diff --git a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h index 266cfbf04bc..3311d8815e9 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h +++ b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h @@ -29,13 +29,14 @@ struct MergeTreeMinMaxGranule : public MergeTreeIndexGranule void serializeBinary(WriteBuffer & ostr) const override; void deserializeBinary(ReadBuffer & istr) override; - bool empty() const override { return emp; } + + String toString() const override; + bool empty() const override { return parallelogram.empty(); } void update(const Block & block, size_t * pos, size_t limit) override; ~MergeTreeMinMaxGranule() override = default; - bool emp; const MergeTreeMinMaxIndex & index; std::vector parallelogram; }; diff --git a/dbms/src/Storages/MergeTree/MergeTreeTestIndex.h b/dbms/src/Storages/MergeTree/MergeTreeTestIndex.h index 64d298661b9..93c002f8295 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeTestIndex.h +++ b/dbms/src/Storages/MergeTree/MergeTreeTestIndex.h @@ -36,6 +36,10 @@ struct MergeTreeTestGranule : public MergeTreeIndexGranule { //std::cerr << "TESTINDEX: read " << emp << "\n"; } + String toString() const override { + return "test_index"; + } + bool empty() const override { return emp == 0; } From d8f8b6352b236cce0690f4b5a80f8b9563d8f817 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 9 Jan 2019 17:30:25 +0300 Subject: [PATCH 044/586] removed test index --- .../Storages/MergeTree/MergeTreeTestIndex.cpp | 2 - .../Storages/MergeTree/MergeTreeTestIndex.h | 97 ------------------- .../MergeTree/registerStorageMergeTree.cpp | 2 - 3 files changed, 101 deletions(-) delete mode 100644 dbms/src/Storages/MergeTree/MergeTreeTestIndex.cpp delete mode 100644 dbms/src/Storages/MergeTree/MergeTreeTestIndex.h diff --git a/dbms/src/Storages/MergeTree/MergeTreeTestIndex.cpp 
b/dbms/src/Storages/MergeTree/MergeTreeTestIndex.cpp deleted file mode 100644 index 29e15b66503..00000000000 --- a/dbms/src/Storages/MergeTree/MergeTreeTestIndex.cpp +++ /dev/null @@ -1,2 +0,0 @@ -#include - diff --git a/dbms/src/Storages/MergeTree/MergeTreeTestIndex.h b/dbms/src/Storages/MergeTree/MergeTreeTestIndex.h deleted file mode 100644 index 93c002f8295..00000000000 --- a/dbms/src/Storages/MergeTree/MergeTreeTestIndex.h +++ /dev/null @@ -1,97 +0,0 @@ -#pragma once - -#include -#include -#include - -#include -#include - -#include -#include - -namespace DB { - -namespace ErrorCodes -{ - extern const int FILE_DOESNT_EXIST; -} - - -class MergeTreeTestIndex; - -struct MergeTreeTestGranule : public MergeTreeIndexGranule { - ~MergeTreeTestGranule() override = default;; - - void serializeBinary(WriteBuffer &ostr) const override { - //std::cerr << "TESTINDEX: written " << emp << "\n"; - writeIntBinary(emp, ostr); - } - - void deserializeBinary(ReadBuffer &istr) override { - readIntBinary(emp, istr); - if (emp != 10) { - throw Exception("kek bad read", ErrorCodes::FILE_DOESNT_EXIST); - } - //std::cerr << "TESTINDEX: read " << emp << "\n"; - } - - String toString() const override { - return "test_index"; - } - - bool empty() const override { - return emp == 0; - } - - void update(const Block &block, size_t *pos, size_t limit) override { - *pos += std::min(limit, block.rows() - *pos); - emp = 10; - }; - - Int32 emp = 0; -}; - -class IndexTestCondition : public IndexCondition{ -public: - IndexTestCondition(int) {}; - ~IndexTestCondition() override = default; - - /// Checks if this index is useful for query. - bool alwaysUnknownOrTrue() const override { return false; }; - - bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr) const override { - return true; - } - -}; - - -class MergeTreeTestIndex : public MergeTreeIndex -{ -public: - MergeTreeTestIndex(String name, ExpressionActionsPtr expr, size_t granularity) - : MergeTreeIndex(name, expr, granularity) {} - - ~MergeTreeTestIndex() override = default; - - /// gets filename without extension - - MergeTreeIndexGranulePtr createIndexGranule() const override { - return std::make_shared(); - } - - IndexConditionPtr createIndexCondition( - const SelectQueryInfo & , const Context & ) const override { - return std::make_shared(4); - }; - -}; - -std::unique_ptr MTItestCreator( - const MergeTreeData & data, std::shared_ptr node, const Context & ) { - return std::make_unique( - node->name, data.primary_key_expr, node->granularity.get()); -} - -} \ No newline at end of file diff --git a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp index 8e964c80357..1f32a7443a9 100644 --- a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include #include @@ -638,7 +637,6 @@ static StoragePtr create(const StorageFactory::Arguments & args) static void registerMergeTreeSkipIndexes() { auto & factory = MergeTreeIndexFactory::instance(); - factory.registerIndex("test", MTItestCreator); factory.registerIndex("minmax", MergeTreeMinMaxIndexCreator); } From 0ba6f1421ac500bf10d40a20dacd6fe25f38f1b3 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 9 Jan 2019 20:05:52 +0300 Subject: [PATCH 045/586] fixed style --- dbms/src/Parsers/tests/create_parser.cpp | 2 +- .../MergeTree/MergeTreeDataMergerMutator.cpp | 8 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 85 +++++++++---------- 
.../MergeTree/MergeTreeDataSelectExecutor.h | 10 +-- .../MergeTree/MergeTreeDataWriter.cpp | 2 - .../MergeTree/MergeTreeIndexReader.cpp | 10 +-- .../Storages/MergeTree/MergeTreeIndexReader.h | 8 +- .../Storages/MergeTree/MergeTreeIndexes.cpp | 6 +- .../src/Storages/MergeTree/MergeTreeIndexes.h | 8 +- .../MergeTree/MergeTreeMinMaxIndex.cpp | 11 ++- .../Storages/MergeTree/MergeTreeMinMaxIndex.h | 12 +-- .../MergeTree/MergedBlockOutputStream.cpp | 6 +- 12 files changed, 78 insertions(+), 90 deletions(-) diff --git a/dbms/src/Parsers/tests/create_parser.cpp b/dbms/src/Parsers/tests/create_parser.cpp index 4139c930ba2..6137d4d18da 100644 --- a/dbms/src/Parsers/tests/create_parser.cpp +++ b/dbms/src/Parsers/tests/create_parser.cpp @@ -10,7 +10,7 @@ int main(int, char **) { using namespace DB; - std::string input = "CREATE TABLE hits (URL String, UserAgentMinor2 FixedString(2), EventTime DateTime) ENGINE = MergeTree() ORDER BY EventTime INDEX minmax1 BY (lower(URL), EventTime) TYPE MINMAX(1,2,3) GRANULARITY 10"; + std::string input = "CREATE TABLE hits (URL String, UserAgentMinor2 FixedString(2), EventTime DateTime) ENGINE = Log"; ParserCreateQuery parser; ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0); diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 302ed852926..cd635a36b8b 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -655,7 +655,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor sort_description.reserve(sort_columns_size); Block header = src_streams.at(0)->getHeader(); - for (size_t i = 0; i < sort_columns_size; ++i) sort_description.emplace_back(header.getPositionByName(sort_columns[i]), 1, 1); @@ -823,8 +822,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor Poco::File(rows_sources_file_path).remove(); } - // TODO: здесь надо как-то мержить индекс или в MergedBlockOutputStream - for (const auto & part : parts) new_data_part->minmax_idx.merge(part->minmax_idx); @@ -933,8 +930,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor while (check_not_cancelled() && (block = in->read())) { minmax_idx.update(block, data.minmax_idx_columns); - // TODO: насчитывать индексы - /// Supposing data is sorted we can calculate indexes there out.write(block); } @@ -949,7 +944,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor /// We will modify only some of the columns. Other columns and key values can be copied as-is. /// TODO: check that we modify only non-key columns in this case. 
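+ /// The nested loops below intersect the set of updated columns with the columns each
+ /// skipping index requires (expr->getRequiredColumns()), so an index is only treated
+ /// as affected when the mutation touches a column it actually reads.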
- /// TODO: more effective check + /// Checks if columns used in skipping indexes modified/ for (const auto & col : in_header.getNames()) { for (const auto index : data.indexes) { const auto & index_cols = index->expr->getRequiredColumns(); @@ -963,7 +958,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor } NameSet files_to_skip = {"checksums.txt", "columns.txt"}; - for (const auto & entry : in_header) { IDataType::StreamCallback callback = [&](const IDataType::SubstreamPath & substream_path) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 5d398bcbacd..1c6cbad14ea 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -73,7 +73,7 @@ namespace ErrorCodes MergeTreeDataSelectExecutor::MergeTreeDataSelectExecutor(const MergeTreeData & data_) - : data(data_), log(&Logger::get(data.getLogName() + " (SelectExecutor)")) + : data(data_), log(&Logger::get(data.getLogName() + " (SelectExecutor)")) { } @@ -91,7 +91,7 @@ static Block getBlockWithPartColumn(const MergeTreeData::DataPartsVector & parts size_t MergeTreeDataSelectExecutor::getApproximateTotalRowsToRead( - const MergeTreeData::DataPartsVector & parts, const KeyCondition & key_condition, const Settings & settings) const + const MergeTreeData::DataPartsVector & parts, const KeyCondition & key_condition, const Settings & settings) const { size_t full_marks_count = 0; @@ -137,12 +137,12 @@ static RelativeSize convertAbsoluteSampleSizeToRelative(const ASTPtr & node, siz BlockInputStreams MergeTreeDataSelectExecutor::read( - const Names & column_names_to_return, - const SelectQueryInfo & query_info, - const Context & context, - const size_t max_block_size, - const unsigned num_streams, - const PartitionIdToMaxBlock * max_block_numbers_to_read) const + const Names & column_names_to_return, + const SelectQueryInfo & query_info, + const Context & context, + const size_t max_block_size, + const unsigned num_streams, + const PartitionIdToMaxBlock * max_block_numbers_to_read) const { return readFromParts( data.getDataPartsVector(), column_names_to_return, query_info, context, @@ -150,13 +150,13 @@ BlockInputStreams MergeTreeDataSelectExecutor::read( } BlockInputStreams MergeTreeDataSelectExecutor::readFromParts( - MergeTreeData::DataPartsVector parts, - const Names & column_names_to_return, - const SelectQueryInfo & query_info, - const Context & context, - const size_t max_block_size, - const unsigned num_streams, - const PartitionIdToMaxBlock * max_block_numbers_to_read) const + MergeTreeData::DataPartsVector parts, + const Names & column_names_to_return, + const SelectQueryInfo & query_info, + const Context & context, + const size_t max_block_size, + const unsigned num_streams, + const PartitionIdToMaxBlock * max_block_numbers_to_read) const { size_t part_index = 0; @@ -392,7 +392,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::readFromParts( size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); else throw Exception("Invalid sampling column type in storage parameters: " + type->getName() + ". 
Must be unsigned integer type.", - ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); + ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); if (settings.parallel_replicas_count > 1) { @@ -552,7 +552,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::readFromParts( } LOG_DEBUG(log, "Selected " << parts.size() << " parts by date, " << parts_with_ranges.size() << " parts by key, " - << sum_marks << " marks to read from " << sum_ranges << " ranges"); + << sum_marks << " marks to read from " << sum_ranges << " ranges"); if (parts_with_ranges.empty()) return {}; @@ -618,14 +618,14 @@ BlockInputStreams MergeTreeDataSelectExecutor::readFromParts( BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( - RangesInDataParts && parts, - size_t num_streams, - const Names & column_names, - size_t max_block_size, - bool use_uncompressed_cache, - const PrewhereInfoPtr & prewhere_info, - const Names & virt_columns, - const Settings & settings) const + RangesInDataParts && parts, + size_t num_streams, + const Names & column_names, + size_t max_block_size, + bool use_uncompressed_cache, + const PrewhereInfoPtr & prewhere_info, + const Names & virt_columns, + const Settings & settings) const { const size_t min_marks_for_concurrent_read = (settings.merge_tree_min_rows_for_concurrent_read + data.index_granularity - 1) / data.index_granularity; @@ -761,13 +761,13 @@ BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( } BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( - RangesInDataParts && parts, - const Names & column_names, - size_t max_block_size, - bool use_uncompressed_cache, - const PrewhereInfoPtr & prewhere_info, - const Names & virt_columns, - const Settings & settings) const + RangesInDataParts && parts, + const Names & column_names, + size_t max_block_size, + bool use_uncompressed_cache, + const PrewhereInfoPtr & prewhere_info, + const Names & virt_columns, + const Settings & settings) const { const size_t max_marks_to_use_cache = (settings.merge_tree_max_rows_to_use_cache + data.index_granularity - 1) / data.index_granularity; @@ -820,7 +820,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal case MergeTreeData::MergingParams::Summing: merged = std::make_shared(to_merge, - sort_description, data.merging_params.columns_to_sum, max_block_size); + sort_description, data.merging_params.columns_to_sum, max_block_size); break; case MergeTreeData::MergingParams::Aggregating: @@ -829,7 +829,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal case MergeTreeData::MergingParams::Replacing: /// TODO Make ReplacingFinalBlockInputStream merged = std::make_shared(to_merge, - sort_description, data.merging_params.version_column, max_block_size); + sort_description, data.merging_params.version_column, max_block_size); break; case MergeTreeData::MergingParams::VersionedCollapsing: /// TODO Make VersionedCollapsingFinalBlockInputStream @@ -846,7 +846,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal void MergeTreeDataSelectExecutor::createPositiveSignCondition( - ExpressionActionsPtr & out_expression, String & out_column, const Context & context) const + ExpressionActionsPtr & out_expression, String & out_column, const Context & context) const { auto function = std::make_shared(); auto arguments = std::make_shared(); @@ -870,7 +870,7 @@ void MergeTreeDataSelectExecutor::createPositiveSignCondition( /// Calculates a set of mark ranges, that could 
possibly contain keys, required by condition. /// In other words, it removes subranges from whole range, that definitely could not contain required keys. MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( - const MergeTreeData::DataPart::Index & index, const KeyCondition & key_condition, const Settings & settings) const + const MergeTreeData::DataPart::Index & index, const KeyCondition & key_condition, const Settings & settings) const { MarkRanges res; @@ -899,8 +899,7 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( Row index_left(used_key_size); Row index_right(used_key_size); - while (!ranges_stack.empty())/// In other words, it removes subranges from whole range, that definitely could not contain required keys. - + while (!ranges_stack.empty()) { MarkRange range = ranges_stack.back(); ranges_stack.pop_back(); @@ -957,11 +956,11 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( } MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( - MergeTreeIndexPtr index, - IndexConditionPtr condition, - MergeTreeData::DataPartPtr part, - const MarkRanges & ranges, - const Settings & settings) const + MergeTreeIndexPtr index, + IndexConditionPtr condition, + MergeTreeData::DataPartPtr part, + const MarkRanges & ranges, + const Settings & settings) const { if (!Poco::File(part->getFullPath() + index->getFileName() + ".idx").exists()) { return ranges; diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index 8010cc9c889..22f56a14e27 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -83,11 +83,11 @@ private: const Settings & settings) const; MarkRanges filterMarksUsingIndex( - MergeTreeIndexPtr index, - IndexConditionPtr condition, - MergeTreeData::DataPartPtr part, - const MarkRanges & ranges, - const Settings & settings) const; + MergeTreeIndexPtr index, + IndexConditionPtr condition, + MergeTreeData::DataPartPtr part, + const MarkRanges & ranges, + const Settings & settings) const; }; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp index a0fca13f34a..18df3905846 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -215,9 +215,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa MergedBlockOutputStream out(data, new_data_part->getFullPath(), columns, compression_codec); for (auto index : data.indexes) - { index->expr->execute(block); - } out.writePrefix(); out.writeWithPermutation(block, perm_ptr); diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexReader.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexReader.cpp index f81f325b065..0618727527b 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexReader.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexReader.cpp @@ -4,11 +4,11 @@ namespace DB { MergeTreeIndexReader::MergeTreeIndexReader( - MergeTreeIndexPtr index, MergeTreeData::DataPartPtr part, size_t marks_count, const MarkRanges & all_mark_ranges) - : index(index), stream( - part->getFullPath() + index->getFileName(), ".idx", marks_count, - all_mark_ranges, nullptr, false, nullptr, 0, DBMS_DEFAULT_BUFFER_SIZE, - ReadBufferFromFileBase::ProfileCallback{}, CLOCK_MONOTONIC_COARSE) { + MergeTreeIndexPtr index, MergeTreeData::DataPartPtr part, size_t marks_count, const MarkRanges & all_mark_ranges) + 
: index(index), stream( + part->getFullPath() + index->getFileName(), ".idx", marks_count, + all_mark_ranges, nullptr, false, nullptr, 0, DBMS_DEFAULT_BUFFER_SIZE, + ReadBufferFromFileBase::ProfileCallback{}, CLOCK_MONOTONIC_COARSE) { stream.seekToStart(); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexReader.h b/dbms/src/Storages/MergeTree/MergeTreeIndexReader.h index 32275f7f3b2..fee68cc3915 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexReader.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexReader.h @@ -10,10 +10,10 @@ namespace DB { class MergeTreeIndexReader { public: MergeTreeIndexReader( - MergeTreeIndexPtr index, - MergeTreeData::DataPartPtr part, - size_t marks_count, - const MarkRanges & all_mark_ranges); + MergeTreeIndexPtr index, + MergeTreeData::DataPartPtr part, + size_t marks_count, + const MarkRanges & all_mark_ranges); void seek(size_t mark); diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp index 02f58fe6275..a0bc956ea65 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp @@ -27,9 +27,9 @@ void MergeTreeIndexFactory::registerIndex(const std::string &name, Creator creat } std::unique_ptr MergeTreeIndexFactory::get( - const MergeTreeData & data, - std::shared_ptr node, - const Context & context) const + const MergeTreeData & data, + std::shared_ptr node, + const Context & context) const { if (!node->type) throw Exception( diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h index 62049bed322..416f3fc6184 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h @@ -59,7 +59,7 @@ class MergeTreeIndex { public: MergeTreeIndex(String name, ExpressionActionsPtr expr, size_t granularity) - : name(name), expr(expr), granularity(granularity) {} + : name(name), expr(expr), granularity(granularity) {} virtual ~MergeTreeIndex() = default; @@ -94,9 +94,9 @@ public: const Context & context)>; std::unique_ptr get( - const MergeTreeData & data, - std::shared_ptr node, - const Context & context) const; + const MergeTreeData & data, + std::shared_ptr node, + const Context & context) const; void registerIndex(const std::string & name, Creator creator); diff --git a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp index c4b49e51b3b..1af1ad344d5 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp @@ -124,7 +124,7 @@ bool MinMaxCondition::alwaysUnknownOrTrue() const bool MinMaxCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const { std::shared_ptr granule - = std::dynamic_pointer_cast(idx_granule); + = std::dynamic_pointer_cast(idx_granule); if (!granule) { throw Exception( "Minmax index condition got wrong granule", ErrorCodes::LOGICAL_ERROR); @@ -140,16 +140,16 @@ MergeTreeIndexGranulePtr MergeTreeMinMaxIndex::createIndexGranule() const } IndexConditionPtr MergeTreeMinMaxIndex::createIndexCondition( - const SelectQueryInfo & query, const Context & context) const + const SelectQueryInfo & query, const Context & context) const { return std::make_shared(query, context, *this); }; std::unique_ptr MergeTreeMinMaxIndexCreator( - const MergeTreeData & data, - std::shared_ptr node, - const Context & context) + const MergeTreeData & data, + std::shared_ptr node, + const Context & context) { if 
(node->name.empty()) throw Exception("Index must have unique name", ErrorCodes::INCORRECT_QUERY); @@ -162,7 +162,6 @@ std::unique_ptr MergeTreeMinMaxIndexCreator( expr_list, data.getColumns().getAllPhysical()); auto minmax_expr = ExpressionAnalyzer(expr_list, syntax, context).getActions(false); - auto minmax = std::make_unique( node->name, std::move(minmax_expr), node->granularity.get()); diff --git a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h index 3311d8815e9..3378f00d1a8 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h +++ b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h @@ -45,9 +45,9 @@ class MinMaxCondition : public IndexCondition { public: MinMaxCondition( - const SelectQueryInfo & query, - const Context & context, - const MergeTreeMinMaxIndex & index); + const SelectQueryInfo & query, + const Context & context, + const MergeTreeMinMaxIndex & index); bool alwaysUnknownOrTrue() const override; @@ -64,18 +64,18 @@ class MergeTreeMinMaxIndex : public MergeTreeIndex { public: MergeTreeMinMaxIndex(String name, ExpressionActionsPtr expr, size_t granularity) - : MergeTreeIndex(name, expr, granularity) {} + : MergeTreeIndex(name, expr, granularity) {} ~MergeTreeMinMaxIndex() override = default; MergeTreeIndexGranulePtr createIndexGranule() const override; IndexConditionPtr createIndexCondition( - const SelectQueryInfo & query, const Context & context) const override; + const SelectQueryInfo & query, const Context & context) const override; }; std::unique_ptr MergeTreeMinMaxIndexCreator( - const MergeTreeData & data, std::shared_ptr node, const Context & context); + const MergeTreeData & data, std::shared_ptr node, const Context & context); } \ No newline at end of file diff --git a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp index dad1c8c7a06..b90817c7272 100644 --- a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -476,9 +476,7 @@ void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Perm for (size_t i = 0, size = skip_indexes_column_names.size(); i < size; ++i) { const auto & name = skip_indexes_column_names[i]; - skip_indexes_column_name_to_position.emplace(name, i); - skip_indexes_columns[i] = block.getByName(name); /// Reorder index columns in advance. 
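+ /// The name-to-position maps built above let the per-column write loop below find
+ /// primary-key and skip-index columns in O(1) via the corresponding find() calls.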
@@ -519,12 +517,12 @@ void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Perm auto skip_index_column_it = skip_indexes_column_name_to_position.find(it->name); if (primary_key_column_name_to_position.end() != primary_column_it) { - auto & primary_column = *primary_key_columns[primary_column_it->second].column; + const auto & primary_column = *primary_key_columns[primary_column_it->second].column; writeData(column.name, *column.type, primary_column, offset_columns, false, serialization_states[i]); } else if (skip_indexes_column_name_to_position.end() != skip_index_column_it) { - auto & index_column = *skip_indexes_columns[skip_index_column_it->second].column; + const auto & index_column = *skip_indexes_columns[skip_index_column_it->second].column; writeData(column.name, *column.type, index_column, offset_columns, false, serialization_states[i]); } else From d9b7f30245dd4bf5c68397274ead2613a23f8490 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 9 Jan 2019 22:20:50 +0300 Subject: [PATCH 046/586] added indexes to checkDataPart --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 2 +- .../ReplicatedMergeTreePartCheckThread.cpp | 1 + dbms/src/Storages/MergeTree/checkDataPart.cpp | 44 +++++++++++++++++++ dbms/src/Storages/MergeTree/checkDataPart.h | 1 + 4 files changed, 47 insertions(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index f4fc9860b67..1c411b3e836 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -2097,7 +2097,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::loadPartAndFixMetadata(const St /// Check the data while we are at it. if (part->checksums.empty()) { - part->checksums = checkDataPart(full_part_path, index_granularity, false, primary_key_data_types); + part->checksums = checkDataPart(full_part_path, index_granularity, false, primary_key_data_types, indexes); { WriteBufferFromFile out(full_part_path + "checksums.txt.tmp", 4096); diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index 0c529124698..3c77772bccb 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -225,6 +225,7 @@ void ReplicatedMergeTreePartCheckThread::checkPart(const String & part_name) storage.data.index_granularity, true, storage.data.primary_key_data_types, + storage.data.indexes, [this] { return need_stop.load(); }); if (need_stop) diff --git a/dbms/src/Storages/MergeTree/checkDataPart.cpp b/dbms/src/Storages/MergeTree/checkDataPart.cpp index 2b5da264f78..dc145ef55ca 100644 --- a/dbms/src/Storages/MergeTree/checkDataPart.cpp +++ b/dbms/src/Storages/MergeTree/checkDataPart.cpp @@ -137,6 +137,7 @@ MergeTreeData::DataPart::Checksums checkDataPart( size_t index_granularity, bool require_checksums, const DataTypes & primary_key_data_types, + const MergeTreeIndexes & indexes, std::function is_cancelled) { Logger * log = &Logger::get("checkDataPart"); @@ -241,6 +242,49 @@ MergeTreeData::DataPart::Checksums checkDataPart( rows = count; } + /// Read and check skip indexes + for (const auto index : indexes) + { + LOG_DEBUG(log, "Checking index " << index->name << " in " << path); + Stream stream(path, index->getFileName(), ".idx"); + size_t mark_num = 0; + + while (!stream.uncompressed_hashing_buf.eof()) + { + if (stream.mrk_hashing_buf.eof()) 
+ throw Exception("Unexpected end of mrk file while reading index " + index->name, + ErrorCodes::CORRUPTED_DATA); + try + { + stream.assertMark(); + } + catch (Exception &e) + { + e.addMessage("Cannot read mark " + toString(mark_num) + + " in file " + stream.mrk_file_path + + ", mrk file offset: " + toString(stream.mrk_hashing_buf.count())); + throw; + } + try + { + index->createIndexGranule()->deserializeBinary(stream.uncompressed_hashing_buf); + } + catch (Exception &e) + { + e.addMessage("Cannot read granule " + toString(mark_num) + + " in file " + stream.bin_file_path + + ", mrk file offset: " + toString(stream.mrk_hashing_buf.count())); + throw; + } + ++mark_num; + if (is_cancelled()) + return {}; + } + + stream.assertEnd(); + stream.saveChecksums(checksums_data); + } + /// Read all columns, calculate checksums and validate marks. for (const NameAndTypePair & name_type : columns) { diff --git a/dbms/src/Storages/MergeTree/checkDataPart.h b/dbms/src/Storages/MergeTree/checkDataPart.h index 584729a0029..30aa2ebf68e 100644 --- a/dbms/src/Storages/MergeTree/checkDataPart.h +++ b/dbms/src/Storages/MergeTree/checkDataPart.h @@ -17,6 +17,7 @@ MergeTreeData::DataPart::Checksums checkDataPart( size_t index_granularity, bool require_checksums, const DataTypes & primary_key_data_types, /// Check the primary key. If it is not necessary, pass an empty array. + const MergeTreeIndexes & indexes = {}, /// Check skip indexes std::function is_cancelled = []{ return false; }); } From 6490c23aa6c7dc48c3617136ffdfcefad2a94434 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Thu, 10 Jan 2019 14:17:10 +0300 Subject: [PATCH 047/586] added tests for minmax index --- .../0_stateless/00823_minmax_index.reference | 8 ++++ .../0_stateless/00823_minmax_index.sql | 40 +++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00823_minmax_index.reference create mode 100644 dbms/tests/queries/0_stateless/00823_minmax_index.sql diff --git a/dbms/tests/queries/0_stateless/00823_minmax_index.reference b/dbms/tests/queries/0_stateless/00823_minmax_index.reference new file mode 100644 index 00000000000..046f6502f65 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00823_minmax_index.reference @@ -0,0 +1,8 @@ +0 5 4.7 6.50 cba b 2014-01-04 +0 5 4.7 6.50 cba b 2014-03-11 +2 5 4.7 6.50 cba b 2014-06-11 +2 5 4.7 6.50 cba b 2015-01-01 +0 5 4.7 6.50 cba b 2014-01-04 +0 5 4.7 6.50 cba b 2014-03-11 +2 5 4.7 6.50 cba b 2014-06-11 +2 5 4.7 6.50 cba b 2015-01-01 diff --git a/dbms/tests/queries/0_stateless/00823_minmax_index.sql b/dbms/tests/queries/0_stateless/00823_minmax_index.sql new file mode 100644 index 00000000000..a1ca97f3a7b --- /dev/null +++ b/dbms/tests/queries/0_stateless/00823_minmax_index.sql @@ -0,0 +1,40 @@ +DROP TABLE IF EXISTS test.minmax_idx; + +CREATE TABLE test.minmax_idx +( + u64 UInt64, + i32 Int32, + f64 Float64, + d Decimal(10, 2), + s String, + e Enum8('a' = 1, 'b' = 2, 'c' = 3), + dt Date +) ENGINE = MergeTree() +ORDER BY u64 +INDEXES idx_all BY (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 2, + idx_2 BY (u64 + toYear(dt), substring(s, 2, 4)) TYPE minmax GRANULARITY 3 +SETTINGS index_granularity = 2; + + +/* many small inserts => table will make merges */ +INSERT INTO test.minmax_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-01-01'); +INSERT INTO test.minmax_idx VALUES (0, 5, 4.7, 6.5, 'cba', 'b', '2014-01-04'); +INSERT INTO test.minmax_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2017-01-01'); +INSERT INTO test.minmax_idx VALUES (1, 2, 4.5, 2.5, 
'abc', 'a', '2016-01-01'); +INSERT INTO test.minmax_idx VALUES (2, 5, 4.7, 6.5, 'cba', 'b', '2015-01-01'); +INSERT INTO test.minmax_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-11-11'); + +INSERT INTO test.minmax_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-02-11'); +INSERT INTO test.minmax_idx VALUES (0, 5, 4.7, 6.5, 'cba', 'b', '2014-03-11'); +INSERT INTO test.minmax_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-04-11'); +INSERT INTO test.minmax_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-05-11'); +INSERT INTO test.minmax_idx VALUES (2, 5, 4.7, 6.5, 'cba', 'b', '2014-06-11'); +INSERT INTO test.minmax_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-07-11'); + +/* simple select */ +SELECT * FROM test.minmax_idx WHERE i32 = 5 AND i32 + f64 < 12 AND 3 < d AND d < 7 AND (s = 'bac' OR s = 'cba') ORDER BY dt; + +/* select with hole made by primary key */ +SELECT * FROM test.minmax_idx WHERE u64 != 1 AND e = 'b' ORDER BY dt; + +DROP TABLE test.minmax_idx; \ No newline at end of file From 3f8c2ad814d03221d6ee7446618501abef3b8a16 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Thu, 10 Jan 2019 15:57:12 +0300 Subject: [PATCH 048/586] fixed constructor --- dbms/src/Storages/MergeTree/MergeTreeIndexes.h | 15 ++++++++++++--- .../Storages/MergeTree/MergeTreeMinMaxIndex.cpp | 13 +++++++------ .../src/Storages/MergeTree/MergeTreeMinMaxIndex.h | 9 +++++++-- 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h index 416f3fc6184..fab61d07785 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexes.h @@ -58,8 +58,17 @@ using IndexConditionPtr = std::shared_ptr; class MergeTreeIndex { public: - MergeTreeIndex(String name, ExpressionActionsPtr expr, size_t granularity) - : name(name), expr(expr), granularity(granularity) {} + MergeTreeIndex( + String name, + ExpressionActionsPtr expr, + const Names & columns, + const DataTypes & data_types, + size_t granularity) + : name(name) + , expr(expr) + , columns(columns) + , data_types(data_types) + , granularity(granularity) {} virtual ~MergeTreeIndex() = default; @@ -73,9 +82,9 @@ public: String name; ExpressionActionsPtr expr; - size_t granularity; Names columns; DataTypes data_types; + size_t granularity; }; diff --git a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp index 1af1ad344d5..d410594bf45 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp @@ -162,24 +162,25 @@ std::unique_ptr MergeTreeMinMaxIndexCreator( expr_list, data.getColumns().getAllPhysical()); auto minmax_expr = ExpressionAnalyzer(expr_list, syntax, context).getActions(false); - auto minmax = std::make_unique( - node->name, std::move(minmax_expr), node->granularity.get()); - auto sample = ExpressionAnalyzer(expr_list, syntax, context) .getActions(true)->getSampleBlock(); + Names columns; + DataTypes data_types; + Poco::Logger * log = &Poco::Logger::get("minmax_idx"); LOG_DEBUG(log, "new minmax index"); for (size_t i = 0; i < expr_list->children.size(); ++i) { const auto & column = sample.getByPosition(i); - minmax->columns.emplace_back(column.name); - minmax->data_types.emplace_back(column.type); + columns.emplace_back(column.name); + data_types.emplace_back(column.type); LOG_DEBUG(log, ">" << column.name << " " << column.type->getName()); } - return minmax; + return std::make_unique( + node->name, 
std::move(minmax_expr), columns, data_types, node->granularity.get());; } } \ No newline at end of file diff --git a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h index 3378f00d1a8..aee681b8e9a 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h +++ b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h @@ -63,8 +63,13 @@ private: class MergeTreeMinMaxIndex : public MergeTreeIndex { public: - MergeTreeMinMaxIndex(String name, ExpressionActionsPtr expr, size_t granularity) - : MergeTreeIndex(name, expr, granularity) {} + MergeTreeMinMaxIndex( + String name, + ExpressionActionsPtr expr, + const Names & columns, + const DataTypes & data_types, + size_t granularity) + : MergeTreeIndex(name, expr, columns, data_types, granularity) {} ~MergeTreeMinMaxIndex() override = default; From 634d5eb51648f4da309d1abc308f9ab9d4912623 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Thu, 10 Jan 2019 16:30:06 +0300 Subject: [PATCH 049/586] fix style --- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 84 +++++++++---------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 1c6cbad14ea..45bf3ff6e64 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -145,8 +145,8 @@ BlockInputStreams MergeTreeDataSelectExecutor::read( const PartitionIdToMaxBlock * max_block_numbers_to_read) const { return readFromParts( - data.getDataPartsVector(), column_names_to_return, query_info, context, - max_block_size, num_streams, max_block_numbers_to_read); + data.getDataPartsVector(), column_names_to_return, query_info, context, + max_block_size, num_streams, max_block_numbers_to_read); } BlockInputStreams MergeTreeDataSelectExecutor::readFromParts( @@ -295,8 +295,8 @@ BlockInputStreams MergeTreeDataSelectExecutor::readFromParts( if (select_sample_size) { relative_sample_size.assign( - typeid_cast(*select_sample_size).ratio.numerator, - typeid_cast(*select_sample_size).ratio.denominator); + typeid_cast(*select_sample_size).ratio.numerator, + typeid_cast(*select_sample_size).ratio.denominator); if (relative_sample_size < 0) throw Exception("Negative sample size", ErrorCodes::ARGUMENT_OUT_OF_BOUND); @@ -304,8 +304,8 @@ BlockInputStreams MergeTreeDataSelectExecutor::readFromParts( relative_sample_offset = 0; if (select_sample_offset) relative_sample_offset.assign( - typeid_cast(*select_sample_offset).ratio.numerator, - typeid_cast(*select_sample_offset).ratio.denominator); + typeid_cast(*select_sample_offset).ratio.numerator, + typeid_cast(*select_sample_offset).ratio.denominator); if (relative_sample_offset < 0) throw Exception("Negative sample offset", ErrorCodes::ARGUMENT_OUT_OF_BOUND); @@ -392,7 +392,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::readFromParts( size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); else throw Exception("Invalid sampling column type in storage parameters: " + type->getName() + ". 
Must be unsigned integer type.", - ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); + ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); if (settings.parallel_replicas_count > 1) { @@ -578,25 +578,25 @@ BlockInputStreams MergeTreeDataSelectExecutor::readFromParts( column_names_to_read.erase(std::unique(column_names_to_read.begin(), column_names_to_read.end()), column_names_to_read.end()); res = spreadMarkRangesAmongStreamsFinal( - std::move(parts_with_ranges), - column_names_to_read, - max_block_size, - settings.use_uncompressed_cache, - query_info.prewhere_info, - virt_column_names, - settings); + std::move(parts_with_ranges), + column_names_to_read, + max_block_size, + settings.use_uncompressed_cache, + query_info.prewhere_info, + virt_column_names, + settings); } else { res = spreadMarkRangesAmongStreams( - std::move(parts_with_ranges), - num_streams, - column_names_to_read, - max_block_size, - settings.use_uncompressed_cache, - query_info.prewhere_info, - virt_column_names, - settings); + std::move(parts_with_ranges), + num_streams, + column_names_to_read, + max_block_size, + settings.use_uncompressed_cache, + query_info.prewhere_info, + virt_column_names, + settings); } if (use_sampling) @@ -607,7 +607,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::readFromParts( if (sample_factor_column_queried) for (auto & stream : res) stream = std::make_shared>( - stream, std::make_shared(), used_sample_factor, "_sample_factor"); + stream, std::make_shared(), used_sample_factor, "_sample_factor"); if (query_info.prewhere_info && query_info.prewhere_info->remove_columns_actions) for (auto & stream : res) @@ -628,9 +628,9 @@ BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( const Settings & settings) const { const size_t min_marks_for_concurrent_read = - (settings.merge_tree_min_rows_for_concurrent_read + data.index_granularity - 1) / data.index_granularity; + (settings.merge_tree_min_rows_for_concurrent_read + data.index_granularity - 1) / data.index_granularity; const size_t max_marks_to_use_cache = - (settings.merge_tree_max_rows_to_use_cache + data.index_granularity - 1) / data.index_granularity; + (settings.merge_tree_max_rows_to_use_cache + data.index_granularity - 1) / data.index_granularity; /// Count marks for each part. std::vector sum_marks_in_parts(parts.size()); @@ -658,8 +658,8 @@ BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( num_streams = std::max((sum_marks + min_marks_for_concurrent_read - 1) / min_marks_for_concurrent_read, parts.size()); MergeTreeReadPoolPtr pool = std::make_shared( - num_streams, sum_marks, min_marks_for_concurrent_read, parts, data, prewhere_info, true, - column_names, MergeTreeReadPool::BackoffSettings(settings), settings.preferred_block_size_bytes, false); + num_streams, sum_marks, min_marks_for_concurrent_read, parts, data, prewhere_info, true, + column_names, MergeTreeReadPool::BackoffSettings(settings), settings.preferred_block_size_bytes, false); /// Let's estimate total number of rows for progress bar. 
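/// (Illustrative arithmetic, values assumed rather than taken from this change: with the classic
/// default index_granularity of 8192 and sum_marks == 1000, the estimate below is
/// 8192 * 1000 = 8192000 rows. It is an upper bound, since the last mark of a part may cover
/// fewer than index_granularity rows.)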
const size_t total_rows = data.index_granularity * sum_marks; @@ -668,9 +668,9 @@ BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( for (size_t i = 0; i < num_streams; ++i) { res.emplace_back(std::make_shared( - i, pool, min_marks_for_concurrent_read, max_block_size, settings.preferred_block_size_bytes, - settings.preferred_max_column_in_block_size_bytes, data, use_uncompressed_cache, - prewhere_info, settings, virt_columns)); + i, pool, min_marks_for_concurrent_read, max_block_size, settings.preferred_block_size_bytes, + settings.preferred_max_column_in_block_size_bytes, data, use_uncompressed_cache, + prewhere_info, settings, virt_columns)); if (i == 0) { @@ -744,10 +744,10 @@ BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( } BlockInputStreamPtr source_stream = std::make_shared( - data, part.data_part, max_block_size, settings.preferred_block_size_bytes, - settings.preferred_max_column_in_block_size_bytes, column_names, ranges_to_get_from_part, - use_uncompressed_cache, prewhere_info, true, settings.min_bytes_to_use_direct_io, - settings.max_read_buffer_size, true, virt_columns, part.part_index_in_query); + data, part.data_part, max_block_size, settings.preferred_block_size_bytes, + settings.preferred_max_column_in_block_size_bytes, column_names, ranges_to_get_from_part, + use_uncompressed_cache, prewhere_info, true, settings.min_bytes_to_use_direct_io, + settings.max_read_buffer_size, true, virt_columns, part.part_index_in_query); res.push_back(source_stream); } @@ -770,7 +770,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal const Settings & settings) const { const size_t max_marks_to_use_cache = - (settings.merge_tree_max_rows_to_use_cache + data.index_granularity - 1) / data.index_granularity; + (settings.merge_tree_max_rows_to_use_cache + data.index_granularity - 1) / data.index_granularity; size_t sum_marks = 0; for (size_t i = 0; i < parts.size(); ++i) @@ -789,10 +789,10 @@ BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal RangesInDataPart & part = parts[part_index]; BlockInputStreamPtr source_stream = std::make_shared( - data, part.data_part, max_block_size, settings.preferred_block_size_bytes, - settings.preferred_max_column_in_block_size_bytes, column_names, part.ranges, use_uncompressed_cache, - prewhere_info, true, settings.min_bytes_to_use_direct_io, settings.max_read_buffer_size, true, - virt_columns, part.part_index_in_query); + data, part.data_part, max_block_size, settings.preferred_block_size_bytes, + settings.preferred_max_column_in_block_size_bytes, column_names, part.ranges, use_uncompressed_cache, + prewhere_info, true, settings.min_bytes_to_use_direct_io, settings.max_read_buffer_size, true, + virt_columns, part.part_index_in_query); to_merge.emplace_back(std::make_shared(source_stream, data.sorting_key_expr)); } @@ -913,7 +913,7 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( } may_be_true = key_condition.mayBeTrueAfter( - used_key_size, index_left.data(), data.primary_key_data_types); + used_key_size, index_left.data(), data.primary_key_data_types); } else { @@ -924,7 +924,7 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( } may_be_true = key_condition.mayBeTrueInRange( - used_key_size, index_left.data(), index_right.data(), data.primary_key_data_types); + used_key_size, index_left.data(), index_right.data(), data.primary_key_data_types); } if (!may_be_true) @@ -933,7 +933,7 @@ MarkRanges 
MergeTreeDataSelectExecutor::markRangesFromPKRange( if (range.end == range.begin + 1) { /// We saw a useful gap between neighboring marks. Either add it to the last range, or start a new range. - if (res.empty() || range.begin - res.back().end > min_marks_for_seek) // is it a bug?? + if (res.empty() || range.begin - res.back().end > min_marks_for_seek) res.push_back(range); else res.back().end = range.end; From 701627ec6000fb30da876872fb7eab6686c60002 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Thu, 10 Jan 2019 16:50:41 +0300 Subject: [PATCH 050/586] fixed includes --- dbms/src/Storages/MergeTree/MergeTreeData.h | 1 - dbms/src/Storages/MergeTree/MergeTreeIndexReader.h | 1 - dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp | 5 +++++ dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h | 5 ----- 4 files changed, 5 insertions(+), 7 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index e49a473afe1..e71ad5fa9d6 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexReader.h b/dbms/src/Storages/MergeTree/MergeTreeIndexReader.h index fee68cc3915..5eb2caf62cd 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexReader.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexReader.h @@ -2,7 +2,6 @@ #include #include -#include #include namespace DB { diff --git a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp index d410594bf45..993dd7acc02 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp @@ -1,4 +1,9 @@ #include + +#include +#include +#include + #include namespace DB diff --git a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h index aee681b8e9a..198263ae243 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h +++ b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h @@ -1,14 +1,9 @@ #pragma once #include -#include #include #include -#include -#include -#include - #include From d310d1a5ec383f9c9ba3c0475159f99473d8fb0f Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Thu, 10 Jan 2019 19:51:49 +0300 Subject: [PATCH 051/586] fixed setSkipIndexes --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 54 +++++++++++-------- dbms/src/Storages/MergeTree/MergeTreeData.h | 1 + dbms/src/Storages/StorageMergeTree.cpp | 1 + .../Storages/StorageReplicatedMergeTree.cpp | 1 + 4 files changed, 36 insertions(+), 21 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 3801b81530d..c2396e02988 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -116,6 +116,7 @@ MergeTreeData::MergeTreeData( data_parts_by_state_and_info(data_parts_indexes.get()) { setPrimaryKeyAndColumns(order_by_ast_, primary_key_ast_, columns_); + setSkipIndexes(indexes_ast_); /// NOTE: using the same columns list as is read when performing actual merges. 
merging_params.check(getColumns().getAllPhysical()); @@ -189,8 +190,6 @@ MergeTreeData::MergeTreeData( throw Exception( "MergeTree data format version on disk doesn't support custom partitioning", ErrorCodes::METADATA_MISMATCH); - - setSkipIndexes(indexes_ast_); } @@ -356,27 +355,32 @@ void MergeTreeData::setSkipIndexes(const ASTPtr & indexes_asts, bool only_check) { return; } + + MergeTreeIndexes new_indexes; + std::set names; + auto index_list = std::dynamic_pointer_cast(indexes_asts); + + for (const auto &index_ast : index_list->children) + { + new_indexes.push_back( + std::move(MergeTreeIndexFactory::instance().get( + *this, + std::dynamic_pointer_cast(index_ast), + global_context))); + + if (names.find(new_indexes.back()->name) != names.end()) + { + throw Exception( + "Index with name `" + new_indexes.back()->name + "` already exists", + ErrorCodes::LOGICAL_ERROR); + } + names.insert(new_indexes.back()->name); + } + if (!only_check) { - indexes.clear(); - std::set names; - auto index_list = std::dynamic_pointer_cast(indexes_asts); - - for (const auto &index_ast : index_list->children) - { - indexes.push_back( - std::move(MergeTreeIndexFactory::instance().get( - *this, - std::dynamic_pointer_cast(index_ast), - global_context))); - if (names.find(indexes.back()->name) != names.end()) - { - throw Exception( - "Index with name `" + indexes.back()->name + "` already exsists", - ErrorCodes::LOGICAL_ERROR); - } - names.insert(indexes.back()->name); - } + skip_indexes_ast = indexes_asts; + indexes = std::move(new_indexes); } } @@ -1056,6 +1060,13 @@ void MergeTreeData::checkAlter(const AlterCommands & commands) columns_alter_forbidden.insert(col); } + for (auto index : indexes) + { + /// TODO: a dedicated error message suggesting "DROP INDEX" + for (const String & col : index->expr->getRequiredColumns()) + columns_alter_forbidden.insert(col); + } + if (sorting_key_expr) { for (const ExpressionAction & action : sorting_key_expr->getActions()) @@ -1111,6 +1122,7 @@ void MergeTreeData::checkAlter(const AlterCommands & commands) } setPrimaryKeyAndColumns(new_order_by_ast, new_primary_key_ast, new_columns, /* only_check = */ true); + setSkipIndexes(skip_indexes_ast, /* only_check = */ true); /// Check that type conversions are possible. ExpressionActionsPtr unused_expression; diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index e71ad5fa9d6..70ae8b25c67 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -583,6 +583,7 @@ public: /// Secondary (data skipping) indexes for MergeTree MergeTreeIndexes indexes; + ASTPtr skip_indexes_ast; /// Names of columns for primary key + secondary sorting columns. Names sorting_key_columns; diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 833b20ab05b..f71a64662a4 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -244,6 +244,7 @@ void StorageMergeTree::alter( /// Reinitialize primary key because primary key column types might have changed.
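/// The skip indexes are re-created for the same reason (see the setSkipIndexes call added below):
/// their expressions may reference columns whose types were just altered.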
data.setPrimaryKeyAndColumns(new_order_by_ast, new_primary_key_ast, new_columns); + data.setSkipIndexes(data.skip_indexes_ast); for (auto & transaction : transactions) transaction->commit(); diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index f60250d1be5..50b133482b2 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -461,6 +461,7 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column /// Even if the primary/sorting keys didn't change we must reinitialize it /// because primary key column types might have changed. data.setPrimaryKeyAndColumns(new_order_by_ast, new_primary_key_ast, new_columns); + data.setSkipIndexes(data.skip_indexes_ast); } From 89b831eaf75c7479bd89876bb0153b9374957a54 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Thu, 10 Jan 2019 20:48:04 +0300 Subject: [PATCH 052/586] added indexes meta to zookeeper --- .../MergeTree/ReplicatedMergeTreeTableMetadata.cpp | 14 ++++++++++++++ .../MergeTree/ReplicatedMergeTreeTableMetadata.h | 1 + 2 files changed, 15 insertions(+) diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index ae5249d3d16..aaabc6901ae 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -44,6 +44,8 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr if (data.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) partition_key = formattedAST(MergeTreeData::extractKeyExpressionList(data.partition_by_ast)); + + skip_indexes = formattedAST(data.skip_indexes_ast); } void ReplicatedMergeTreeTableMetadata::write(WriteBuffer & out) const @@ -64,6 +66,9 @@ void ReplicatedMergeTreeTableMetadata::write(WriteBuffer & out) const if (!sorting_key.empty()) out << "sorting key: " << sorting_key << "\n"; + + if (!skip_indexes.empty()) + out << "skip indexes: " << skip_indexes << "\n"; } String ReplicatedMergeTreeTableMetadata::toString() const @@ -93,6 +98,9 @@ void ReplicatedMergeTreeTableMetadata::read(ReadBuffer & in) if (checkString("sorting key: ", in)) in >> sorting_key >> "\n"; + + if (checkString("skip indexes: ", in)) + in >> skip_indexes >> "\n"; } ReplicatedMergeTreeTableMetadata ReplicatedMergeTreeTableMetadata::parse(const String & s) @@ -175,6 +183,12 @@ ReplicatedMergeTreeTableMetadata::checkAndFindDiff(const ReplicatedMergeTreeTabl ErrorCodes::METADATA_MISMATCH); } + if (skip_indexes != from_zk.skip_indexes) + throw Exception("Existing table metadata in ZooKeeper differs in skip indexes." 
+ " Stored in ZooKeeper: " + from_zk.skip_indexes + + ", local: " + skip_indexes, + ErrorCodes::METADATA_MISMATCH); + return diff; } diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h index b063e226348..5fd863046e4 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h @@ -25,6 +25,7 @@ struct ReplicatedMergeTreeTableMetadata MergeTreeDataFormatVersion data_format_version; String partition_key; String sorting_key; + String skip_indexes; ReplicatedMergeTreeTableMetadata() = default; explicit ReplicatedMergeTreeTableMetadata(const MergeTreeData & data); From 9031be381ad9965dbe7ab1a528621b77d50e28e9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 11 Jan 2019 19:58:43 +0300 Subject: [PATCH 053/586] Attempt to implemnt global thread pool #4018 --- dbms/src/Databases/IDatabase.h | 4 +- .../src/Interpreters/InterpreterCreateQuery.h | 4 +- libs/libcommon/include/common/ThreadPool.h | 73 +++++++++++++++---- libs/libcommon/src/ThreadPool.cpp | 61 ++++++++++------ 4 files changed, 103 insertions(+), 39 deletions(-) diff --git a/dbms/src/Databases/IDatabase.h b/dbms/src/Databases/IDatabase.h index 7b7b877b0e1..64a65c2fb5f 100644 --- a/dbms/src/Databases/IDatabase.h +++ b/dbms/src/Databases/IDatabase.h @@ -8,12 +8,10 @@ #include #include #include +#include #include -class ThreadPool; - - namespace DB { diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.h b/dbms/src/Interpreters/InterpreterCreateQuery.h index e450ae0728e..40089e17d25 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.h +++ b/dbms/src/Interpreters/InterpreterCreateQuery.h @@ -2,12 +2,12 @@ #include #include +#include -class ThreadPool; - namespace DB { + class Context; class ASTCreateQuery; class ASTExpressionList; diff --git a/libs/libcommon/include/common/ThreadPool.h b/libs/libcommon/include/common/ThreadPool.h index dd82c0c0399..02e1a02c58e 100644 --- a/libs/libcommon/include/common/ThreadPool.h +++ b/libs/libcommon/include/common/ThreadPool.h @@ -7,6 +7,7 @@ #include #include #include +#include /** Very simple thread pool similar to boost::threadpool. @@ -14,17 +15,22 @@ * - catches exceptions and rethrows on wait. */ -class ThreadPool +template +class ThreadPoolImpl { public: using Job = std::function; - /// Size is constant, all threads are created immediately. - explicit ThreadPool(size_t m_size); + /// Size is constant. Up to num_threads are created on demand and then run until shutdown. + explicit ThreadPoolImpl(size_t num_threads); - /// Add new job. Locks until free thread in pool become available or exception in one of threads was thrown. + /// queue_size - maximum number of running plus scheduled jobs. It can be greater than num_threads. Zero means unlimited. + ThreadPoolImpl(size_t num_threads, size_t queue_size); + + /// Add new job. Locks until number of active jobs is less than maximum or exception in one of threads was thrown. /// If an exception in some thread was thrown, method silently returns, and exception will be rethrown only on call to 'wait' function. - void schedule(Job job); + /// Priority: greater is higher. + void schedule(Job job, int priority = 0); /// Wait for all currently active jobs to be done. /// You may call schedule and wait many times in arbitary order. @@ -34,24 +40,40 @@ public: /// Waits for all threads. Doesn't rethrow exceptions (use 'wait' method to rethrow exceptions). 
/// You should not destroy object while calling schedule or wait methods from another threads. - ~ThreadPool(); + ~ThreadPoolImpl(); - size_t size() const { return m_size; } + size_t size() const { return num_threads; } - /// Returns number of active jobs. + /// Returns number of running and scheduled jobs. size_t active() const; private: mutable std::mutex mutex; - std::condition_variable has_free_thread; - std::condition_variable has_new_job_or_shutdown; + std::condition_variable job_finished; + std::condition_variable new_job_or_shutdown; + + const size_t num_threads; + const size_t queue_size; - const size_t m_size; size_t active_jobs = 0; bool shutdown = false; - std::queue jobs; - std::vector threads; + struct JobWithPriority + { + Job job; + int priority; + + JobWithPriority(Job job, int priority) + : job(job), priority(priority) {} + + bool operator< (const JobWithPriority & rhs) const + { + return priority < rhs.priority; + } + }; + + std::priority_queue jobs; + std::vector threads; std::exception_ptr first_exception; @@ -61,6 +83,31 @@ private: }; +using FreeThreadPool = ThreadPoolImpl; + +class GlobalThreadPool : public FreeThreadPool, public ext::singleton +{ +public: + GlobalThreadPool() : FreeThreadPool(10000) {} /// TODO: global blocking limit may lead to deadlocks. +}; + +class ThreadFromGlobalPool +{ +public: + ThreadFromGlobalPool(std::function func) + { + GlobalThreadPool::instance().schedule(func); + } + + void join() + { + /// noop, the std::thread will continue to run inside global pool. + } +}; + +using ThreadPool = ThreadPoolImpl; + + /// Allows to save first catched exception in jobs and postpone its rethrow. class ExceptionHandler { diff --git a/libs/libcommon/src/ThreadPool.cpp b/libs/libcommon/src/ThreadPool.cpp index e45e64853dc..e460acb3163 100644 --- a/libs/libcommon/src/ThreadPool.cpp +++ b/libs/libcommon/src/ThreadPool.cpp @@ -2,14 +2,21 @@ #include -ThreadPool::ThreadPool(size_t m_size) - : m_size(m_size) +template +ThreadPoolImpl::ThreadPoolImpl(size_t num_threads) + : ThreadPoolImpl(num_threads, num_threads) { - threads.reserve(m_size); +} + +template +ThreadPoolImpl::ThreadPoolImpl(size_t num_threads, size_t queue_size) + : num_threads(num_threads), queue_size(queue_size) +{ + threads.reserve(num_threads); try { - for (size_t i = 0; i < m_size; ++i) + for (size_t i = 0; i < num_threads; ++i) threads.emplace_back([this] { worker(); }); } catch (...) 
@@ -19,25 +26,30 @@ ThreadPool::ThreadPool(size_t m_size) } } -void ThreadPool::schedule(Job job) +template +void ThreadPoolImpl::schedule(Job job, int priority) { { std::unique_lock lock(mutex); - has_free_thread.wait(lock, [this] { return active_jobs < m_size || shutdown; }); + job_finished.wait(lock, [this] { return !queue_size || active_jobs < queue_size || shutdown; }); if (shutdown) return; - jobs.push(std::move(job)); + jobs.emplace(std::move(job), priority); ++active_jobs; + + if (threads.size() < std::min(num_threads, active_jobs)) + threads.emplace_back([this] { worker(); }); } - has_new_job_or_shutdown.notify_one(); + new_job_or_shutdown.notify_one(); } -void ThreadPool::wait() +template +void ThreadPoolImpl::wait() { { std::unique_lock lock(mutex); - has_free_thread.wait(lock, [this] { return active_jobs == 0; }); + job_finished.wait(lock, [this] { return active_jobs == 0; }); if (first_exception) { @@ -48,19 +60,21 @@ void ThreadPool::wait() } } -ThreadPool::~ThreadPool() +template +ThreadPoolImpl::~ThreadPoolImpl() { finalize(); } -void ThreadPool::finalize() +template +void ThreadPoolImpl::finalize() { { std::unique_lock lock(mutex); shutdown = true; } - has_new_job_or_shutdown.notify_all(); + new_job_or_shutdown.notify_all(); for (auto & thread : threads) thread.join(); @@ -68,14 +82,15 @@ void ThreadPool::finalize() threads.clear(); } -size_t ThreadPool::active() const +template +size_t ThreadPoolImpl::active() const { std::unique_lock lock(mutex); return active_jobs; } - -void ThreadPool::worker() +template +void ThreadPoolImpl::worker() { while (true) { @@ -84,12 +99,12 @@ void ThreadPool::worker() { std::unique_lock lock(mutex); - has_new_job_or_shutdown.wait(lock, [this] { return shutdown || !jobs.empty(); }); + new_job_or_shutdown.wait(lock, [this] { return shutdown || !jobs.empty(); }); need_shutdown = shutdown; if (!jobs.empty()) { - job = std::move(jobs.front()); + job = jobs.top().job; jobs.pop(); } else @@ -113,8 +128,8 @@ void ThreadPool::worker() shutdown = true; --active_jobs; } - has_free_thread.notify_all(); - has_new_job_or_shutdown.notify_all(); + job_finished.notify_all(); + new_job_or_shutdown.notify_all(); return; } } @@ -124,11 +139,15 @@ void ThreadPool::worker() --active_jobs; } - has_free_thread.notify_all(); + job_finished.notify_all(); } } +template class ThreadPoolImpl; +template class ThreadPoolImpl; + + void ExceptionHandler::setException(std::exception_ptr && exception) { std::unique_lock lock(mutex); From 79ac86375ea65b022872ebfdc73ebee374d6d730 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 11 Jan 2019 20:07:04 +0300 Subject: [PATCH 054/586] Attempt to implemnt global thread pool #4018 --- contrib/jemalloc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/jemalloc b/contrib/jemalloc index 41b7372eade..cd2931ad9bb 160000 --- a/contrib/jemalloc +++ b/contrib/jemalloc @@ -1 +1 @@ -Subproject commit 41b7372eadee941b9164751b8d4963f915d3ceae +Subproject commit cd2931ad9bbd78208565716ab102e86d858c2fff From 43774a38b8ffc1534a94c258f9b5bb679be2acbd Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 11 Jan 2019 20:20:06 +0300 Subject: [PATCH 055/586] Attempt to implemnt global thread pool #4018 --- libs/libcommon/src/ThreadPool.cpp | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/libs/libcommon/src/ThreadPool.cpp b/libs/libcommon/src/ThreadPool.cpp index e460acb3163..e3f03e18a46 100644 --- a/libs/libcommon/src/ThreadPool.cpp +++ b/libs/libcommon/src/ThreadPool.cpp @@ -13,17 +13,6 @@ 
ThreadPoolImpl::ThreadPoolImpl(size_t num_threads, size_t queue_size) : num_threads(num_threads), queue_size(queue_size) { threads.reserve(num_threads); - - try - { - for (size_t i = 0; i < num_threads; ++i) - threads.emplace_back([this] { worker(); }); - } - catch (...) - { - finalize(); - throw; - } } template From 13dd877026d7756f5ec1d31dc7e0860b1bb57a75 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 11 Jan 2019 21:38:11 +0300 Subject: [PATCH 056/586] Attempt to implement global thread pool #4018 --- libs/libcommon/include/common/ThreadPool.h | 32 ++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/libs/libcommon/include/common/ThreadPool.h b/libs/libcommon/include/common/ThreadPool.h index 02e1a02c58e..a8cf84dd7b5 100644 --- a/libs/libcommon/include/common/ThreadPool.h +++ b/libs/libcommon/include/common/ThreadPool.h @@ -94,15 +94,43 @@ public: class ThreadFromGlobalPool { public: + ThreadFromGlobalPool() {} + ThreadFromGlobalPool(std::function func) { - GlobalThreadPool::instance().schedule(func); + mutex = std::make_unique(); + /// The function object must be copyable, so we wrap lock_guard in shared_ptr. + GlobalThreadPool::instance().schedule([lock = std::make_shared>(*mutex), func = std::move(func)] { func(); }); + } + + ThreadFromGlobalPool(ThreadFromGlobalPool && rhs) + { + *this = std::move(rhs); + } + + ThreadFromGlobalPool & operator=(ThreadFromGlobalPool && rhs) + { + if (mutex) + std::terminate(); + mutex = std::move(rhs.mutex); + return *this; + } + + ~ThreadFromGlobalPool() + { + if (mutex) + std::terminate(); + } + + void join() { - /// noop, the std::thread will continue to run inside global pool. + { + std::lock_guard lock(*mutex); + } + mutex.reset(); } +private: + std::unique_ptr mutex; /// Object must be moveable.
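+    /// A sketch of the join-through-mutex lifecycle implemented above (do_work is a placeholder):
+    ///     ThreadFromGlobalPool t([]{ do_work(); });
+    ///     /// the constructor locks *mutex via the lock_guard captured into the scheduled closure;
+    ///     t.join();
+    ///     /// join() blocks acquiring *mutex until the pool destroys that closure, i.e. after do_work() returns.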
}; using ThreadPool = ThreadPoolImpl; From abcd5a2a49a00364f1ec283abd066dfa93382046 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 11 Jan 2019 22:12:36 +0300 Subject: [PATCH 057/586] Attempt to implemnt global thread pool #4018 --- dbms/programs/benchmark/Benchmark.cpp | 2 +- dbms/programs/copier/ClusterCopier.cpp | 2 +- .../performance-test/PerformanceTest.cpp | 2 +- dbms/src/Common/tests/CMakeLists.txt | 6 + dbms/src/Common/tests/gtest_rw_lock.cpp | 2 +- .../src/Common/tests/parallel_aggregation.cpp | 2 +- .../Common/tests/parallel_aggregation2.cpp | 2 +- .../Common/tests/thread_creation_latency.cpp | 2 +- dbms/src/Common/tests/thread_pool.cpp | 2 +- .../AsynchronousBlockInputStream.h | 2 +- ...ggregatedMemoryEfficientBlockInputStream.h | 2 +- .../PushingToViewsBlockOutputStream.cpp | 2 +- dbms/src/Databases/DatabaseFactory.h | 2 +- dbms/src/Databases/DatabaseOrdinary.cpp | 2 +- dbms/src/Databases/IDatabase.h | 2 +- dbms/src/IO/AsynchronousWriteBuffer.h | 2 +- dbms/src/Interpreters/Aggregator.h | 2 +- dbms/src/Interpreters/Compiler.h | 2 +- dbms/src/Interpreters/Context.h | 2 +- dbms/src/Interpreters/EmbeddedDictionaries.h | 2 +- .../src/Interpreters/InterpreterCreateQuery.h | 2 +- .../Interpreters/InterpreterSystemQuery.cpp | 2 +- dbms/src/Interpreters/loadMetadata.cpp | 2 +- .../src/Interpreters/tests/internal_iotop.cpp | 2 +- .../DistributedBlockOutputStream.h | 2 +- dbms/src/Storages/StorageDictionary.h | 2 +- .../Storages/StorageReplicatedMergeTree.cpp | 2 +- libs/libcommon/CMakeLists.txt | 3 - libs/libcommon/include/common/MultiVersion.h | 56 ------ libs/libcommon/include/common/ThreadPool.h | 151 ---------------- libs/libcommon/src/ThreadPool.cpp | 169 ------------------ libs/libcommon/src/tests/CMakeLists.txt | 2 - libs/libcommon/src/tests/multi_version.cpp | 56 ------ 33 files changed, 32 insertions(+), 463 deletions(-) delete mode 100644 libs/libcommon/include/common/MultiVersion.h delete mode 100644 libs/libcommon/include/common/ThreadPool.h delete mode 100644 libs/libcommon/src/ThreadPool.cpp delete mode 100644 libs/libcommon/src/tests/multi_version.cpp diff --git a/dbms/programs/benchmark/Benchmark.cpp b/dbms/programs/benchmark/Benchmark.cpp index 9bd3bda825a..b366add0ba5 100644 --- a/dbms/programs/benchmark/Benchmark.cpp +++ b/dbms/programs/benchmark/Benchmark.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/dbms/programs/copier/ClusterCopier.cpp b/dbms/programs/copier/ClusterCopier.cpp index 4f285c83f17..59635e8cd95 100644 --- a/dbms/programs/copier/ClusterCopier.cpp +++ b/dbms/programs/copier/ClusterCopier.cpp @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include diff --git a/dbms/programs/performance-test/PerformanceTest.cpp b/dbms/programs/performance-test/PerformanceTest.cpp index 27bf986fc1b..ca5677e0dc5 100644 --- a/dbms/programs/performance-test/PerformanceTest.cpp +++ b/dbms/programs/performance-test/PerformanceTest.cpp @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/dbms/src/Common/tests/CMakeLists.txt b/dbms/src/Common/tests/CMakeLists.txt index ec9636ce664..163b52991ae 100644 --- a/dbms/src/Common/tests/CMakeLists.txt +++ b/dbms/src/Common/tests/CMakeLists.txt @@ -53,6 +53,12 @@ target_link_libraries (thread_creation_latency PRIVATE clickhouse_common_io) add_executable (thread_pool thread_pool.cpp) target_link_libraries (thread_pool PRIVATE clickhouse_common_io) +add_executable (thread_pool_2 thread_pool_2.cpp) 
+target_link_libraries (thread_pool_2 PRIVATE clickhouse_common_io) + +add_executable (multi_version multi_version.cpp) +target_link_libraries (multi_version PRIVATE clickhouse_common_io) + add_executable (array_cache array_cache.cpp) target_link_libraries (array_cache PRIVATE clickhouse_common_io) diff --git a/dbms/src/Common/tests/gtest_rw_lock.cpp b/dbms/src/Common/tests/gtest_rw_lock.cpp index 6fd16be64cd..c95be0d641d 100644 --- a/dbms/src/Common/tests/gtest_rw_lock.cpp +++ b/dbms/src/Common/tests/gtest_rw_lock.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/dbms/src/Common/tests/parallel_aggregation.cpp b/dbms/src/Common/tests/parallel_aggregation.cpp index 15a193de3d7..ba430b0c58c 100644 --- a/dbms/src/Common/tests/parallel_aggregation.cpp +++ b/dbms/src/Common/tests/parallel_aggregation.cpp @@ -16,7 +16,7 @@ #include #include -#include +#include using Key = UInt64; diff --git a/dbms/src/Common/tests/parallel_aggregation2.cpp b/dbms/src/Common/tests/parallel_aggregation2.cpp index dc43442de08..699fb65e9dc 100644 --- a/dbms/src/Common/tests/parallel_aggregation2.cpp +++ b/dbms/src/Common/tests/parallel_aggregation2.cpp @@ -16,7 +16,7 @@ #include #include -#include +#include using Key = UInt64; diff --git a/dbms/src/Common/tests/thread_creation_latency.cpp b/dbms/src/Common/tests/thread_creation_latency.cpp index ef910a3e9f3..9b551f713c1 100644 --- a/dbms/src/Common/tests/thread_creation_latency.cpp +++ b/dbms/src/Common/tests/thread_creation_latency.cpp @@ -5,7 +5,7 @@ #include #include -#include +#include int x = 0; diff --git a/dbms/src/Common/tests/thread_pool.cpp b/dbms/src/Common/tests/thread_pool.cpp index 841cc740db1..23dba2aadec 100644 --- a/dbms/src/Common/tests/thread_pool.cpp +++ b/dbms/src/Common/tests/thread_pool.cpp @@ -1,4 +1,4 @@ -#include +#include /** Reproduces bug in ThreadPool. * It get stuck if we call 'wait' many times from many other threads simultaneously. 
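 * A minimal sketch of that scenario (assumed shape; the actual test body lies outside this hunk):
 *     ThreadPool pool(4);
 *     for (size_t i = 0; i < 1000; ++i)
 *         pool.schedule([]{});
 *     std::vector<std::thread> waiters;
 *     for (size_t i = 0; i < 8; ++i)
 *         waiters.emplace_back([&pool]{ pool.wait(); });
 *     for (auto & t : waiters)
 *         t.join();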
diff --git a/dbms/src/DataStreams/AsynchronousBlockInputStream.h b/dbms/src/DataStreams/AsynchronousBlockInputStream.h index c790deb49c2..f5770411588 100644 --- a/dbms/src/DataStreams/AsynchronousBlockInputStream.h +++ b/dbms/src/DataStreams/AsynchronousBlockInputStream.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include diff --git a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.h b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.h index bdabd8cc1f8..612adcc3599 100644 --- a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.h +++ b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include diff --git a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp index d3e7eaeb9a2..9651eb9e39f 100644 --- a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp +++ b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include namespace DB diff --git a/dbms/src/Databases/DatabaseFactory.h b/dbms/src/Databases/DatabaseFactory.h index 00265a2454b..bb912ca377b 100644 --- a/dbms/src/Databases/DatabaseFactory.h +++ b/dbms/src/Databases/DatabaseFactory.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include diff --git a/dbms/src/Databases/DatabaseOrdinary.cpp b/dbms/src/Databases/DatabaseOrdinary.cpp index cb1c7587080..3ca8cd17f71 100644 --- a/dbms/src/Databases/DatabaseOrdinary.cpp +++ b/dbms/src/Databases/DatabaseOrdinary.cpp @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/dbms/src/Databases/IDatabase.h b/dbms/src/Databases/IDatabase.h index 64a65c2fb5f..90dfa229f11 100644 --- a/dbms/src/Databases/IDatabase.h +++ b/dbms/src/Databases/IDatabase.h @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include diff --git a/dbms/src/IO/AsynchronousWriteBuffer.h b/dbms/src/IO/AsynchronousWriteBuffer.h index d54f18e9f5d..c87777450e7 100644 --- a/dbms/src/IO/AsynchronousWriteBuffer.h +++ b/dbms/src/IO/AsynchronousWriteBuffer.h @@ -4,7 +4,7 @@ #include -#include +#include #include diff --git a/dbms/src/Interpreters/Aggregator.h b/dbms/src/Interpreters/Aggregator.h index f51f620064f..bed147d627d 100644 --- a/dbms/src/Interpreters/Aggregator.h +++ b/dbms/src/Interpreters/Aggregator.h @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include diff --git a/dbms/src/Interpreters/Compiler.h b/dbms/src/Interpreters/Compiler.h index 3f1fa8924ec..b79cf26e0f0 100644 --- a/dbms/src/Interpreters/Compiler.h +++ b/dbms/src/Interpreters/Compiler.h @@ -15,7 +15,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index 72354753e36..ca90073436a 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include #include diff --git a/dbms/src/Interpreters/EmbeddedDictionaries.h b/dbms/src/Interpreters/EmbeddedDictionaries.h index a4f97308401..ad2dd404b3e 100644 --- a/dbms/src/Interpreters/EmbeddedDictionaries.h +++ b/dbms/src/Interpreters/EmbeddedDictionaries.h @@ -2,7 +2,7 @@ #include #include -#include +#include #include diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.h b/dbms/src/Interpreters/InterpreterCreateQuery.h index 
40089e17d25..0fc26847574 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.h +++ b/dbms/src/Interpreters/InterpreterCreateQuery.h @@ -2,7 +2,7 @@ #include #include -#include +#include namespace DB diff --git a/dbms/src/Interpreters/InterpreterSystemQuery.cpp b/dbms/src/Interpreters/InterpreterSystemQuery.cpp index fc472ad8a9e..cae7e13e342 100644 --- a/dbms/src/Interpreters/InterpreterSystemQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSystemQuery.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/dbms/src/Interpreters/loadMetadata.cpp b/dbms/src/Interpreters/loadMetadata.cpp index 38e8407082d..0287990d4e5 100644 --- a/dbms/src/Interpreters/loadMetadata.cpp +++ b/dbms/src/Interpreters/loadMetadata.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include diff --git a/dbms/src/Interpreters/tests/internal_iotop.cpp b/dbms/src/Interpreters/tests/internal_iotop.cpp index c1088819f85..3a171c0c845 100644 --- a/dbms/src/Interpreters/tests/internal_iotop.cpp +++ b/dbms/src/Interpreters/tests/internal_iotop.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include diff --git a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h index 1c3dd8217e1..de802a09483 100644 --- a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h +++ b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/dbms/src/Storages/StorageDictionary.h b/dbms/src/Storages/StorageDictionary.h index cffaf8879cd..08a3f32093b 100644 --- a/dbms/src/Storages/StorageDictionary.h +++ b/dbms/src/Storages/StorageDictionary.h @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 5350b1db579..67774e7a113 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -44,7 +44,7 @@ #include -#include +#include #include #include diff --git a/libs/libcommon/CMakeLists.txt b/libs/libcommon/CMakeLists.txt index 5c6c242407f..0c6455ece26 100644 --- a/libs/libcommon/CMakeLists.txt +++ b/libs/libcommon/CMakeLists.txt @@ -18,7 +18,6 @@ add_library (common ${LINK_MODE} src/mremap.cpp src/JSON.cpp src/getMemoryAmount.cpp - src/ThreadPool.cpp src/demangle.cpp src/SetTerminalEcho.cpp @@ -34,11 +33,9 @@ add_library (common ${LINK_MODE} include/common/mremap.h include/common/likely.h include/common/logger_useful.h - include/common/MultiVersion.h include/common/strong_typedef.h include/common/JSON.h include/common/getMemoryAmount.h - include/common/ThreadPool.h include/common/demangle.h include/common/SetTerminalEcho.h include/common/find_symbols.h diff --git a/libs/libcommon/include/common/MultiVersion.h b/libs/libcommon/include/common/MultiVersion.h deleted file mode 100644 index 3014689f861..00000000000 --- a/libs/libcommon/include/common/MultiVersion.h +++ /dev/null @@ -1,56 +0,0 @@ -#pragma once - -#include -#include - - -/** Allow to store and read-only usage of an object in several threads, - * and to atomically replace an object in another thread. - * The replacement is atomic and reading threads can work with different versions of an object. 
- * - * Usage: - * MultiVersion x; - * - on data update: - * x.set(new value); - * - on read-only usage: - * { - * MultiVersion::Version current_version = x.get(); - * // use *current_version - * } // now we finish own current version; if the version is outdated and no one else is using it - it will be destroyed. - * - * All methods are thread-safe. - */ -template -class MultiVersion -{ -public: - /// Version of object for usage. shared_ptr manage lifetime of version. - using Version = std::shared_ptr; - - /// Default initialization - by nullptr. - MultiVersion() = default; - - MultiVersion(std::unique_ptr && value) - { - set(std::move(value)); - } - - /// Obtain current version for read-only usage. Returns shared_ptr, that manages lifetime of version. - Version get() const - { - /// NOTE: is it possible to lock-free replace of shared_ptr? - std::lock_guard lock(mutex); - return current_version; - } - - /// Update an object with new version. - void set(std::unique_ptr && value) - { - std::lock_guard lock(mutex); - current_version = std::move(value); - } - -private: - Version current_version; - mutable std::mutex mutex; -}; diff --git a/libs/libcommon/include/common/ThreadPool.h b/libs/libcommon/include/common/ThreadPool.h deleted file mode 100644 index a8cf84dd7b5..00000000000 --- a/libs/libcommon/include/common/ThreadPool.h +++ /dev/null @@ -1,151 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include - - -/** Very simple thread pool similar to boost::threadpool. - * Advantages: - * - catches exceptions and rethrows on wait. - */ - -template -class ThreadPoolImpl -{ -public: - using Job = std::function; - - /// Size is constant. Up to num_threads are created on demand and then run until shutdown. - explicit ThreadPoolImpl(size_t num_threads); - - /// queue_size - maximum number of running plus scheduled jobs. It can be greater than num_threads. Zero means unlimited. - ThreadPoolImpl(size_t num_threads, size_t queue_size); - - /// Add new job. Locks until number of active jobs is less than maximum or exception in one of threads was thrown. - /// If an exception in some thread was thrown, method silently returns, and exception will be rethrown only on call to 'wait' function. - /// Priority: greater is higher. - void schedule(Job job, int priority = 0); - - /// Wait for all currently active jobs to be done. - /// You may call schedule and wait many times in arbitary order. - /// If any thread was throw an exception, first exception will be rethrown from this method, - /// and exception will be cleared. - void wait(); - - /// Waits for all threads. Doesn't rethrow exceptions (use 'wait' method to rethrow exceptions). - /// You should not destroy object while calling schedule or wait methods from another threads. - ~ThreadPoolImpl(); - - size_t size() const { return num_threads; } - - /// Returns number of running and scheduled jobs. 
- size_t active() const; - -private: - mutable std::mutex mutex; - std::condition_variable job_finished; - std::condition_variable new_job_or_shutdown; - - const size_t num_threads; - const size_t queue_size; - - size_t active_jobs = 0; - bool shutdown = false; - - struct JobWithPriority - { - Job job; - int priority; - - JobWithPriority(Job job, int priority) - : job(job), priority(priority) {} - - bool operator< (const JobWithPriority & rhs) const - { - return priority < rhs.priority; - } - }; - - std::priority_queue jobs; - std::vector threads; - std::exception_ptr first_exception; - - - void worker(); - - void finalize(); -}; - - -using FreeThreadPool = ThreadPoolImpl; - -class GlobalThreadPool : public FreeThreadPool, public ext::singleton -{ -public: - GlobalThreadPool() : FreeThreadPool(10000) {} /// TODO: global blocking limit may lead to deadlocks. -}; - -class ThreadFromGlobalPool -{ -public: - ThreadFromGlobalPool() {} - - ThreadFromGlobalPool(std::function func) - { - mutex = std::make_unique(); - /// The function object must be copyable, so we wrap lock_guard in shared_ptr. - GlobalThreadPool::instance().schedule([lock = std::make_shared>(*mutex), func = std::move(func)] { func(); }); - } - - ThreadFromGlobalPool(ThreadFromGlobalPool && rhs) - { - *this = std::move(rhs); - } - - ThreadFromGlobalPool & operator=(ThreadFromGlobalPool && rhs) - { - if (mutex) - std::terminate(); - mutex = std::move(rhs.mutex); - return *this; - } - - ~ThreadFromGlobalPool() - { - if (mutex) - std::terminate(); - } - - void join() - { - { - std::lock_guard lock(*mutex); - } - mutex.reset(); - } -private: - std::unique_ptr mutex; /// Object must be moveable. -}; - -using ThreadPool = ThreadPoolImpl; - - -/// Allows to save first catched exception in jobs and postpone its rethrow. 
-class ExceptionHandler -{ -public: - void setException(std::exception_ptr && exception); - void throwIfException(); - -private: - std::exception_ptr first_exception; - std::mutex mutex; -}; - -ThreadPool::Job createExceptionHandledJob(ThreadPool::Job job, ExceptionHandler & handler); diff --git a/libs/libcommon/src/ThreadPool.cpp b/libs/libcommon/src/ThreadPool.cpp deleted file mode 100644 index e3f03e18a46..00000000000 --- a/libs/libcommon/src/ThreadPool.cpp +++ /dev/null @@ -1,169 +0,0 @@ -#include -#include - - -template -ThreadPoolImpl::ThreadPoolImpl(size_t num_threads) - : ThreadPoolImpl(num_threads, num_threads) -{ -} - -template -ThreadPoolImpl::ThreadPoolImpl(size_t num_threads, size_t queue_size) - : num_threads(num_threads), queue_size(queue_size) -{ - threads.reserve(num_threads); -} - -template -void ThreadPoolImpl::schedule(Job job, int priority) -{ - { - std::unique_lock lock(mutex); - job_finished.wait(lock, [this] { return !queue_size || active_jobs < queue_size || shutdown; }); - if (shutdown) - return; - - jobs.emplace(std::move(job), priority); - ++active_jobs; - - if (threads.size() < std::min(num_threads, active_jobs)) - threads.emplace_back([this] { worker(); }); - } - new_job_or_shutdown.notify_one(); -} - -template -void ThreadPoolImpl::wait() -{ - { - std::unique_lock lock(mutex); - job_finished.wait(lock, [this] { return active_jobs == 0; }); - - if (first_exception) - { - std::exception_ptr exception; - std::swap(exception, first_exception); - std::rethrow_exception(exception); - } - } -} - -template -ThreadPoolImpl::~ThreadPoolImpl() -{ - finalize(); -} - -template -void ThreadPoolImpl::finalize() -{ - { - std::unique_lock lock(mutex); - shutdown = true; - } - - new_job_or_shutdown.notify_all(); - - for (auto & thread : threads) - thread.join(); - - threads.clear(); -} - -template -size_t ThreadPoolImpl::active() const -{ - std::unique_lock lock(mutex); - return active_jobs; -} - -template -void ThreadPoolImpl::worker() -{ - while (true) - { - Job job; - bool need_shutdown = false; - - { - std::unique_lock lock(mutex); - new_job_or_shutdown.wait(lock, [this] { return shutdown || !jobs.empty(); }); - need_shutdown = shutdown; - - if (!jobs.empty()) - { - job = jobs.top().job; - jobs.pop(); - } - else - { - return; - } - } - - if (!need_shutdown) - { - try - { - job(); - } - catch (...) - { - { - std::unique_lock lock(mutex); - if (!first_exception) - first_exception = std::current_exception(); - shutdown = true; - --active_jobs; - } - job_finished.notify_all(); - new_job_or_shutdown.notify_all(); - return; - } - } - - { - std::unique_lock lock(mutex); - --active_jobs; - } - - job_finished.notify_all(); - } -} - - -template class ThreadPoolImpl; -template class ThreadPoolImpl; - - -void ExceptionHandler::setException(std::exception_ptr && exception) -{ - std::unique_lock lock(mutex); - if (!first_exception) - first_exception = std::move(exception); -} - -void ExceptionHandler::throwIfException() -{ - std::unique_lock lock(mutex); - if (first_exception) - std::rethrow_exception(first_exception); -} - - -ThreadPool::Job createExceptionHandledJob(ThreadPool::Job job, ExceptionHandler & handler) -{ - return [job{std::move(job)}, &handler] () - { - try - { - job(); - } - catch (...) 
- { - handler.setException(std::current_exception()); - } - }; -} - diff --git a/libs/libcommon/src/tests/CMakeLists.txt b/libs/libcommon/src/tests/CMakeLists.txt index 2d46cb7146d..355c6679362 100644 --- a/libs/libcommon/src/tests/CMakeLists.txt +++ b/libs/libcommon/src/tests/CMakeLists.txt @@ -5,7 +5,6 @@ add_executable (date_lut2 date_lut2.cpp) add_executable (date_lut3 date_lut3.cpp) add_executable (date_lut4 date_lut4.cpp) add_executable (date_lut_default_timezone date_lut_default_timezone.cpp) -add_executable (multi_version multi_version.cpp) add_executable (local_date_time_comparison local_date_time_comparison.cpp) add_executable (realloc-perf allocator.cpp) @@ -16,7 +15,6 @@ target_link_libraries (date_lut2 common ${PLATFORM_LIBS}) target_link_libraries (date_lut3 common ${PLATFORM_LIBS}) target_link_libraries (date_lut4 common ${PLATFORM_LIBS}) target_link_libraries (date_lut_default_timezone common ${PLATFORM_LIBS}) -target_link_libraries (multi_version common) target_link_libraries (local_date_time_comparison common) target_link_libraries (realloc-perf common) add_check(multi_version) diff --git a/libs/libcommon/src/tests/multi_version.cpp b/libs/libcommon/src/tests/multi_version.cpp deleted file mode 100644 index 9cab2e095e4..00000000000 --- a/libs/libcommon/src/tests/multi_version.cpp +++ /dev/null @@ -1,56 +0,0 @@ -#include -#include -#include -#include -#include -#include - - -using T = std::string; -using MV = MultiVersion; -using Results = std::vector; - - -void thread1(MV & x, T & result) -{ - MV::Version v = x.get(); - result = *v; -} - -void thread2(MV & x, const char * result) -{ - x.set(std::make_unique(result)); -} - - -int main(int argc, char ** argv) -{ - try - { - const char * s1 = "Hello!"; - const char * s2 = "Goodbye!"; - - size_t n = 1000; - MV x(std::make_unique(s1)); - Results results(n); - - ThreadPool tp(8); - for (size_t i = 0; i < n; ++i) - { - tp.schedule(std::bind(thread1, std::ref(x), std::ref(results[i]))); - tp.schedule(std::bind(thread2, std::ref(x), (rand() % 2) ? 
s1 : s2)); - } - tp.wait(); - - for (size_t i = 0; i < n; ++i) - std::cerr << results[i] << " "; - std::cerr << std::endl; - } - catch (const Poco::Exception & e) - { - std::cerr << e.message() << std::endl; - throw; - } - - return 0; -} From 0afee1df23f631247803963779e2ee9b31ec0189 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 11 Jan 2019 22:27:24 +0300 Subject: [PATCH 058/586] Attempt to implemnt global thread pool #4018 --- dbms/src/Common/tests/thread_pool_2.cpp | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 dbms/src/Common/tests/thread_pool_2.cpp diff --git a/dbms/src/Common/tests/thread_pool_2.cpp b/dbms/src/Common/tests/thread_pool_2.cpp new file mode 100644 index 00000000000..64a04c471f6 --- /dev/null +++ b/dbms/src/Common/tests/thread_pool_2.cpp @@ -0,0 +1,21 @@ +#include +#include +#include + + +int main(int, char **) +{ + std::atomic res{0}; + + for (size_t i = 0; i < 1000; ++i) + { + size_t threads = 16; + ThreadPool pool(threads); + for (size_t j = 0; j < threads; ++j) + pool.schedule([&]{ ++res; }); + pool.wait(); + } + + std::cerr << res << "\n"; + return 0; +} From 183ddea5187eac83ec91f43a0c841f55937109e0 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 13 Jan 2019 19:04:41 +0300 Subject: [PATCH 059/586] added parsing --- dbms/src/Parsers/ASTAlterQuery.cpp | 20 +++++++++++++++++ dbms/src/Parsers/ASTAlterQuery.h | 12 ++++++++++ dbms/src/Parsers/ParserAlterQuery.cpp | 32 +++++++++++++++++++++++++++ 3 files changed, 64 insertions(+) diff --git a/dbms/src/Parsers/ASTAlterQuery.cpp b/dbms/src/Parsers/ASTAlterQuery.cpp index c5cdf1475e3..893129a6f17 100644 --- a/dbms/src/Parsers/ASTAlterQuery.cpp +++ b/dbms/src/Parsers/ASTAlterQuery.cpp @@ -82,6 +82,26 @@ void ASTAlterCommand::formatImpl( settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY ORDER BY " << (settings.hilite ? hilite_none : ""); order_by->formatImpl(settings, state, frame); } + else if (type == ASTAlterCommand::ADD_INDEX) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ADD INDEX " << (if_not_exists ? "IF NOT EXISTS " : "") << (settings.hilite ? hilite_none : ""); + index_decl->formatImpl(settings, state, frame); + + /// AFTER + if (index) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " AFTER " << (settings.hilite ? hilite_none : ""); + index->formatImpl(settings, state, frame); + } + throw Exception("ADD/DROP INDEX is not implemented", ErrorCodes::LOGICAL_ERROR); + } + else if (type == ASTAlterCommand::DROP_INDEX) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str + << "DROP INDEX " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : ""); + index->formatImpl(settings, state, frame); + throw Exception("ADD/DROP INDEX is not implemented", ErrorCodes::LOGICAL_ERROR); + } else if (type == ASTAlterCommand::DROP_PARTITION) { settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << (detach ? "DETACH" : "DROP") << " PARTITION " diff --git a/dbms/src/Parsers/ASTAlterQuery.h b/dbms/src/Parsers/ASTAlterQuery.h index a6759482a56..2c77e2031de 100644 --- a/dbms/src/Parsers/ASTAlterQuery.h +++ b/dbms/src/Parsers/ASTAlterQuery.h @@ -28,6 +28,9 @@ public: COMMENT_COLUMN, MODIFY_ORDER_BY, + ADD_INDEX, + DROP_INDEX, + DROP_PARTITION, ATTACH_PARTITION, REPLACE_PARTITION, @@ -58,6 +61,15 @@ public: */ ASTPtr order_by; + /** The ADD INDEX query stores the IndexDeclaration there. 
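+      * An illustrative statement that would populate it (index-declaration syntax assumed here;
+      * the exact grammar is whatever ParserIndexDeclaration accepts):
+      *     ALTER TABLE t ADD INDEX idx expr TYPE minmax GRANULARITY 10 AFTER other_idx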
+ */ + ASTPtr index_decl; + + /** The ADD INDEX query stores the name of the index following AFTER. + * The DROP INDEX query stores the name for deletion. + */ + ASTPtr index; + /** Used in DROP PARTITION and ATTACH PARTITION FROM queries. * The value or ID of the partition is stored here. */ diff --git a/dbms/src/Parsers/ParserAlterQuery.cpp b/dbms/src/Parsers/ParserAlterQuery.cpp index b17467ed365..7216453c364 100644 --- a/dbms/src/Parsers/ParserAlterQuery.cpp +++ b/dbms/src/Parsers/ParserAlterQuery.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -26,6 +27,9 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserKeyword s_comment_column("COMMENT COLUMN"); ParserKeyword s_modify_order_by("MODIFY ORDER BY"); + ParserKeyword s_add_index("ADD INDEX"); + ParserKeyword s_drop_index("DROP INDEX"); + ParserKeyword s_attach_partition("ATTACH PARTITION"); ParserKeyword s_detach_partition("DETACH PARTITION"); ParserKeyword s_drop_partition("DROP PARTITION"); @@ -50,6 +54,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserCompoundIdentifier parser_name; ParserStringLiteral parser_string_literal; ParserCompoundColumnDeclaration parser_col_decl; + ParserIndexDeclaration parser_idx_decl; ParserCompoundColumnDeclaration parser_modify_col_decl(false); ParserPartition parser_partition; ParserExpression parser_exp_elem; @@ -91,6 +96,33 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected command->type = ASTAlterCommand::DROP_COLUMN; command->detach = false; } + else if (s_add_index.ignore(pos, expected)) + { + if (s_if_not_exists.ignore(pos, expected)) + command->if_not_exists = true; + + if (!parser_idx_decl.parse(pos, command->index_decl, expected)) + return false; + + if (s_after.ignore(pos, expected)) + { + if (!parser_name.parse(pos, command->index, expected)) + return false; + } + + command->type = ASTAlterCommand::ADD_INDEX; + } + else if (s_drop_index.ignore(pos, expected)) + { + if (s_if_exists.ignore(pos, expected)) + command->if_exists = true; + + if (!parser_name.parse(pos, command->index, expected)) + return false; + + command->type = ASTAlterCommand::DROP_INDEX; + command->detach = false; + } else if (s_clear_column.ignore(pos, expected)) { if (s_if_exists.ignore(pos, expected)) From 4f48ebd971e19c4e9d61d68be77b085a446aa2bb Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 13 Jan 2019 19:05:21 +0300 Subject: [PATCH 060/586] removed throw --- dbms/src/Parsers/ASTAlterQuery.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/dbms/src/Parsers/ASTAlterQuery.cpp b/dbms/src/Parsers/ASTAlterQuery.cpp index 893129a6f17..12491c83762 100644 --- a/dbms/src/Parsers/ASTAlterQuery.cpp +++ b/dbms/src/Parsers/ASTAlterQuery.cpp @@ -93,14 +93,12 @@ void ASTAlterCommand::formatImpl( settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " AFTER " << (settings.hilite ? hilite_none : ""); index->formatImpl(settings, state, frame); } - throw Exception("ADD/DROP INDEX is not implemented", ErrorCodes::LOGICAL_ERROR); } else if (type == ASTAlterCommand::DROP_INDEX) { settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "DROP INDEX " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? 
hilite_none : ""); index->formatImpl(settings, state, frame); - throw Exception("ADD/DROP INDEX is not implemented", ErrorCodes::LOGICAL_ERROR); } else if (type == ASTAlterCommand::DROP_PARTITION) { From d0db87b41222ce1efb975a28e62062299f84130c Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 13 Jan 2019 20:23:08 +0300 Subject: [PATCH 061/586] alter cmds parse --- dbms/src/Storages/AlterCommands.cpp | 31 +++++++++++++++++++++++++++++ dbms/src/Storages/AlterCommands.h | 9 +++++++++ 2 files changed, 40 insertions(+) diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index b5fbe0f3314..2cde9562c82 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -113,6 +114,36 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ command.order_by = command_ast->order_by; return command; } + else if (command_ast->type == ASTAlterCommand::ADD_INDEX) + { + AlterCommand command; + command.type = AlterCommand::ADD_INDEX; + + const auto & ast_index_decl = typeid_cast(*command_ast->index_decl); + + command.index_name = ast_index_decl.name; + + if (command_ast->index) + command.after_index_name = typeid_cast(*command_ast->index).name; + + command.if_not_exists = command_ast->if_not_exists; + + throw Exception("\"ALTER TABLE table ADD/DROP INDEX ...\" queries are not supported yet.", ErrorCodes::NOT_IMPLEMENTED); + return command; + } + else if (command_ast->type == ASTAlterCommand::DROP_INDEX) + { + if (command_ast->clear_column) + throw Exception("\"ALTER TABLE table CLEAR COLUMN column\" queries are not supported yet. Use \"CLEAR COLUMN column IN PARTITION\".", ErrorCodes::NOT_IMPLEMENTED); + + AlterCommand command; + command.type = AlterCommand::DROP_INDEX; + command.index_name = typeid_cast(*(command_ast->index)).name; + command.if_exists = command_ast->if_exists; + + throw Exception("\"ALTER TABLE table ADD/DROP INDEX ...\" queries are not supported yet.", ErrorCodes::NOT_IMPLEMENTED); + return command; + } else return {}; } diff --git a/dbms/src/Storages/AlterCommands.h b/dbms/src/Storages/AlterCommands.h index c8d46dd5764..a465a8412ed 100644 --- a/dbms/src/Storages/AlterCommands.h +++ b/dbms/src/Storages/AlterCommands.h @@ -23,6 +23,8 @@ struct AlterCommand MODIFY_COLUMN, COMMENT_COLUMN, MODIFY_ORDER_BY, + ADD_INDEX, + DROP_INDEX, UKNOWN_TYPE, }; @@ -52,6 +54,13 @@ struct AlterCommand /// For MODIFY_ORDER_BY ASTPtr order_by; + /// For ADD INDEX + ASTPtr index_decl; + String after_index_name; + + /// For ADD/DROP INDEX + String index_name; + /// indicates that this command should not be applied, for example in case of if_exists=true and column doesn't exist. bool ignore = false; From a6bf5a7e28a8e49dc386ca48317bbab2839cc444 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 13 Jan 2019 20:49:21 +0300 Subject: [PATCH 062/586] Addition to prev. 
revision

---
 dbms/src/Common/MultiVersion.h          |  56 ++++++++
 dbms/src/Common/ThreadPool.cpp          | 168 ++++++++++++++++++++++++
 dbms/src/Common/ThreadPool.h            | 123 +++++++++++++++++
 dbms/src/Common/tests/CMakeLists.txt    |   1 +
 dbms/src/Common/tests/multi_version.cpp |  56 ++++++++
 libs/libcommon/src/tests/CMakeLists.txt |   1 -
 6 files changed, 404 insertions(+), 1 deletion(-)
 create mode 100644 dbms/src/Common/MultiVersion.h
 create mode 100644 dbms/src/Common/ThreadPool.cpp
 create mode 100644 dbms/src/Common/ThreadPool.h
 create mode 100644 dbms/src/Common/tests/multi_version.cpp

diff --git a/dbms/src/Common/MultiVersion.h b/dbms/src/Common/MultiVersion.h
new file mode 100644
index 00000000000..3014689f861
--- /dev/null
+++ b/dbms/src/Common/MultiVersion.h
@@ -0,0 +1,56 @@
+#pragma once
+
+#include <mutex>
+#include <memory>
+
+
+/** Stores an object for read-only use from several threads,
+  * while another thread may atomically replace it.
+  * The replacement is atomic, and reading threads can keep working with different versions of the object.
+  *
+  * Usage:
+  *   MultiVersion<T> x;
+  * - on data update:
+  *   x.set(new value);
+  * - on read-only usage:
+  *   {
+  *       MultiVersion<T>::Version current_version = x.get();
+  *       // use *current_version
+  *   }   // now we release our current version; if it is outdated and no one else is using it, it will be destroyed.
+  *
+  * All methods are thread-safe.
+  */
+template <typename T>
+class MultiVersion
+{
+public:
+    /// Version of the object for use. The shared_ptr manages the lifetime of the version.
+    using Version = std::shared_ptr<const T>;
+
+    /// Default initialization - by nullptr.
+    MultiVersion() = default;
+
+    MultiVersion(std::unique_ptr<const T> && value)
+    {
+        set(std::move(value));
+    }
+
+    /// Obtain current version for read-only usage. Returns shared_ptr that manages the lifetime of the version.
+    Version get() const
+    {
+        /// NOTE: is it possible to lock-free replace of shared_ptr?
+        std::lock_guard<std::mutex> lock(mutex);
+        return current_version;
+    }
+
+    /// Update an object with new version.
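+    /// (A clarifying note: the unique_ptr is consumed and becomes the new shared
+    /// Version; readers that already hold the previous Version keep it alive until
+    /// they drop it. This assumes T itself is safe for concurrent read-only use.)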
+ void set(std::unique_ptr && value) + { + std::lock_guard lock(mutex); + current_version = std::move(value); + } + +private: + Version current_version; + mutable std::mutex mutex; +}; diff --git a/dbms/src/Common/ThreadPool.cpp b/dbms/src/Common/ThreadPool.cpp new file mode 100644 index 00000000000..698c442fc9d --- /dev/null +++ b/dbms/src/Common/ThreadPool.cpp @@ -0,0 +1,168 @@ +#include + + +template +ThreadPoolImpl::ThreadPoolImpl(size_t num_threads) + : ThreadPoolImpl(num_threads, num_threads) +{ +} + +template +ThreadPoolImpl::ThreadPoolImpl(size_t num_threads, size_t queue_size) + : num_threads(num_threads), queue_size(queue_size) +{ + threads.reserve(num_threads); +} + +template +void ThreadPoolImpl::schedule(Job job, int priority) +{ + { + std::unique_lock lock(mutex); + job_finished.wait(lock, [this] { return !queue_size || active_jobs < queue_size || shutdown; }); + if (shutdown) + return; + + jobs.emplace(std::move(job), priority); + ++active_jobs; + + if (threads.size() < std::min(num_threads, active_jobs)) + threads.emplace_back([this] { worker(); }); + } + new_job_or_shutdown.notify_one(); +} + +template +void ThreadPoolImpl::wait() +{ + { + std::unique_lock lock(mutex); + job_finished.wait(lock, [this] { return active_jobs == 0; }); + + if (first_exception) + { + std::exception_ptr exception; + std::swap(exception, first_exception); + std::rethrow_exception(exception); + } + } +} + +template +ThreadPoolImpl::~ThreadPoolImpl() +{ + finalize(); +} + +template +void ThreadPoolImpl::finalize() +{ + { + std::unique_lock lock(mutex); + shutdown = true; + } + + new_job_or_shutdown.notify_all(); + + for (auto & thread : threads) + thread.join(); + + threads.clear(); +} + +template +size_t ThreadPoolImpl::active() const +{ + std::unique_lock lock(mutex); + return active_jobs; +} + +template +void ThreadPoolImpl::worker() +{ + while (true) + { + Job job; + bool need_shutdown = false; + + { + std::unique_lock lock(mutex); + new_job_or_shutdown.wait(lock, [this] { return shutdown || !jobs.empty(); }); + need_shutdown = shutdown; + + if (!jobs.empty()) + { + job = jobs.top().job; + jobs.pop(); + } + else + { + return; + } + } + + if (!need_shutdown) + { + try + { + job(); + } + catch (...) + { + { + std::unique_lock lock(mutex); + if (!first_exception) + first_exception = std::current_exception(); + shutdown = true; + --active_jobs; + } + job_finished.notify_all(); + new_job_or_shutdown.notify_all(); + return; + } + } + + { + std::unique_lock lock(mutex); + --active_jobs; + } + + job_finished.notify_all(); + } +} + + +template class ThreadPoolImpl; +template class ThreadPoolImpl; + + +void ExceptionHandler::setException(std::exception_ptr && exception) +{ + std::unique_lock lock(mutex); + if (!first_exception) + first_exception = std::move(exception); +} + +void ExceptionHandler::throwIfException() +{ + std::unique_lock lock(mutex); + if (first_exception) + std::rethrow_exception(first_exception); +} + + +ThreadPool::Job createExceptionHandledJob(ThreadPool::Job job, ExceptionHandler & handler) +{ + return [job{std::move(job)}, &handler] () + { + try + { + job(); + } + catch (...) 
+        {
+            handler.setException(std::current_exception());
+        }
+    };
+}
+
diff --git a/dbms/src/Common/ThreadPool.h b/dbms/src/Common/ThreadPool.h
new file mode 100644
index 00000000000..02e1a02c58e
--- /dev/null
+++ b/dbms/src/Common/ThreadPool.h
@@ -0,0 +1,123 @@
+#pragma once
+
+#include <cstdint>
+#include <vector>
+#include <queue>
+#include <mutex>
+#include <condition_variable>
+#include <functional>
+#include <thread>
+#include <ext/singleton.h>
+
+
+/** Very simple thread pool similar to boost::threadpool.
+  * Advantages:
+  * - catches exceptions and rethrows on wait.
+  */
+
+template <typename Thread>
+class ThreadPoolImpl
+{
+public:
+    using Job = std::function<void()>;
+
+    /// Size is constant. Up to num_threads are created on demand and then run until shutdown.
+    explicit ThreadPoolImpl(size_t num_threads);
+
+    /// queue_size - maximum number of running plus scheduled jobs. It can be greater than num_threads. Zero means unlimited.
+    ThreadPoolImpl(size_t num_threads, size_t queue_size);
+
+    /// Add new job. Blocks until the number of active jobs is below the maximum, or until an exception is thrown in one of the threads.
+    /// If an exception was thrown in some thread, this method silently returns, and the exception is rethrown only on the call to the 'wait' function.
+    /// Priority: greater is higher.
+    void schedule(Job job, int priority = 0);
+
+    /// Wait for all currently active jobs to be done.
+    /// You may call schedule and wait many times in arbitrary order.
+    /// If any thread threw an exception, the first exception will be rethrown from this method,
+    /// and the exception will be cleared.
+    void wait();
+
+    /// Waits for all threads. Doesn't rethrow exceptions (use the 'wait' method to rethrow exceptions).
+    /// You should not destroy the object while calling schedule or wait from other threads.
+    ~ThreadPoolImpl();
+
+    size_t size() const { return num_threads; }
+
+    /// Returns number of running and scheduled jobs.
+    size_t active() const;
+
+private:
+    mutable std::mutex mutex;
+    std::condition_variable job_finished;
+    std::condition_variable new_job_or_shutdown;
+
+    const size_t num_threads;
+    const size_t queue_size;
+
+    size_t active_jobs = 0;
+    bool shutdown = false;
+
+    struct JobWithPriority
+    {
+        Job job;
+        int priority;
+
+        JobWithPriority(Job job, int priority)
+            : job(job), priority(priority) {}
+
+        bool operator< (const JobWithPriority & rhs) const
+        {
+            return priority < rhs.priority;
+        }
+    };
+
+    std::priority_queue<JobWithPriority> jobs;
+    std::vector<Thread> threads;
+    std::exception_ptr first_exception;
+
+
+    void worker();
+
+    void finalize();
+};
+
+
+using FreeThreadPool = ThreadPoolImpl<std::thread>;
+
+class GlobalThreadPool : public FreeThreadPool, public ext::singleton<GlobalThreadPool>
+{
+public:
+    GlobalThreadPool() : FreeThreadPool(10000) {}   /// TODO: global blocking limit may lead to deadlocks.
+};
+
+class ThreadFromGlobalPool
+{
+public:
+    ThreadFromGlobalPool(std::function<void()> func)
+    {
+        GlobalThreadPool::instance().schedule(func);
+    }
+
+    void join()
+    {
+        /// noop, the std::thread will continue to run inside the global pool.
+    }
+};
+
+using ThreadPool = ThreadPoolImpl<ThreadFromGlobalPool>;
+
+
+/// Saves the first caught exception from jobs and postpones its rethrow.
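+/// Intended usage, roughly (an illustrative sketch, not code from this patch):
+///
+///     ExceptionHandler handler;
+///     for (auto & task : tasks)
+///         pool.schedule(createExceptionHandledJob(task, handler));
+///     pool.wait();
+///     handler.throwIfException();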
+class ExceptionHandler +{ +public: + void setException(std::exception_ptr && exception); + void throwIfException(); + +private: + std::exception_ptr first_exception; + std::mutex mutex; +}; + +ThreadPool::Job createExceptionHandledJob(ThreadPool::Job job, ExceptionHandler & handler); diff --git a/dbms/src/Common/tests/CMakeLists.txt b/dbms/src/Common/tests/CMakeLists.txt index 163b52991ae..e4a72bbdeac 100644 --- a/dbms/src/Common/tests/CMakeLists.txt +++ b/dbms/src/Common/tests/CMakeLists.txt @@ -58,6 +58,7 @@ target_link_libraries (thread_pool_2 PRIVATE clickhouse_common_io) add_executable (multi_version multi_version.cpp) target_link_libraries (multi_version PRIVATE clickhouse_common_io) +add_check(multi_version) add_executable (array_cache array_cache.cpp) target_link_libraries (array_cache PRIVATE clickhouse_common_io) diff --git a/dbms/src/Common/tests/multi_version.cpp b/dbms/src/Common/tests/multi_version.cpp new file mode 100644 index 00000000000..0db6d74f4aa --- /dev/null +++ b/dbms/src/Common/tests/multi_version.cpp @@ -0,0 +1,56 @@ +#include +#include +#include +#include +#include +#include + + +using T = std::string; +using MV = MultiVersion; +using Results = std::vector; + + +void thread1(MV & x, T & result) +{ + MV::Version v = x.get(); + result = *v; +} + +void thread2(MV & x, const char * result) +{ + x.set(std::make_unique(result)); +} + + +int main(int argc, char ** argv) +{ + try + { + const char * s1 = "Hello!"; + const char * s2 = "Goodbye!"; + + size_t n = 1000; + MV x(std::make_unique(s1)); + Results results(n); + + ThreadPool tp(8); + for (size_t i = 0; i < n; ++i) + { + tp.schedule(std::bind(thread1, std::ref(x), std::ref(results[i]))); + tp.schedule(std::bind(thread2, std::ref(x), (rand() % 2) ? s1 : s2)); + } + tp.wait(); + + for (size_t i = 0; i < n; ++i) + std::cerr << results[i] << " "; + std::cerr << std::endl; + } + catch (const Poco::Exception & e) + { + std::cerr << e.message() << std::endl; + throw; + } + + return 0; +} diff --git a/libs/libcommon/src/tests/CMakeLists.txt b/libs/libcommon/src/tests/CMakeLists.txt index 355c6679362..35a7bddaa3a 100644 --- a/libs/libcommon/src/tests/CMakeLists.txt +++ b/libs/libcommon/src/tests/CMakeLists.txt @@ -17,7 +17,6 @@ target_link_libraries (date_lut4 common ${PLATFORM_LIBS}) target_link_libraries (date_lut_default_timezone common ${PLATFORM_LIBS}) target_link_libraries (local_date_time_comparison common) target_link_libraries (realloc-perf common) -add_check(multi_version) add_check(local_date_time_comparison) add_executable (unit_tests_libcommon gtest_json_test.cpp gtest_strong_typedef.cpp gtest_find_symbols.cpp) From 00a4b2cf8ac68cb0cc9e8e3ac1fc2826f0a05d7c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 13 Jan 2019 21:51:57 +0300 Subject: [PATCH 063/586] Removing garbage, part 1 --- dbms/src/Common/CurrentThread.cpp | 64 +++++------------------ dbms/src/Common/CurrentThread.h | 21 +------- dbms/src/Common/ThreadStatus.cpp | 15 ++++-- dbms/src/Common/ThreadStatus.h | 22 ++++---- dbms/src/Common/tests/multi_version.cpp | 2 +- dbms/src/Common/tests/thread_pool_2.cpp | 2 +- dbms/src/Interpreters/ThreadStatusExt.cpp | 33 ++++++------ utils/iotest/iotest.cpp | 2 +- utils/iotest/iotest_aio.cpp | 9 ++-- utils/iotest/iotest_nonblock.cpp | 2 +- 10 files changed, 61 insertions(+), 111 deletions(-) diff --git a/dbms/src/Common/CurrentThread.cpp b/dbms/src/Common/CurrentThread.cpp index b2f165e5469..8c05c91bac3 100644 --- a/dbms/src/Common/CurrentThread.cpp +++ b/dbms/src/Common/CurrentThread.cpp @@ -2,6 +2,7 @@ 
#include "CurrentThread.h" #include +#include #include #include #include @@ -10,11 +11,6 @@ #include -#if defined(ARCADIA_ROOT) -# include -#endif - - namespace DB { @@ -23,91 +19,59 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -// Smoker's implementation to avoid thread_local usage: error: undefined symbol: __cxa_thread_atexit -#if defined(ARCADIA_ROOT) -struct ThreadStatusPtrHolder : ThreadStatusPtr -{ - ThreadStatusPtrHolder() { ThreadStatusPtr::operator=(ThreadStatus::create()); } -}; -struct ThreadScopePtrHolder : CurrentThread::ThreadScopePtr -{ - ThreadScopePtrHolder() { CurrentThread::ThreadScopePtr::operator=(std::make_shared()); } -}; -# define current_thread (*FastTlsSingleton()) -# define current_thread_scope (*FastTlsSingleton()) -#else -/// Order of current_thread and current_thread_scope matters -thread_local ThreadStatusPtr _current_thread = ThreadStatus::create(); -thread_local CurrentThread::ThreadScopePtr _current_thread_scope = std::make_shared(); -# define current_thread _current_thread -# define current_thread_scope _current_thread_scope -#endif - void CurrentThread::updatePerformanceCounters() { - get()->updatePerformanceCounters(); + get().updatePerformanceCounters(); } -ThreadStatusPtr CurrentThread::get() +ThreadStatus & CurrentThread::get() { -#ifndef NDEBUG - if (!current_thread || current_thread.use_count() <= 0) + if (unlikely(!current_thread)) throw Exception("Thread #" + std::to_string(Poco::ThreadNumber::get()) + " status was not initialized", ErrorCodes::LOGICAL_ERROR); - if (Poco::ThreadNumber::get() != current_thread->thread_number) - throw Exception("Current thread has different thread number", ErrorCodes::LOGICAL_ERROR); -#endif - - return current_thread; -} - -CurrentThread::ThreadScopePtr CurrentThread::getScope() -{ - return current_thread_scope; + return *current_thread; } ProfileEvents::Counters & CurrentThread::getProfileEvents() { - return current_thread->performance_counters; + return get().performance_counters; } MemoryTracker & CurrentThread::getMemoryTracker() { - return current_thread->memory_tracker; + return get().memory_tracker; } void CurrentThread::updateProgressIn(const Progress & value) { - current_thread->progress_in.incrementPiecewiseAtomically(value); + get().progress_in.incrementPiecewiseAtomically(value); } void CurrentThread::updateProgressOut(const Progress & value) { - current_thread->progress_out.incrementPiecewiseAtomically(value); + get().progress_out.incrementPiecewiseAtomically(value); } void CurrentThread::attachInternalTextLogsQueue(const std::shared_ptr & logs_queue) { - get()->attachInternalTextLogsQueue(logs_queue); + get().attachInternalTextLogsQueue(logs_queue); } std::shared_ptr CurrentThread::getInternalTextLogsQueue() { /// NOTE: this method could be called at early server startup stage - /// NOTE: this method could be called in ThreadStatus destructor, therefore we make use_count() check just in case - - if (!current_thread || current_thread.use_count() <= 0) + if (!current_thread) return nullptr; - if (current_thread->getCurrentState() == ThreadStatus::ThreadState::Died) + if (get().getCurrentState() == ThreadStatus::ThreadState::Died) return nullptr; - return current_thread->getInternalTextLogsQueue(); + return get().getInternalTextLogsQueue(); } ThreadGroupStatusPtr CurrentThread::getGroup() { - return get()->getThreadGroup(); + return get().getThreadGroup(); } } diff --git a/dbms/src/Common/CurrentThread.h b/dbms/src/Common/CurrentThread.h index 9820b3620ce..60e7993b5fc 100644 --- 
a/dbms/src/Common/CurrentThread.h +++ b/dbms/src/Common/CurrentThread.h @@ -32,7 +32,7 @@ class CurrentThread { public: /// Handler to current thread - static ThreadStatusPtr get(); + static ThreadStatus & get(); /// Group to which belongs current thread static ThreadGroupStatusPtr getGroup(); @@ -85,25 +85,6 @@ public: bool log_peak_memory_usage_in_destructor = true; }; - /// Implicitly finalizes current thread in the destructor - class ThreadScope - { - public: - void (*deleter)() = nullptr; - - ThreadScope() = default; - ~ThreadScope() - { - if (deleter) - deleter(); - - /// std::terminate on exception: this is Ok. - } - }; - - using ThreadScopePtr = std::shared_ptr; - static ThreadScopePtr getScope(); - private: static void defaultThreadDeleter(); }; diff --git a/dbms/src/Common/ThreadStatus.cpp b/dbms/src/Common/ThreadStatus.cpp index 0ee09d527ce..3d66c33f032 100644 --- a/dbms/src/Common/ThreadStatus.cpp +++ b/dbms/src/Common/ThreadStatus.cpp @@ -21,10 +21,13 @@ namespace ErrorCodes } +thread_local ThreadStatusPtr current_thread = nullptr; + + TasksStatsCounters TasksStatsCounters::current() { TasksStatsCounters res; - CurrentThread::get()->taskstats_getter->getStat(res.stat, CurrentThread::get()->os_thread_id); + CurrentThread::get().taskstats_getter->getStat(res.stat, CurrentThread::get().os_thread_id); return res; } @@ -39,17 +42,19 @@ ThreadStatus::ThreadStatus() memory_tracker.setDescription("(for thread)"); log = &Poco::Logger::get("ThreadStatus"); + current_thread = this; + /// NOTE: It is important not to do any non-trivial actions (like updating ProfileEvents or logging) before ThreadStatus is created /// Otherwise it could lead to SIGSEGV due to current_thread dereferencing } -ThreadStatusPtr ThreadStatus::create() +ThreadStatus::~ThreadStatus() { - return ThreadStatusPtr(new ThreadStatus); + if (deleter) + deleter(); + current_thread = nullptr; } -ThreadStatus::~ThreadStatus() = default; - void ThreadStatus::initPerformanceCounters() { performance_counters_finalized = false; diff --git a/dbms/src/Common/ThreadStatus.h b/dbms/src/Common/ThreadStatus.h index 3f7a91a54f0..19c60f5cfc7 100644 --- a/dbms/src/Common/ThreadStatus.h +++ b/dbms/src/Common/ThreadStatus.h @@ -9,6 +9,8 @@ #include #include #include +#include +#include namespace Poco @@ -23,7 +25,7 @@ namespace DB class Context; class QueryStatus; class ThreadStatus; -using ThreadStatusPtr = std::shared_ptr; +using ThreadStatusPtr = ThreadStatus*; class QueryThreadLog; struct TasksStatsCounters; struct RUsageCounters; @@ -67,14 +69,20 @@ public: using ThreadGroupStatusPtr = std::shared_ptr; +extern thread_local ThreadStatusPtr current_thread; + /** Encapsulates all per-thread info (ProfileEvents, MemoryTracker, query_id, query context, etc.). - * Used inside thread-local variable. See variables in CurrentThread.cpp + * The object must be created in thread function and destroyed in the same thread before the exit. + * It is accessed through thread-local pointer. 
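 * (Lifetime discipline implied by this: the thread function should create its
 * ThreadStatus before doing anything else and destroy it last, since the
 * destructor resets the thread-local current_thread pointer.)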
* * This object should be used only via "CurrentThread", see CurrentThread.h */ -class ThreadStatus : public std::enable_shared_from_this +class ThreadStatus : public boost::noncopyable { public: + ThreadStatus(); + ~ThreadStatus(); + /// Poco's thread number (the same number is used in logs) UInt32 thread_number = 0; /// Linux's PID (or TGID) (the same id is shown by ps util) @@ -88,8 +96,8 @@ public: Progress progress_in; Progress progress_out; -public: - static ThreadStatusPtr create(); + using Deleter = std::function; + Deleter deleter; ThreadGroupStatusPtr getThreadGroup() const { @@ -136,11 +144,7 @@ public: /// Detaches thread from the thread group and the query, dumps performance counters if they have not been dumped void detachQuery(bool exit_if_already_detached = false, bool thread_exits = false); - ~ThreadStatus(); - protected: - ThreadStatus(); - void initPerformanceCounters(); void logToQueryThreadLog(QueryThreadLog & thread_log); diff --git a/dbms/src/Common/tests/multi_version.cpp b/dbms/src/Common/tests/multi_version.cpp index 0db6d74f4aa..605cb3f0d62 100644 --- a/dbms/src/Common/tests/multi_version.cpp +++ b/dbms/src/Common/tests/multi_version.cpp @@ -23,7 +23,7 @@ void thread2(MV & x, const char * result) } -int main(int argc, char ** argv) +int main(int, char **) { try { diff --git a/dbms/src/Common/tests/thread_pool_2.cpp b/dbms/src/Common/tests/thread_pool_2.cpp index 64a04c471f6..029c3695e36 100644 --- a/dbms/src/Common/tests/thread_pool_2.cpp +++ b/dbms/src/Common/tests/thread_pool_2.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include int main(int, char **) diff --git a/dbms/src/Interpreters/ThreadStatusExt.cpp b/dbms/src/Interpreters/ThreadStatusExt.cpp index eac9251cdf0..20a58de98f8 100644 --- a/dbms/src/Interpreters/ThreadStatusExt.cpp +++ b/dbms/src/Interpreters/ThreadStatusExt.cpp @@ -36,7 +36,7 @@ String ThreadStatus::getQueryID() void CurrentThread::defaultThreadDeleter() { - ThreadStatus & thread = *CurrentThread::get(); + ThreadStatus & thread = CurrentThread::get(); LOG_TRACE(thread.log, "Thread " << thread.thread_number << " exited"); thread.detachQuery(true, true); } @@ -51,8 +51,8 @@ void ThreadStatus::initializeQuery() memory_tracker.setParent(&thread_group->memory_tracker); thread_group->memory_tracker.setDescription("(for query)"); - thread_group->master_thread = shared_from_this(); - thread_group->thread_statuses.emplace(thread_number, shared_from_this()); + thread_group->master_thread = this; + thread_group->thread_statuses.emplace(thread_number, this); initPerformanceCounters(); thread_state = ThreadState::AttachedToQuery; @@ -87,7 +87,7 @@ void ThreadStatus::attachQuery(const ThreadGroupStatusPtr & thread_group_, bool if (!global_context) global_context = thread_group->global_context; - if (!thread_group->thread_statuses.emplace(thread_number, shared_from_this()).second) + if (!thread_group->thread_statuses.emplace(thread_number, this).second) throw Exception("Thread " + std::to_string(thread_number) + " is attached twice", ErrorCodes::LOGICAL_ERROR); } @@ -193,48 +193,47 @@ void ThreadStatus::logToQueryThreadLog(QueryThreadLog & thread_log) void CurrentThread::initializeQuery() { - get()->initializeQuery(); - getScope()->deleter = CurrentThread::defaultThreadDeleter; + get().initializeQuery(); + get().deleter = CurrentThread::defaultThreadDeleter; } void CurrentThread::attachTo(const ThreadGroupStatusPtr & thread_group) { - get()->attachQuery(thread_group, true); - getScope()->deleter = CurrentThread::defaultThreadDeleter; + 
get().attachQuery(thread_group, true); + get().deleter = CurrentThread::defaultThreadDeleter; } void CurrentThread::attachToIfDetached(const ThreadGroupStatusPtr & thread_group) { - get()->attachQuery(thread_group, false); - getScope()->deleter = CurrentThread::defaultThreadDeleter; + get().attachQuery(thread_group, false); + get().deleter = CurrentThread::defaultThreadDeleter; } std::string CurrentThread::getCurrentQueryID() { - if (!get() || get().use_count() <= 0) + if (!current_thread) return {}; - - return get()->getQueryID(); + return get().getQueryID(); } void CurrentThread::attachQueryContext(Context & query_context) { - return get()->attachQueryContext(query_context); + return get().attachQueryContext(query_context); } void CurrentThread::finalizePerformanceCounters() { - get()->finalizePerformanceCounters(); + get().finalizePerformanceCounters(); } void CurrentThread::detachQuery() { - get()->detachQuery(false); + get().detachQuery(false); } void CurrentThread::detachQueryIfNotDetached() { - get()->detachQuery(true); + get().detachQuery(true); } diff --git a/utils/iotest/iotest.cpp b/utils/iotest/iotest.cpp index 3134a49056d..499eb9b464f 100644 --- a/utils/iotest/iotest.cpp +++ b/utils/iotest/iotest.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/utils/iotest/iotest_aio.cpp b/utils/iotest/iotest_aio.cpp index 1dcb8ea2ae7..77846e1ca80 100644 --- a/utils/iotest/iotest_aio.cpp +++ b/utils/iotest/iotest_aio.cpp @@ -1,5 +1,5 @@ #if __APPLE__ || __FreeBSD__ -int main(int argc, char ** argv) { return 0; } +int main(int, char **) { return 0; } #else #include @@ -11,7 +11,7 @@ int main(int argc, char ** argv) { return 0; } #include #include #include -#include +#include #include #include #include @@ -22,10 +22,7 @@ int main(int argc, char ** argv) { return 0; } #include #include #include - -#if !defined(__APPLE__) && !defined(__FreeBSD__) - #include -#endif +#include #include diff --git a/utils/iotest/iotest_nonblock.cpp b/utils/iotest/iotest_nonblock.cpp index f85e8df91f6..62871f6c162 100644 --- a/utils/iotest/iotest_nonblock.cpp +++ b/utils/iotest/iotest_nonblock.cpp @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include From e197697b22c84916173cd241bb4e11dd5576c7b4 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 13 Jan 2019 22:31:48 +0300 Subject: [PATCH 064/586] fix --- dbms/src/Storages/AlterCommands.cpp | 38 +++++++++++++++++-- dbms/src/Storages/AlterCommands.h | 6 ++- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 5 ++- dbms/src/Storages/StorageMergeTree.cpp | 6 ++- .../Storages/StorageReplicatedMergeTree.cpp | 6 ++- 5 files changed, 49 insertions(+), 12 deletions(-) diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index 2cde9562c82..a32d37dcea3 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -156,7 +156,8 @@ static bool namesEqual(const String & name_without_dot, const DB::NameAndTypePai return (name_with_dot == name_type.name.substr(0, name_without_dot.length() + 1) || name_without_dot == name_type.name); } -void AlterCommand::apply(ColumnsDescription & columns_description, ASTPtr & order_by_ast, ASTPtr & primary_key_ast) const +void AlterCommand::apply(ColumnsDescription & columns_description, ASTPtr & order_by_ast, + ASTPtr & primary_key_ast, ASTPtr & indexes_decl_ast) const { if (type == ADD_COLUMN) { @@ -315,6 +316,29 @@ void AlterCommand::apply(ColumnsDescription & columns_description, ASTPtr & 
orde { columns_description.comments[column_name] = comment; } + else if (type == ADD_INDEX) + { + if (std::any_of( + indexes_decl_ast->children.cbegin(), + indexes_decl_ast->children.cend(), + [this](const ASTPtr & index_ast){ + return typeid_cast(*index_ast).name == index_name; + })) + { + if (if_not_exists) + return; + else + throw Exception{"Cannot add index " + index_name + ": index with this name already exists", + ErrorCodes::ILLEGAL_COLUMN}; + } + + //auto insert_it = indexes_decl_ast->children.end(); + // TODO: implementation + } + else if (type == DROP_INDEX) + { + // TODO: implementation + } else throw Exception("Wrong parameter type in ALTER query", ErrorCodes::LOGICAL_ERROR); } @@ -329,19 +353,22 @@ bool AlterCommand::is_mutable() const return true; } -void AlterCommands::apply(ColumnsDescription & columns_description, ASTPtr & order_by_ast, ASTPtr & primary_key_ast) const +void AlterCommands::apply(ColumnsDescription & columns_description, ASTPtr & order_by_ast, + ASTPtr & primary_key_ast, ASTPtr & indexes_decl_ast) const { auto new_columns_description = columns_description; auto new_order_by_ast = order_by_ast; auto new_primary_key_ast = primary_key_ast; + auto new_indexes_decl_ast = indexes_decl_ast; for (const AlterCommand & command : *this) if (!command.ignore) - command.apply(new_columns_description, new_order_by_ast, new_primary_key_ast); + command.apply(new_columns_description, new_order_by_ast, new_primary_key_ast, new_indexes_decl_ast); columns_description = std::move(new_columns_description); order_by_ast = std::move(new_order_by_ast); primary_key_ast = std::move(new_primary_key_ast); + indexes_decl_ast = std::move(new_indexes_decl_ast); } void AlterCommands::validate(const IStorage & table, const Context & context) @@ -558,12 +585,15 @@ void AlterCommands::apply(ColumnsDescription & columns_description) const auto out_columns_description = columns_description; ASTPtr out_order_by; ASTPtr out_primary_key; - apply(out_columns_description, out_order_by, out_primary_key); + ASTPtr out_indexes_decl; + apply(out_columns_description, out_order_by, out_primary_key, out_indexes_decl); if (out_order_by) throw Exception("Storage doesn't support modifying ORDER BY expression", ErrorCodes::NOT_IMPLEMENTED); if (out_primary_key) throw Exception("Storage doesn't support modifying PRIMARY KEY expression", ErrorCodes::NOT_IMPLEMENTED); + if (out_indexes_decl) + throw Exception("Storage doesn't support modifying INDEXES", ErrorCodes::NOT_IMPLEMENTED); columns_description = std::move(out_columns_description); } diff --git a/dbms/src/Storages/AlterCommands.h b/dbms/src/Storages/AlterCommands.h index a465a8412ed..cd4bb622a95 100644 --- a/dbms/src/Storages/AlterCommands.h +++ b/dbms/src/Storages/AlterCommands.h @@ -76,7 +76,8 @@ struct AlterCommand static std::optional parse(const ASTAlterCommand * command); - void apply(ColumnsDescription & columns_description, ASTPtr & order_by_ast, ASTPtr & primary_key_ast) const; + void apply(ColumnsDescription & columns_description, ASTPtr & order_by_ast, + ASTPtr & primary_key_ast, ASTPtr & indexes_decl_ast) const; /// Checks that not only metadata touched by that command bool is_mutable() const; }; @@ -87,7 +88,8 @@ class Context; class AlterCommands : public std::vector { public: - void apply(ColumnsDescription & columns_description, ASTPtr & order_by_ast, ASTPtr & primary_key_ast) const; + void apply(ColumnsDescription & columns_description, ASTPtr & order_by_ast, + ASTPtr & primary_key_ast, ASTPtr & index_decl_ast) const; /// For storages that 
don't support MODIFY_ORDER_BY. void apply(ColumnsDescription & columns_description) const; diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index c2396e02988..3e51efa808d 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -1042,7 +1042,8 @@ void MergeTreeData::checkAlter(const AlterCommands & commands) auto new_columns = getColumns(); ASTPtr new_order_by_ast = order_by_ast; ASTPtr new_primary_key_ast = primary_key_ast; - commands.apply(new_columns, new_order_by_ast, new_primary_key_ast); + ASTPtr new_indexes_ast = skip_indexes_ast; + commands.apply(new_columns, new_order_by_ast, new_primary_key_ast, new_indexes_ast); /// Set of columns that shouldn't be altered. NameSet columns_alter_forbidden; @@ -1122,7 +1123,7 @@ void MergeTreeData::checkAlter(const AlterCommands & commands) } setPrimaryKeyAndColumns(new_order_by_ast, new_primary_key_ast, new_columns, /* only_check = */ true); - setSkipIndexes(skip_indexes_ast, /* only_check = */ true); + setSkipIndexes(new_indexes_ast, /* only_check = */ true); /// Check that type conversions are possible. ExpressionActionsPtr unused_expression; diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index f71a64662a4..1fd0f422975 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -216,7 +216,8 @@ void StorageMergeTree::alter( auto new_columns = data.getColumns(); ASTPtr new_order_by_ast = data.order_by_ast; ASTPtr new_primary_key_ast = data.primary_key_ast; - params.apply(new_columns, new_order_by_ast, new_primary_key_ast); + ASTPtr new_indexes_ast = data.skip_indexes_ast; + params.apply(new_columns, new_order_by_ast, new_primary_key_ast, new_indexes_ast); auto parts = data.getDataParts({MergeTreeDataPartState::PreCommitted, MergeTreeDataPartState::Committed, MergeTreeDataPartState::Outdated}); auto columns_for_parts = new_columns.getAllPhysical(); @@ -702,7 +703,8 @@ void StorageMergeTree::clearColumnInPartition(const ASTPtr & partition, const Fi auto new_columns = getColumns(); ASTPtr ignored_order_by_ast; ASTPtr ignored_primary_key_ast; - alter_command.apply(new_columns, ignored_order_by_ast, ignored_primary_key_ast); + ASTPtr ignored_indexes_ast; + alter_command.apply(new_columns, ignored_order_by_ast, ignored_primary_key_ast, ignored_indexes_ast); auto columns_for_parts = new_columns.getAllPhysical(); for (const auto & part : parts) diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 50b133482b2..732ad0405cc 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -1489,7 +1489,8 @@ void StorageReplicatedMergeTree::executeClearColumnInPartition(const LogEntry & auto new_columns = getColumns(); ASTPtr ignored_order_by_ast; ASTPtr ignored_primary_key_ast; - alter_command.apply(new_columns, ignored_order_by_ast, ignored_primary_key_ast); + ASTPtr ignored_indexes_ast; + alter_command.apply(new_columns, ignored_order_by_ast, ignored_primary_key_ast, ignored_indexes_ast); size_t modified_parts = 0; auto parts = data.getDataParts(); @@ -3067,7 +3068,8 @@ void StorageReplicatedMergeTree::alter(const AlterCommands & params, ColumnsDescription new_columns = data.getColumns(); ASTPtr new_order_by_ast = data.order_by_ast; ASTPtr new_primary_key_ast = data.primary_key_ast; - params.apply(new_columns, new_order_by_ast, 
new_primary_key_ast);
+    ASTPtr new_indexes_ast = data.skip_indexes_ast;
+    params.apply(new_columns, new_order_by_ast, new_primary_key_ast, new_indexes_ast);
 
     String new_columns_str = new_columns.toString();
     if (new_columns_str != data.getColumns().toString())

From aec5570b6ecffdb701f17f7c107df28f8df88b4b Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 14 Jan 2019 13:59:58 +0300
Subject: [PATCH 065/586] Attempt to implement global thread pool #4018

---
 dbms/src/Common/MultiVersion.h          |  56 ++++++++
 dbms/src/Common/ThreadPool.cpp          | 169 ++++++++++++++++++++++++
 dbms/src/Common/ThreadPool.h            | 151 +++++++++++++++++++++
 dbms/src/Common/tests/multi_version.cpp |  56 ++++++++
 4 files changed, 432 insertions(+)
 create mode 100644 dbms/src/Common/MultiVersion.h
 create mode 100644 dbms/src/Common/ThreadPool.cpp
 create mode 100644 dbms/src/Common/ThreadPool.h
 create mode 100644 dbms/src/Common/tests/multi_version.cpp

diff --git a/dbms/src/Common/MultiVersion.h b/dbms/src/Common/MultiVersion.h
new file mode 100644
index 00000000000..3014689f861
--- /dev/null
+++ b/dbms/src/Common/MultiVersion.h
@@ -0,0 +1,56 @@
+#pragma once
+
+#include <mutex>
+#include <memory>
+
+
+/** Stores an object for read-only use from several threads,
+  * while another thread may atomically replace it.
+  * The replacement is atomic, and reading threads can keep working with different versions of the object.
+  *
+  * Usage:
+  *   MultiVersion<T> x;
+  * - on data update:
+  *   x.set(new value);
+  * - on read-only usage:
+  *   {
+  *       MultiVersion<T>::Version current_version = x.get();
+  *       // use *current_version
+  *   }   // now we release our current version; if it is outdated and no one else is using it, it will be destroyed.
+  *
+  * All methods are thread-safe.
+  */
+template <typename T>
+class MultiVersion
+{
+public:
+    /// Version of the object for use. The shared_ptr manages the lifetime of the version.
+    using Version = std::shared_ptr<const T>;
+
+    /// Default initialization - by nullptr.
+    MultiVersion() = default;
+
+    MultiVersion(std::unique_ptr<const T> && value)
+    {
+        set(std::move(value));
+    }
+
+    /// Obtain current version for read-only usage. Returns shared_ptr that manages the lifetime of the version.
+    Version get() const
+    {
+        /// NOTE: is it possible to lock-free replace of shared_ptr?
+        std::lock_guard<std::mutex> lock(mutex);
+        return current_version;
+    }
+
+    /// Update an object with new version.
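+    /// (On the NOTE in get() above: std::atomic_load/std::atomic_store on the
+    /// shared_ptr, or C++20 std::atomic<std::shared_ptr<T>>, could plausibly
+    /// replace the mutex; that trade-off is not explored in this patch.)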
+ void set(std::unique_ptr && value) + { + std::lock_guard lock(mutex); + current_version = std::move(value); + } + +private: + Version current_version; + mutable std::mutex mutex; +}; diff --git a/dbms/src/Common/ThreadPool.cpp b/dbms/src/Common/ThreadPool.cpp new file mode 100644 index 00000000000..e3f03e18a46 --- /dev/null +++ b/dbms/src/Common/ThreadPool.cpp @@ -0,0 +1,169 @@ +#include +#include + + +template +ThreadPoolImpl::ThreadPoolImpl(size_t num_threads) + : ThreadPoolImpl(num_threads, num_threads) +{ +} + +template +ThreadPoolImpl::ThreadPoolImpl(size_t num_threads, size_t queue_size) + : num_threads(num_threads), queue_size(queue_size) +{ + threads.reserve(num_threads); +} + +template +void ThreadPoolImpl::schedule(Job job, int priority) +{ + { + std::unique_lock lock(mutex); + job_finished.wait(lock, [this] { return !queue_size || active_jobs < queue_size || shutdown; }); + if (shutdown) + return; + + jobs.emplace(std::move(job), priority); + ++active_jobs; + + if (threads.size() < std::min(num_threads, active_jobs)) + threads.emplace_back([this] { worker(); }); + } + new_job_or_shutdown.notify_one(); +} + +template +void ThreadPoolImpl::wait() +{ + { + std::unique_lock lock(mutex); + job_finished.wait(lock, [this] { return active_jobs == 0; }); + + if (first_exception) + { + std::exception_ptr exception; + std::swap(exception, first_exception); + std::rethrow_exception(exception); + } + } +} + +template +ThreadPoolImpl::~ThreadPoolImpl() +{ + finalize(); +} + +template +void ThreadPoolImpl::finalize() +{ + { + std::unique_lock lock(mutex); + shutdown = true; + } + + new_job_or_shutdown.notify_all(); + + for (auto & thread : threads) + thread.join(); + + threads.clear(); +} + +template +size_t ThreadPoolImpl::active() const +{ + std::unique_lock lock(mutex); + return active_jobs; +} + +template +void ThreadPoolImpl::worker() +{ + while (true) + { + Job job; + bool need_shutdown = false; + + { + std::unique_lock lock(mutex); + new_job_or_shutdown.wait(lock, [this] { return shutdown || !jobs.empty(); }); + need_shutdown = shutdown; + + if (!jobs.empty()) + { + job = jobs.top().job; + jobs.pop(); + } + else + { + return; + } + } + + if (!need_shutdown) + { + try + { + job(); + } + catch (...) + { + { + std::unique_lock lock(mutex); + if (!first_exception) + first_exception = std::current_exception(); + shutdown = true; + --active_jobs; + } + job_finished.notify_all(); + new_job_or_shutdown.notify_all(); + return; + } + } + + { + std::unique_lock lock(mutex); + --active_jobs; + } + + job_finished.notify_all(); + } +} + + +template class ThreadPoolImpl; +template class ThreadPoolImpl; + + +void ExceptionHandler::setException(std::exception_ptr && exception) +{ + std::unique_lock lock(mutex); + if (!first_exception) + first_exception = std::move(exception); +} + +void ExceptionHandler::throwIfException() +{ + std::unique_lock lock(mutex); + if (first_exception) + std::rethrow_exception(first_exception); +} + + +ThreadPool::Job createExceptionHandledJob(ThreadPool::Job job, ExceptionHandler & handler) +{ + return [job{std::move(job)}, &handler] () + { + try + { + job(); + } + catch (...) 
+        {
+            handler.setException(std::current_exception());
+        }
+    };
+}
+
diff --git a/dbms/src/Common/ThreadPool.h b/dbms/src/Common/ThreadPool.h
new file mode 100644
index 00000000000..a8cf84dd7b5
--- /dev/null
+++ b/dbms/src/Common/ThreadPool.h
@@ -0,0 +1,151 @@
+#pragma once
+
+#include <cstdint>
+#include <vector>
+#include <queue>
+#include <mutex>
+#include <condition_variable>
+#include <functional>
+#include <thread>
+#include <ext/singleton.h>
+
+
+/** Very simple thread pool similar to boost::threadpool.
+  * Advantages:
+  * - catches exceptions and rethrows on wait.
+  */
+
+template <typename Thread>
+class ThreadPoolImpl
+{
+public:
+    using Job = std::function<void()>;
+
+    /// Size is constant. Up to num_threads are created on demand and then run until shutdown.
+    explicit ThreadPoolImpl(size_t num_threads);
+
+    /// queue_size - maximum number of running plus scheduled jobs. It can be greater than num_threads. Zero means unlimited.
+    ThreadPoolImpl(size_t num_threads, size_t queue_size);
+
+    /// Add new job. Blocks until the number of active jobs is below the maximum, or until an exception is thrown in one of the threads.
+    /// If an exception was thrown in some thread, this method silently returns, and the exception is rethrown only on the call to the 'wait' function.
+    /// Priority: greater is higher.
+    void schedule(Job job, int priority = 0);
+
+    /// Wait for all currently active jobs to be done.
+    /// You may call schedule and wait many times in arbitrary order.
+    /// If any thread threw an exception, the first exception will be rethrown from this method,
+    /// and the exception will be cleared.
+    void wait();
+
+    /// Waits for all threads. Doesn't rethrow exceptions (use the 'wait' method to rethrow exceptions).
+    /// You should not destroy the object while calling schedule or wait from other threads.
+    ~ThreadPoolImpl();
+
+    size_t size() const { return num_threads; }
+
+    /// Returns number of running and scheduled jobs.
+    size_t active() const;
+
+private:
+    mutable std::mutex mutex;
+    std::condition_variable job_finished;
+    std::condition_variable new_job_or_shutdown;
+
+    const size_t num_threads;
+    const size_t queue_size;
+
+    size_t active_jobs = 0;
+    bool shutdown = false;
+
+    struct JobWithPriority
+    {
+        Job job;
+        int priority;
+
+        JobWithPriority(Job job, int priority)
+            : job(job), priority(priority) {}
+
+        bool operator< (const JobWithPriority & rhs) const
+        {
+            return priority < rhs.priority;
+        }
+    };
+
+    std::priority_queue<JobWithPriority> jobs;
+    std::vector<Thread> threads;
+    std::exception_ptr first_exception;
+
+
+    void worker();
+
+    void finalize();
+};
+
+
+using FreeThreadPool = ThreadPoolImpl<std::thread>;
+
+class GlobalThreadPool : public FreeThreadPool, public ext::singleton<GlobalThreadPool>
+{
+public:
+    GlobalThreadPool() : FreeThreadPool(10000) {}   /// TODO: global blocking limit may lead to deadlocks.
+};
+
+class ThreadFromGlobalPool
+{
+public:
+    ThreadFromGlobalPool() {}
+
+    ThreadFromGlobalPool(std::function<void()> func)
+    {
+        mutex = std::make_unique<std::mutex>();
+        /// The function object must be copyable, so we wrap the lock_guard in shared_ptr.
+        GlobalThreadPool::instance().schedule([lock = std::make_shared<std::lock_guard<std::mutex>>(*mutex), func = std::move(func)] { func(); });
+    }
+
+    ThreadFromGlobalPool(ThreadFromGlobalPool && rhs)
+    {
+        *this = std::move(rhs);
+    }
+
+    ThreadFromGlobalPool & operator=(ThreadFromGlobalPool && rhs)
+    {
+        if (mutex)
+            std::terminate();
+        mutex = std::move(rhs.mutex);
+        return *this;
+    }
+
+    ~ThreadFromGlobalPool()
+    {
+        if (mutex)
+            std::terminate();
+    }
+
+    void join()
+    {
+        {
+            std::lock_guard<std::mutex> lock(*mutex);
+        }
+        mutex.reset();
+    }
+private:
+    std::unique_ptr<std::mutex> mutex;  /// Object must be movable.
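+
+    /// (How this emulates join(): the scheduled closure holds a lock_guard on
+    /// *mutex for its entire run, so join() blocks on that same mutex until the
+    /// job has finished; destroying a not-yet-joined object terminates, just as
+    /// with std::thread.)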
+}; + +using ThreadPool = ThreadPoolImpl; + + +/// Allows to save first catched exception in jobs and postpone its rethrow. +class ExceptionHandler +{ +public: + void setException(std::exception_ptr && exception); + void throwIfException(); + +private: + std::exception_ptr first_exception; + std::mutex mutex; +}; + +ThreadPool::Job createExceptionHandledJob(ThreadPool::Job job, ExceptionHandler & handler); diff --git a/dbms/src/Common/tests/multi_version.cpp b/dbms/src/Common/tests/multi_version.cpp new file mode 100644 index 00000000000..ee90a79801b --- /dev/null +++ b/dbms/src/Common/tests/multi_version.cpp @@ -0,0 +1,56 @@ +#include +#include +#include +#include +#include +#include + + +using T = std::string; +using MV = MultiVersion; +using Results = std::vector; + + +void thread1(MV & x, T & result) +{ + MV::Version v = x.get(); + result = *v; +} + +void thread2(MV & x, const char * result) +{ + x.set(std::make_unique(result)); +} + + +int main(int argc, char ** argv) +{ + try + { + const char * s1 = "Hello!"; + const char * s2 = "Goodbye!"; + + size_t n = 1000; + MV x(std::make_unique(s1)); + Results results(n); + + ThreadPool tp(8); + for (size_t i = 0; i < n; ++i) + { + tp.schedule(std::bind(thread1, std::ref(x), std::ref(results[i]))); + tp.schedule(std::bind(thread2, std::ref(x), (rand() % 2) ? s1 : s2)); + } + tp.wait(); + + for (size_t i = 0; i < n; ++i) + std::cerr << results[i] << " "; + std::cerr << std::endl; + } + catch (const Poco::Exception & e) + { + std::cerr << e.message() << std::endl; + throw; + } + + return 0; +} From 5a50a4fe2140c02e20694dd321579d471f9a9994 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 14 Jan 2019 19:27:28 +0300 Subject: [PATCH 066/586] Fix aggregate function low cardinality array argument. #4038 --- .../DataStreams/NativeBlockOutputStream.cpp | 6 +- dbms/src/DataTypes/DataTypeLowCardinality.h | 4 +- .../DataTypeLowCardinalityHelpers.cpp | 62 +++++++++++++------ dbms/src/Functions/IFunction.cpp | 12 ++-- dbms/src/Interpreters/Aggregator.cpp | 10 +-- dbms/src/Interpreters/Join.cpp | 37 ++++++++--- 6 files changed, 91 insertions(+), 40 deletions(-) diff --git a/dbms/src/DataStreams/NativeBlockOutputStream.cpp b/dbms/src/DataStreams/NativeBlockOutputStream.cpp index 11c3944afbb..4c0972af559 100644 --- a/dbms/src/DataStreams/NativeBlockOutputStream.cpp +++ b/dbms/src/DataStreams/NativeBlockOutputStream.cpp @@ -101,8 +101,10 @@ void NativeBlockOutputStream::write(const Block & block) /// Send data to old clients without low cardinality type. if (remove_low_cardinality || (client_revision && client_revision < DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE)) { - column.column = recursiveRemoveLowCardinality(column.column); - column.type = recursiveRemoveLowCardinality(column.type); + if (auto col = recursiveRemoveLowCardinality(column.column.get())) + column.column = col; + if (auto type = recursiveRemoveLowCardinality(column.type.get())) + column.type = type; } /// Name diff --git a/dbms/src/DataTypes/DataTypeLowCardinality.h b/dbms/src/DataTypes/DataTypeLowCardinality.h index 5744419bf01..74faf038ac8 100644 --- a/dbms/src/DataTypes/DataTypeLowCardinality.h +++ b/dbms/src/DataTypes/DataTypeLowCardinality.h @@ -165,10 +165,10 @@ private: DataTypePtr removeLowCardinality(const DataTypePtr & type); /// Remove LowCardinality recursively from all nested types. 
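+/// Both helpers below return nullptr when there was no LowCardinality to strip,
+/// so callers can keep the original object without making a copy.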
-DataTypePtr recursiveRemoveLowCardinality(const DataTypePtr & type); +DataTypePtr recursiveRemoveLowCardinality(const IDataType * type); /// Remove LowCardinality recursively from all nested columns. -ColumnPtr recursiveRemoveLowCardinality(const ColumnPtr & column); +ColumnPtr recursiveRemoveLowCardinality(const IColumn * column); /// Convert column of type from_type to type to_type by converting nested LowCardinality columns. ColumnPtr recursiveLowCardinalityConversion(const ColumnPtr & column, const DataTypePtr & from_type, const DataTypePtr & to_type); diff --git a/dbms/src/DataTypes/DataTypeLowCardinalityHelpers.cpp b/dbms/src/DataTypes/DataTypeLowCardinalityHelpers.cpp index 215b21f7994..2b17f24969e 100644 --- a/dbms/src/DataTypes/DataTypeLowCardinalityHelpers.cpp +++ b/dbms/src/DataTypes/DataTypeLowCardinalityHelpers.cpp @@ -16,19 +16,31 @@ namespace ErrorCodes extern const int TYPE_MISMATCH; } -DataTypePtr recursiveRemoveLowCardinality(const DataTypePtr & type) +DataTypePtr recursiveRemoveLowCardinality(const IDataType * type) { if (!type) - return type; + return nullptr; - if (const auto * array_type = typeid_cast(type.get())) - return std::make_shared(recursiveRemoveLowCardinality(array_type->getNestedType())); + if (const auto * array_type = typeid_cast(type)) + if (auto nested = recursiveRemoveLowCardinality(array_type->getNestedType().get())) + return std::make_shared(nested); - if (const auto * tuple_type = typeid_cast(type.get())) + if (const auto * tuple_type = typeid_cast(type)) { DataTypes elements = tuple_type->getElements(); + bool has_removed = false; + for (auto & element : elements) - element = recursiveRemoveLowCardinality(element); + { + if (auto removed = recursiveRemoveLowCardinality(element.get())) + { + element = removed; + has_removed = true; + } + } + + if (!has_removed) + return nullptr; if (tuple_type->haveExplicitNames()) return std::make_shared(elements, tuple_type->getElementNames()); @@ -36,35 +48,49 @@ DataTypePtr recursiveRemoveLowCardinality(const DataTypePtr & type) return std::make_shared(elements); } - if (const auto * low_cardinality_type = typeid_cast(type.get())) + if (const auto * low_cardinality_type = typeid_cast(type)) return low_cardinality_type->getDictionaryType(); - return type; + return nullptr; } -ColumnPtr recursiveRemoveLowCardinality(const ColumnPtr & column) +ColumnPtr recursiveRemoveLowCardinality(const IColumn * column) { if (!column) - return column; + return nullptr; - if (const auto * column_array = typeid_cast(column.get())) - return ColumnArray::create(recursiveRemoveLowCardinality(column_array->getDataPtr()), column_array->getOffsetsPtr()); + if (const auto * column_array = typeid_cast(column)) + if (auto nested = recursiveRemoveLowCardinality(&column_array->getData())) + return ColumnArray::create(nested, column_array->getOffsetsPtr()); - if (const auto * column_const = typeid_cast(column.get())) - return ColumnConst::create(recursiveRemoveLowCardinality(column_const->getDataColumnPtr()), column_const->size()); + if (const auto * column_const = typeid_cast(column)) + if (auto nested = recursiveRemoveLowCardinality(&column_const->getDataColumn())) + return ColumnConst::create(nested, column_const->size()); - if (const auto * column_tuple = typeid_cast(column.get())) + if (const auto * column_tuple = typeid_cast(column)) { Columns columns = column_tuple->getColumns(); + bool removed_any = false; + for (auto & element : columns) - element = recursiveRemoveLowCardinality(element); + { + if (auto nested = 
recursiveRemoveLowCardinality(element.get())) + { + element = nested; + removed_any = true; + } + } + + if (!removed_any) + return nullptr; + return ColumnTuple::create(columns); } - if (const auto * column_low_cardinality = typeid_cast(column.get())) + if (const auto * column_low_cardinality = typeid_cast(column)) return column_low_cardinality->convertToFullColumn(); - return column; + return nullptr; } ColumnPtr recursiveLowCardinalityConversion(const ColumnPtr & column, const DataTypePtr & from_type, const DataTypePtr & to_type) diff --git a/dbms/src/Functions/IFunction.cpp b/dbms/src/Functions/IFunction.cpp index ac5d1122e4a..5c753ed85fc 100644 --- a/dbms/src/Functions/IFunction.cpp +++ b/dbms/src/Functions/IFunction.cpp @@ -385,8 +385,10 @@ static void convertLowCardinalityColumnsToFull(Block & block, const ColumnNumber { ColumnWithTypeAndName & column = block.getByPosition(arg); - column.column = recursiveRemoveLowCardinality(column.column); - column.type = recursiveRemoveLowCardinality(column.type); + if (auto col = recursiveRemoveLowCardinality(column.column.get())) + column.column = col; + if (auto type = recursiveRemoveLowCardinality(column.type.get())) + column.type = type; } } @@ -599,8 +601,10 @@ DataTypePtr FunctionBuilderImpl::getReturnType(const ColumnsWithTypeAndName & ar for (auto & arg : args_without_low_cardinality) { - arg.column = recursiveRemoveLowCardinality(arg.column); - arg.type = recursiveRemoveLowCardinality(arg.type); + if (auto column = recursiveRemoveLowCardinality(arg.column.get())) + arg.column = column; + if (auto type = recursiveRemoveLowCardinality(arg.type.get())) + arg.type = type; } auto type_without_low_cardinality = getReturnTypeWithoutLowCardinality(args_without_low_cardinality); diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 145ce98dbbc..91d85cd45d8 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -768,11 +768,11 @@ bool Aggregator::executeOnBlock(const Block & block, AggregatedDataVariants & re materialized_columns.push_back(block.safeGetByPosition(params.keys[i]).column->convertToFullColumnIfConst()); key_columns[i] = materialized_columns.back().get(); - if (const auto * low_cardinality_column = typeid_cast(key_columns[i])) + if (!result.isLowCardinality()) { - if (!result.isLowCardinality()) + if (auto column = recursiveRemoveLowCardinality(key_columns[i])) { - materialized_columns.push_back(low_cardinality_column->convertToFullColumn()); + materialized_columns.emplace_back(std::move(column)); key_columns[i] = materialized_columns.back().get(); } } @@ -788,9 +788,9 @@ bool Aggregator::executeOnBlock(const Block & block, AggregatedDataVariants & re materialized_columns.push_back(block.safeGetByPosition(params.aggregates[i].arguments[j]).column->convertToFullColumnIfConst()); aggregate_columns[i][j] = materialized_columns.back().get(); - if (auto * col_low_cardinality = typeid_cast(aggregate_columns[i][j])) + if (auto column = recursiveRemoveLowCardinality(aggregate_columns[i][j])) { - materialized_columns.push_back(col_low_cardinality->convertToFullColumn()); + materialized_columns.emplace_back(std::move(column)); aggregate_columns[i][j] = materialized_columns.back().get(); } } diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 8783d16c3c1..2f0bae96104 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -253,12 +253,16 @@ void Join::setSampleBlock(const Block & block) size_t keys_size = 
key_names_right.size(); ColumnRawPtrs key_columns(keys_size); - Columns materialized_columns(keys_size); + Columns materialized_columns; for (size_t i = 0; i < keys_size; ++i) { - materialized_columns[i] = recursiveRemoveLowCardinality(block.getByName(key_names_right[i]).column); - key_columns[i] = materialized_columns[i].get(); + key_columns[i] = block.getByName(key_names_right[i]).column.get(); + if (auto col = recursiveRemoveLowCardinality(key_columns[i])) + { + materialized_columns.emplace_back(std::move(col)); + key_columns[i] = materialized_columns[i].get(); + } /// We will join only keys, where all components are not NULL. if (key_columns[i]->isColumnNullable()) @@ -278,8 +282,10 @@ void Join::setSampleBlock(const Block & block) if (key_names_right.end() != std::find(key_names_right.begin(), key_names_right.end(), name)) { auto & col = sample_block_with_columns_to_add.getByPosition(pos); - col.column = recursiveRemoveLowCardinality(col.column); - col.type = recursiveRemoveLowCardinality(col.type); + if (auto column = recursiveRemoveLowCardinality(col.column.get())) + col.column = column; + if (auto type = recursiveRemoveLowCardinality(col.type.get())) + col.type = type; sample_block_with_keys.insert(col); sample_block_with_columns_to_add.erase(pos); } @@ -429,7 +435,9 @@ bool Join::insertFromBlock(const Block & block) /// Memoize key columns to work. for (size_t i = 0; i < keys_size; ++i) { - materialized_columns.emplace_back(recursiveRemoveLowCardinality(block.getByName(key_names_right[i]).column->convertToFullColumnIfConst())); + materialized_columns.emplace_back(block.getByName(key_names_right[i]).column->convertToFullColumnIfConst()); + if (auto col = recursiveRemoveLowCardinality(materialized_columns.back().get())) + materialized_columns.back() = col; key_columns[i] = materialized_columns.back().get(); } @@ -667,7 +675,9 @@ void Join::joinBlockImpl( /// Memoize key columns to work with. for (size_t i = 0; i < keys_size; ++i) { - materialized_columns.emplace_back(recursiveRemoveLowCardinality(block.getByName(key_names_left[i]).column->convertToFullColumnIfConst())); + materialized_columns.emplace_back(block.getByName(key_names_left[i]).column->convertToFullColumnIfConst()); + if (auto col = recursiveRemoveLowCardinality(materialized_columns.back().get())) + materialized_columns.back() = col; key_columns[i] = materialized_columns.back().get(); } @@ -868,8 +878,17 @@ void Join::checkTypesOfKeys(const Block & block_left, const Names & key_names_le { /// Compare up to Nullability. 
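+        /// (LowCardinality is stripped below for the same reason: LowCardinality(X)
+        /// holds the same values as X, so such keys should be join-compatible.)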
- DataTypePtr left_type = removeNullable(recursiveRemoveLowCardinality(block_left.getByName(key_names_left[i]).type));
- DataTypePtr right_type = removeNullable(recursiveRemoveLowCardinality(block_right.getByName(key_names_right[i]).type));
+ DataTypePtr left_type = block_left.getByName(key_names_left[i]).type;
+ DataTypePtr right_type = block_right.getByName(key_names_right[i]).type;
+
+ if (auto type = recursiveRemoveLowCardinality(left_type.get()))
+ left_type = type;
+
+ if (auto type = recursiveRemoveLowCardinality(right_type.get()))
+ right_type = type;
+
+ left_type = removeNullable(left_type);
+ right_type = removeNullable(right_type);
 
 if (!left_type->equals(*right_type))
 throw Exception("Type mismatch of columns to JOIN by: "

From f6b9b063071662f1c391ee95ff16a30c89069010 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 14 Jan 2019 22:22:09 +0300
Subject: [PATCH 067/586] Attempt to implement global thread pool #4018

---
 dbms/programs/server/MetricsTransmitter.h | 3 +-
 dbms/src/Common/Config/ConfigReloader.cpp | 2 +-
 dbms/src/Common/Config/ConfigReloader.h | 3 +-
 dbms/src/Common/CurrentThread.cpp | 2 +-
 dbms/src/Common/ErrorCodes.cpp | 1 +
 dbms/src/Common/MemoryTracker.cpp | 9 +-
 dbms/src/Common/ThreadPool.cpp | 126 +++++++++++++-----
 dbms/src/Common/ThreadPool.h | 82 +++++++++---
 dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp | 4 +-
 dbms/src/Common/ZooKeeper/ZooKeeperImpl.h | 5 +-
 dbms/src/Common/tests/multi_version.cpp | 2 +-
 dbms/src/Core/BackgroundSchedulePool.cpp | 6 +-
 dbms/src/Core/BackgroundSchedulePool.h | 6 +-
 .../src/DataStreams/ParallelInputsProcessor.h | 5 +-
 .../ExecutableDictionarySource.cpp | 3 +-
 dbms/src/IO/AIOContextPool.h | 3 +-
 dbms/src/Interpreters/AsynchronousMetrics.h | 3 +-
 dbms/src/Interpreters/Compiler.cpp | 55 ++++----
 dbms/src/Interpreters/Context.h | 3 +-
 dbms/src/Interpreters/DDLWorker.cpp | 2 +-
 dbms/src/Interpreters/DDLWorker.h | 3 +-
 .../src/Interpreters/EmbeddedDictionaries.cpp | 2 +-
 dbms/src/Interpreters/EmbeddedDictionaries.h | 3 +-
 dbms/src/Interpreters/ExternalLoader.cpp | 2 +-
 dbms/src/Interpreters/ExternalLoader.h | 3 +-
 dbms/src/Interpreters/SystemLog.h | 5 +-
 .../Storages/Distributed/DirectoryMonitor.h | 3 +-
 .../MergeTree/BackgroundProcessingPool.cpp | 4 +-
 .../MergeTree/BackgroundProcessingPool.h | 4 +-
 dbms/src/Storages/StorageBuffer.cpp | 2 +-
 dbms/src/Storages/StorageBuffer.h | 3 +-
 libs/libdaemon/src/BaseDaemon.cpp | 2 -
 32 files changed, 248 insertions(+), 113 deletions(-)

diff --git a/dbms/programs/server/MetricsTransmitter.h b/dbms/programs/server/MetricsTransmitter.h
index e85113ad141..fd3853a7a9e 100644
--- a/dbms/programs/server/MetricsTransmitter.h
+++ b/dbms/programs/server/MetricsTransmitter.h
@@ -6,6 +6,7 @@
 #include
 #include
 #include
+#include <Common/ThreadPool.h>
 
 
 namespace DB
@@ -46,7 +47,7 @@ private:
 bool quit = false;
 std::mutex mutex;
 std::condition_variable cond;
- std::thread thread{&MetricsTransmitter::run, this};
+ ThreadFromGlobalPool thread{&MetricsTransmitter::run, this};
 
 static constexpr auto profile_events_path_prefix = "ClickHouse.ProfileEvents.";
 static constexpr auto current_metrics_path_prefix = "ClickHouse.Metrics.";
diff --git a/dbms/src/Common/Config/ConfigReloader.cpp b/dbms/src/Common/Config/ConfigReloader.cpp
index ed6fad4d42c..063fbec8e5b 100644
--- a/dbms/src/Common/Config/ConfigReloader.cpp
+++ b/dbms/src/Common/Config/ConfigReloader.cpp
@@ -33,7 +33,7 @@ ConfigReloader::ConfigReloader(
 
 void ConfigReloader::start()
 {
- thread = std::thread(&ConfigReloader::run, this);
+ thread =
ThreadFromGlobalPool(&ConfigReloader::run, this);
 }
 
 
diff --git a/dbms/src/Common/Config/ConfigReloader.h b/dbms/src/Common/Config/ConfigReloader.h
index ca4c97c5aee..c0904422b39 100644
--- a/dbms/src/Common/Config/ConfigReloader.h
+++ b/dbms/src/Common/Config/ConfigReloader.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include "ConfigProcessor.h"
+#include <Common/ThreadPool.h>
 #include
 #include
 #include
@@ -81,7 +82,7 @@ private:
 Updater updater;
 
 std::atomic quit{false};
- std::thread thread;
+ ThreadFromGlobalPool thread;
 
 /// Locked inside reloadIfNewer.
 std::mutex reload_mutex;
diff --git a/dbms/src/Common/CurrentThread.cpp b/dbms/src/Common/CurrentThread.cpp
index 8c05c91bac3..c3e0cae9571 100644
--- a/dbms/src/Common/CurrentThread.cpp
+++ b/dbms/src/Common/CurrentThread.cpp
@@ -34,7 +34,7 @@ ThreadStatus & CurrentThread::get()
 
 ProfileEvents::Counters & CurrentThread::getProfileEvents()
 {
- return get().performance_counters;
+ return current_thread ? get().performance_counters : ProfileEvents::global_counters;
 }
 
 MemoryTracker & CurrentThread::getMemoryTracker()
diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp
index 13ea9e4744a..eb52b6ff7e3 100644
--- a/dbms/src/Common/ErrorCodes.cpp
+++ b/dbms/src/Common/ErrorCodes.cpp
@@ -408,6 +408,7 @@ namespace ErrorCodes
 extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE = 431;
 extern const int UNKNOWN_CODEC = 432;
 extern const int ILLEGAL_CODEC_PARAMETER = 433;
+ extern const int CANNOT_SCHEDULE_TASK = 434;
 
 extern const int KEEPER_EXCEPTION = 999;
 extern const int POCO_EXCEPTION = 1000;
diff --git a/dbms/src/Common/MemoryTracker.cpp b/dbms/src/Common/MemoryTracker.cpp
index f7c2eb0ef78..6a997e3b19a 100644
--- a/dbms/src/Common/MemoryTracker.cpp
+++ b/dbms/src/Common/MemoryTracker.cpp
@@ -190,17 +190,20 @@ namespace CurrentMemoryTracker
 {
 void alloc(Int64 size)
 {
- DB::CurrentThread::getMemoryTracker().alloc(size);
+ if (DB::current_thread)
+ DB::CurrentThread::getMemoryTracker().alloc(size);
 }
 
 void realloc(Int64 old_size, Int64 new_size)
 {
- DB::CurrentThread::getMemoryTracker().alloc(new_size - old_size);
+ if (DB::current_thread)
+ DB::CurrentThread::getMemoryTracker().alloc(new_size - old_size);
 }
 
 void free(Int64 size)
 {
- DB::CurrentThread::getMemoryTracker().free(size);
+ if (DB::current_thread)
+ DB::CurrentThread::getMemoryTracker().free(size);
 }
 }
diff --git a/dbms/src/Common/ThreadPool.cpp b/dbms/src/Common/ThreadPool.cpp
index e3f03e18a46..487bd6fd66d 100644
--- a/dbms/src/Common/ThreadPool.cpp
+++ b/dbms/src/Common/ThreadPool.cpp
@@ -1,44 +1,103 @@
-#include
+#include
+#include
+
 #include
+#include
+
+
+namespace DB
+{
+ namespace ErrorCodes
+ {
+ extern const int CANNOT_SCHEDULE_TASK;
+ }
+}
 
 
 template <typename Thread>
-ThreadPoolImpl<Thread>::ThreadPoolImpl(size_t num_threads)
- : ThreadPoolImpl(num_threads, num_threads)
+ThreadPoolImpl<Thread>::ThreadPoolImpl(size_t max_threads)
+ : ThreadPoolImpl(max_threads, max_threads, max_threads)
 {
 }
 
 template <typename Thread>
-ThreadPoolImpl<Thread>::ThreadPoolImpl(size_t num_threads, size_t queue_size)
- : num_threads(num_threads), queue_size(queue_size)
+ThreadPoolImpl<Thread>::ThreadPoolImpl(size_t max_threads, size_t max_free_threads, size_t queue_size)
+ : max_threads(max_threads), max_free_threads(max_free_threads), queue_size(queue_size)
 {
- threads.reserve(num_threads);
+}
+
+template <typename Thread>
+template <typename ReturnType>
+ReturnType ThreadPoolImpl<Thread>::scheduleImpl(Job job, int priority, std::optional<uint64_t> wait_microseconds)
+{
+ auto on_error = []
+ {
+ if constexpr (std::is_same_v<ReturnType, void>)
+ throw DB::Exception("Cannot schedule a task", DB::ErrorCodes::CANNOT_SCHEDULE_TASK);
+ else
+ return false;
+ };
+
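+ /// ReturnType is void for the throwing entry points (schedule, scheduleOrThrow)
+ /// and bool for trySchedule: the `if constexpr (std::is_same_v<ReturnType, void>)`
+ /// above selects the failure policy at compile time, so a full queue either
+ /// throws CANNOT_SCHEDULE_TASK or reports failure by returning false.
+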
{
+ std::unique_lock lock(mutex);
+
+ auto pred = [this] { return !queue_size || scheduled_jobs < queue_size || shutdown; };
+
+ if (wait_microseconds)
+ {
+ if (!job_finished.wait_for(lock, std::chrono::microseconds(*wait_microseconds), pred))
+ return on_error();
+ }
+ else
+ job_finished.wait(lock, pred);
+
+ if (shutdown)
+ return on_error();
+
+ jobs.emplace(std::move(job), priority);
+ ++scheduled_jobs;
+
+ if (threads.size() < std::min(max_threads, scheduled_jobs))
+ {
+ threads.emplace_front();
+ try
+ {
+ threads.front() = Thread([this, it = threads.begin()] { worker(it); });
+ }
+ catch (...)
+ {
+ threads.pop_front();
+ }
+ }
+ }
+ new_job_or_shutdown.notify_one();
+ return ReturnType(true);
+}
 
 template <typename Thread>
 void ThreadPoolImpl<Thread>::schedule(Job job, int priority)
 {
- {
- std::unique_lock<std::mutex> lock(mutex);
- job_finished.wait(lock, [this] { return !queue_size || active_jobs < queue_size || shutdown; });
- if (shutdown)
- return;
+ scheduleImpl<void>(std::move(job), priority, std::nullopt);
+}
 
- jobs.emplace(std::move(job), priority);
- ++active_jobs;
+template <typename Thread>
+bool ThreadPoolImpl<Thread>::trySchedule(Job job, int priority, uint64_t wait_microseconds)
+{
+ return scheduleImpl<bool>(std::move(job), priority, wait_microseconds);
+}
 
- if (threads.size() < std::min(num_threads, active_jobs))
- threads.emplace_back([this] { worker(); });
- }
- new_job_or_shutdown.notify_one();
+template <typename Thread>
+void ThreadPoolImpl<Thread>::scheduleOrThrow(Job job, int priority, uint64_t wait_microseconds)
+{
+ scheduleImpl<void>(std::move(job), priority, wait_microseconds);
 }
 
 template <typename Thread>
 void ThreadPoolImpl<Thread>::wait()
 {
 {
- std::unique_lock<std::mutex> lock(mutex);
- job_finished.wait(lock, [this] { return active_jobs == 0; });
+ std::unique_lock lock(mutex);
+ job_finished.wait(lock, [this] { return scheduled_jobs == 0; });
 
 if (first_exception)
 {
@@ -59,7 +118,7 @@ template <typename Thread>
 void ThreadPoolImpl<Thread>::finalize()
 {
 {
- std::unique_lock<std::mutex> lock(mutex);
+ std::unique_lock lock(mutex);
 shutdown = true;
 }
 
@@ -74,12 +133,12 @@ void ThreadPoolImpl<Thread>::finalize()
 template <typename Thread>
 size_t ThreadPoolImpl<Thread>::active() const
 {
- std::unique_lock<std::mutex> lock(mutex);
- return active_jobs;
+ std::unique_lock lock(mutex);
+ return scheduled_jobs;
 }
 
 template <typename Thread>
-void ThreadPoolImpl<Thread>::worker()
+void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_it)
 {
 while (true)
 {
@@ -87,7 +146,7 @@ void ThreadPoolImpl<Thread>::worker()
 bool need_shutdown = false;
 
 {
- std::unique_lock<std::mutex> lock(mutex);
+ std::unique_lock lock(mutex);
 new_job_or_shutdown.wait(lock, [this] { return shutdown || !jobs.empty(); });
 need_shutdown = shutdown;
 
@@ -111,11 +170,11 @@ void ThreadPoolImpl<Thread>::worker()
 catch (...)
{
 {
- std::unique_lock<std::mutex> lock(mutex);
+ std::unique_lock lock(mutex);
 if (!first_exception)
 first_exception = std::current_exception();
 shutdown = true;
- --active_jobs;
+ --scheduled_jobs;
 }
 job_finished.notify_all();
 new_job_or_shutdown.notify_all();
@@ -124,8 +183,15 @@
 }
 
 {
- std::unique_lock<std::mutex> lock(mutex);
- --active_jobs;
+ std::unique_lock lock(mutex);
+ --scheduled_jobs;
+
+ if (threads.size() > scheduled_jobs + max_free_threads)
+ {
+ threads.erase(thread_it);
+ job_finished.notify_all();
+ return;
+ }
 }
 
 job_finished.notify_all();
@@ -139,14 +205,14 @@ template class ThreadPoolImpl<std::thread>;
 
 void ExceptionHandler::setException(std::exception_ptr && exception)
 {
- std::unique_lock<std::mutex> lock(mutex);
+ std::unique_lock lock(mutex);
 if (!first_exception)
 first_exception = std::move(exception);
 }
 
 void ExceptionHandler::throwIfException()
 {
- std::unique_lock<std::mutex> lock(mutex);
+ std::unique_lock lock(mutex);
 if (first_exception)
 std::rethrow_exception(first_exception);
 }
diff --git a/dbms/src/Common/ThreadPool.h b/dbms/src/Common/ThreadPool.h
index a8cf84dd7b5..c0ab07fdcf5 100644
--- a/dbms/src/Common/ThreadPool.h
+++ b/dbms/src/Common/ThreadPool.h
@@ -6,15 +6,23 @@
 #include
 #include
 #include
-#include <vector>
+#include <list>
+#include <optional>
 #include <queue>
+#include <Common/ThreadStatus.h>
+
 
 /** Very simple thread pool similar to boost::threadpool.
 * Advantages:
 * - catches exceptions and rethrows on wait.
+ *
+ * This thread pool can be used as a task queue.
+ * For example, you can create a thread pool with 10 threads (and queue of size 10) and schedule 1000 tasks
+ * - in this case you will be blocked to keep 10 tasks in flight.
+ *
+ * Thread: std::thread or something with identical interface.
 */
-
 template <typename Thread>
 class ThreadPoolImpl
 {
@@ -22,16 +30,22 @@ public:
 using Job = std::function<void()>;
 
 /// Size is constant. Up to max_threads are created on demand and then run until shutdown.
- explicit ThreadPoolImpl(size_t num_threads);
+ explicit ThreadPoolImpl(size_t max_threads);
 
- /// queue_size - maximum number of running plus scheduled jobs. It can be greater than num_threads. Zero means unlimited.
- ThreadPoolImpl(size_t num_threads, size_t queue_size);
+ /// queue_size - maximum number of running plus scheduled jobs. It can be greater than max_threads. Zero means unlimited.
+ ThreadPoolImpl(size_t max_threads, size_t max_free_threads, size_t queue_size);
 
- /// Add new job. Locks until number of active jobs is less than maximum or exception in one of threads was thrown.
+ /// Add new job. Locks until number of scheduled jobs is less than maximum or exception in one of threads was thrown.
 /// If an exception in some thread was thrown, method silently returns, and exception will be rethrown only on call to 'wait' function.
 /// Priority: greater is higher.
 void schedule(Job job, int priority = 0);
 
+ /// Wait for specified amount of time and schedule a job or return false.
+ bool trySchedule(Job job, int priority = 0, uint64_t wait_microseconds = 0);
+
+ /// Wait for specified amount of time and schedule a job or throw an exception.
+ void scheduleOrThrow(Job job, int priority = 0, uint64_t wait_microseconds = 0);
+
 /// Wait for all currently active jobs to be done.
 /// You may call schedule and wait many times in arbitrary order.
 /// If any thread has thrown an exception, the first exception will be rethrown from this method,
@@ -42,8 +56,6 @@ public:
 /// You should not destroy object while calling schedule or wait methods from other threads.
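
The three scheduling entry points declared here differ only in how a full queue is reported. A usage sketch under the constructor introduced by this patch (pool sizes are illustrative):

    ThreadPool pool(16, 4, 64);  /// max_threads, max_free_threads, queue_size

    pool.schedule([] { /* work */ });        /// blocks while 64 jobs are in flight

    if (!pool.trySchedule([] { /* work */ }, 0, 100000))
    {
        /// The queue stayed full for 100 ms; the job was not enqueued.
    }

    pool.scheduleOrThrow([] { /* work */ }); /// throws CANNOT_SCHEDULE_TASK instead

    pool.wait();  /// rethrows the first exception thrown by any job, if there was one
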
~ThreadPoolImpl();
 
- size_t size() const { return num_threads; }
-
 /// Returns number of running and scheduled jobs.
 size_t active() const;
 
@@ -52,10 +64,11 @@ private:
 std::condition_variable job_finished;
 std::condition_variable new_job_or_shutdown;
 
- const size_t num_threads;
+ const size_t max_threads;
+ const size_t max_free_threads;
 const size_t queue_size;
 
- size_t active_jobs = 0;
+ size_t scheduled_jobs = 0;
 bool shutdown = false;
 
 struct JobWithPriority
@@ -73,34 +86,65 @@ private:
 };
 
 std::priority_queue<JobWithPriority> jobs;
- std::vector<Thread> threads;
+ std::list<Thread> threads;
 std::exception_ptr first_exception;
 
+ template <typename ReturnType>
+ ReturnType scheduleImpl(Job job, int priority, std::optional<uint64_t> wait_microseconds);
+
- void worker();
+ void worker(typename std::list<Thread>::iterator thread_it);
 
 void finalize();
 };
 
+/// ThreadPool with std::thread for threads.
 using FreeThreadPool = ThreadPoolImpl<std::thread>;
 
+
+/** Global ThreadPool that can be used as a singleton.
+ * Why it is needed?
+ *
+ * Linux can create and destroy about 100 000 threads per second (quite good).
+ * With simple ThreadPool (based on mutex and condvar) you can assign about 200 000 tasks per second
+ * - not much difference comparing to not using a thread pool at all.
+ *
+ * But if you reuse OS threads instead of creating and destroying them, several benefits exist:
+ * - allocator performance will usually be better due to reuse of thread local caches, especially for jemalloc:
+ *   https://github.com/jemalloc/jemalloc/issues/1347
+ * - address sanitizer and thread sanitizer will not fail due to global limit on number of created threads.
+ * - program will work faster in gdb;
+ */
 class GlobalThreadPool : public FreeThreadPool, public ext::singleton<GlobalThreadPool>
 {
 public:
- GlobalThreadPool() : FreeThreadPool(10000) {} /// TODO: global blocking limit may lead to deadlocks.
+ GlobalThreadPool() : FreeThreadPool(10000, 1000, 10000) {}
 };
 
+
+/** Looks like std::thread but allocates threads in GlobalThreadPool.
+ * Also holds ThreadStatus for ClickHouse.
+ */
 class ThreadFromGlobalPool
 {
 public:
 ThreadFromGlobalPool() {}
 
- ThreadFromGlobalPool(std::function<void()> func)
+ template <typename Function, typename... Args>
+ explicit ThreadFromGlobalPool(Function && func, Args &&... args)
 {
 mutex = std::make_unique<std::mutex>();
+
 /// The function object must be copyable, so we wrap lock_guard in shared_ptr.
- GlobalThreadPool::instance().schedule([lock = std::make_shared<std::lock_guard<std::mutex>>(*mutex), func = std::move(func)] { func(); });
+ GlobalThreadPool::instance().scheduleOrThrow([
+ lock = std::make_shared<std::lock_guard<std::mutex>>(*mutex),
+ func = std::forward<Function>(func),
+ args = std::make_tuple(std::forward<Args>(args)...)]
+ {
+ DB::ThreadStatus thread_status;
+ std::apply(func, args);
+ });
 }
 
 ThreadFromGlobalPool(ThreadFromGlobalPool && rhs)
@@ -129,10 +173,18 @@ public:
 }
 mutex.reset();
 }
+
+ bool joinable() const
+ {
+ return static_cast<bool>(mutex);
+ }
+
 private:
 std::unique_ptr<std::mutex> mutex; /// Object must be moveable.
 };
 
+
+/// Recommended thread pool for the case when multiple thread pools are created and destroyed.
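
ThreadFromGlobalPool is the drop-in replacement used for the std::thread members converted throughout this patch: construction submits the function to GlobalThreadPool, and join() blocks on the wrapped mutex. A sketch of the member-thread pattern it supports (hypothetical component, mirroring MetricsTransmitter and ConfigReloader above):

    class PeriodicWorker
    {
    public:
        ~PeriodicWorker()
        {
            quit = true;
            if (thread.joinable())  /// joinable() is added by this patch
                thread.join();
        }
    private:
        void run() { while (!quit) { /* do periodic work, then sleep */ } }
        std::atomic<bool> quit{false};
        ThreadFromGlobalPool thread{&PeriodicWorker::run, this};  /// starts immediately
    };
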
using ThreadPool = ThreadPoolImpl<ThreadFromGlobalPool>;
diff --git a/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp
index 9626a54aa20..ac049bcb8e5 100644
--- a/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp
+++ b/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp
@@ -853,8 +853,8 @@ ZooKeeper::ZooKeeper(
 if (!auth_scheme.empty())
 sendAuth(auth_scheme, auth_data);
 
- send_thread = std::thread([this] { sendThread(); });
- receive_thread = std::thread([this] { receiveThread(); });
+ send_thread = ThreadFromGlobalPool([this] { sendThread(); });
+ receive_thread = ThreadFromGlobalPool([this] { receiveThread(); });
 
 ProfileEvents::increment(ProfileEvents::ZooKeeperInit);
 }
diff --git a/dbms/src/Common/ZooKeeper/ZooKeeperImpl.h b/dbms/src/Common/ZooKeeper/ZooKeeperImpl.h
index c93f13b9351..e5da9ea48fe 100644
--- a/dbms/src/Common/ZooKeeper/ZooKeeperImpl.h
+++ b/dbms/src/Common/ZooKeeper/ZooKeeperImpl.h
@@ -3,6 +3,7 @@
 #include
 #include
 #include
+#include <Common/ThreadPool.h>
 
 #include
 #include
@@ -209,8 +210,8 @@ private:
 Watches watches;
 std::mutex watches_mutex;
 
- std::thread send_thread;
- std::thread receive_thread;
+ ThreadFromGlobalPool send_thread;
+ ThreadFromGlobalPool receive_thread;
 
 void connect(
 const Addresses & addresses,
diff --git a/dbms/src/Common/tests/multi_version.cpp b/dbms/src/Common/tests/multi_version.cpp
index ee90a79801b..0937e597e2d 100644
--- a/dbms/src/Common/tests/multi_version.cpp
+++ b/dbms/src/Common/tests/multi_version.cpp
@@ -23,7 +23,7 @@ void thread2(MV & x, const char * result)
 }
 
 
-int main(int argc, char ** argv)
+int main(int, char **)
 {
 try
 {
diff --git a/dbms/src/Core/BackgroundSchedulePool.cpp b/dbms/src/Core/BackgroundSchedulePool.cpp
index 5da499e5ae9..0493e13b2b9 100644
--- a/dbms/src/Core/BackgroundSchedulePool.cpp
+++ b/dbms/src/Core/BackgroundSchedulePool.cpp
@@ -161,9 +161,9 @@ BackgroundSchedulePool::BackgroundSchedulePool(size_t size)
 threads.resize(size);
 for (auto & thread : threads)
- thread = std::thread([this] { threadFunction(); });
+ thread = ThreadFromGlobalPool([this] { threadFunction(); });
 
- delayed_thread = std::thread([this] { delayExecutionThreadFunction(); });
+ delayed_thread = ThreadFromGlobalPool([this] { delayExecutionThreadFunction(); });
 }
 
 
@@ -181,7 +181,7 @@ BackgroundSchedulePool::~BackgroundSchedulePool()
 delayed_thread.join();
 
 LOG_TRACE(&Logger::get("BackgroundSchedulePool"), "Waiting for threads to finish.");
- for (std::thread & thread : threads)
+ for (auto & thread : threads)
 thread.join();
 }
 catch (...)
diff --git a/dbms/src/Core/BackgroundSchedulePool.h b/dbms/src/Core/BackgroundSchedulePool.h
index ba23d93733f..7b75d9459ba 100644
--- a/dbms/src/Core/BackgroundSchedulePool.h
+++ b/dbms/src/Core/BackgroundSchedulePool.h
@@ -13,6 +13,8 @@
 #include
 #include
 #include
+#include <Common/ThreadPool.h>
+
 
 namespace DB
 {
@@ -119,7 +121,7 @@ public:
 ~BackgroundSchedulePool();
 
 private:
- using Threads = std::vector<std::thread>;
+ using Threads = std::vector<ThreadFromGlobalPool>;
 
 void threadFunction();
 void delayExecutionThreadFunction();
@@ -141,7 +143,7 @@ private:
 std::condition_variable wakeup_cond;
 std::mutex delayed_tasks_mutex;
 
 /// Thread waiting for next delayed task.
- std::thread delayed_thread;
+ ThreadFromGlobalPool delayed_thread;
 
 /// Tasks ordered by scheduled time.
DelayedTasks delayed_tasks; diff --git a/dbms/src/DataStreams/ParallelInputsProcessor.h b/dbms/src/DataStreams/ParallelInputsProcessor.h index ba086b98939..a83c2ca1e56 100644 --- a/dbms/src/DataStreams/ParallelInputsProcessor.h +++ b/dbms/src/DataStreams/ParallelInputsProcessor.h @@ -13,6 +13,7 @@ #include #include #include +#include /** Allows to process multiple block input streams (sources) in parallel, using specified number of threads. @@ -306,8 +307,8 @@ private: Handler & handler; - /// Streams. - using ThreadsData = std::vector; + /// Threads. + using ThreadsData = std::vector; ThreadsData threads; /** A set of available sources that are not currently processed by any thread. diff --git a/dbms/src/Dictionaries/ExecutableDictionarySource.cpp b/dbms/src/Dictionaries/ExecutableDictionarySource.cpp index 376153bd0e9..028e0452fff 100644 --- a/dbms/src/Dictionaries/ExecutableDictionarySource.cpp +++ b/dbms/src/Dictionaries/ExecutableDictionarySource.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include "DictionarySourceFactory.h" #include "DictionarySourceHelpers.h" @@ -165,7 +166,7 @@ namespace BlockInputStreamPtr stream; std::unique_ptr command; std::packaged_task task; - std::thread thread; + ThreadFromGlobalPool thread; bool wait_called = false; }; diff --git a/dbms/src/IO/AIOContextPool.h b/dbms/src/IO/AIOContextPool.h index 64d01a0f45b..ca92e14b6ed 100644 --- a/dbms/src/IO/AIOContextPool.h +++ b/dbms/src/IO/AIOContextPool.h @@ -8,6 +8,7 @@ #include #include #include +#include namespace DB @@ -32,7 +33,7 @@ class AIOContextPool : public ext::singleton std::map> promises; std::atomic cancelled{false}; - std::thread io_completion_monitor{&AIOContextPool::doMonitor, this}; + ThreadFromGlobalPool io_completion_monitor{&AIOContextPool::doMonitor, this}; ~AIOContextPool(); diff --git a/dbms/src/Interpreters/AsynchronousMetrics.h b/dbms/src/Interpreters/AsynchronousMetrics.h index ceafc2af586..8ccefb9e930 100644 --- a/dbms/src/Interpreters/AsynchronousMetrics.h +++ b/dbms/src/Interpreters/AsynchronousMetrics.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB @@ -43,7 +44,7 @@ private: Container container; mutable std::mutex container_mutex; - std::thread thread; + ThreadFromGlobalPool thread; void run(); void update(); diff --git a/dbms/src/Interpreters/Compiler.cpp b/dbms/src/Interpreters/Compiler.cpp index 8a60b24a24b..9b0a8371f09 100644 --- a/dbms/src/Interpreters/Compiler.cpp +++ b/dbms/src/Interpreters/Compiler.cpp @@ -142,40 +142,37 @@ SharedLibraryPtr Compiler::getOrCount( { /// The min_count_to_compile value of zero indicates the need for synchronous compilation. - /// Are there any free threads? - if (min_count_to_compile == 0 || pool.active() < pool.size()) + /// Indicates that the library is in the process of compiling. + libraries[hashed_key] = nullptr; + + LOG_INFO(log, "Compiling code " << file_name << ", key: " << key); + + if (min_count_to_compile == 0) { - /// Indicates that the library is in the process of compiling. 
- libraries[hashed_key] = nullptr; - - LOG_INFO(log, "Compiling code " << file_name << ", key: " << key); - - if (min_count_to_compile == 0) { - { - ext::unlock_guard unlock(mutex); - compile(hashed_key, file_name, additional_compiler_flags, get_code, on_ready); - } + ext::unlock_guard unlock(mutex); + compile(hashed_key, file_name, additional_compiler_flags, get_code, on_ready); + } - return libraries[hashed_key]; - } - else - { - pool.schedule([=] - { - try - { - compile(hashed_key, file_name, additional_compiler_flags, get_code, on_ready); - } - catch (...) - { - tryLogCurrentException("Compiler"); - } - }); - } + return libraries[hashed_key]; } else - LOG_INFO(log, "All threads are busy."); + { + bool res = pool.trySchedule([=] + { + try + { + compile(hashed_key, file_name, additional_compiler_flags, get_code, on_ready); + } + catch (...) + { + tryLogCurrentException("Compiler"); + } + }); + + if (!res) + LOG_INFO(log, "All threads are busy."); + } } return nullptr; diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index ca90073436a..749c2ae40d5 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -521,7 +522,7 @@ private: std::mutex mutex; std::condition_variable cond; std::atomic quit{false}; - std::thread thread{&SessionCleaner::run, this}; + ThreadFromGlobalPool thread{&SessionCleaner::run, this}; }; } diff --git a/dbms/src/Interpreters/DDLWorker.cpp b/dbms/src/Interpreters/DDLWorker.cpp index 54fcffbea2a..730e37d9bd3 100644 --- a/dbms/src/Interpreters/DDLWorker.cpp +++ b/dbms/src/Interpreters/DDLWorker.cpp @@ -241,7 +241,7 @@ DDLWorker::DDLWorker(const std::string & zk_root_dir, Context & context_, const event_queue_updated = std::make_shared(); - thread = std::thread(&DDLWorker::run, this); + thread = ThreadFromGlobalPool(&DDLWorker::run, this); } diff --git a/dbms/src/Interpreters/DDLWorker.h b/dbms/src/Interpreters/DDLWorker.h index d3872b8ac95..18714720d2d 100644 --- a/dbms/src/Interpreters/DDLWorker.h +++ b/dbms/src/Interpreters/DDLWorker.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -90,7 +91,7 @@ private: std::shared_ptr event_queue_updated; std::atomic stop_flag{false}; - std::thread thread; + ThreadFromGlobalPool thread; Int64 last_cleanup_time_seconds = 0; diff --git a/dbms/src/Interpreters/EmbeddedDictionaries.cpp b/dbms/src/Interpreters/EmbeddedDictionaries.cpp index 10f5692f6e6..60524d63cee 100644 --- a/dbms/src/Interpreters/EmbeddedDictionaries.cpp +++ b/dbms/src/Interpreters/EmbeddedDictionaries.cpp @@ -150,7 +150,7 @@ EmbeddedDictionaries::EmbeddedDictionaries( , reload_period(context_.getConfigRef().getInt("builtin_dictionaries_reload_interval", 3600)) { reloadImpl(throw_on_error); - reloading_thread = std::thread([this] { reloadPeriodically(); }); + reloading_thread = ThreadFromGlobalPool([this] { reloadPeriodically(); }); } diff --git a/dbms/src/Interpreters/EmbeddedDictionaries.h b/dbms/src/Interpreters/EmbeddedDictionaries.h index ad2dd404b3e..caa7c1cc62d 100644 --- a/dbms/src/Interpreters/EmbeddedDictionaries.h +++ b/dbms/src/Interpreters/EmbeddedDictionaries.h @@ -3,6 +3,7 @@ #include #include #include +#include #include @@ -41,7 +42,7 @@ private: mutable std::mutex mutex; - std::thread reloading_thread; + ThreadFromGlobalPool reloading_thread; Poco::Event destroy; diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index 
e4ccd9962c6..814fc5ecec2 100644 --- a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -72,7 +72,7 @@ void ExternalLoader::init(bool throw_on_error) reloadAndUpdate(throw_on_error); } - reloading_thread = std::thread{&ExternalLoader::reloadPeriodically, this}; + reloading_thread = ThreadFromGlobalPool{&ExternalLoader::reloadPeriodically, this}; } diff --git a/dbms/src/Interpreters/ExternalLoader.h b/dbms/src/Interpreters/ExternalLoader.h index ac672f925e3..c2ce161f0e1 100644 --- a/dbms/src/Interpreters/ExternalLoader.h +++ b/dbms/src/Interpreters/ExternalLoader.h @@ -13,6 +13,7 @@ #include #include #include +#include namespace DB @@ -160,7 +161,7 @@ private: std::unique_ptr config_repository; - std::thread reloading_thread; + ThreadFromGlobalPool reloading_thread; Poco::Event destroy; Logger * log; diff --git a/dbms/src/Interpreters/SystemLog.h b/dbms/src/Interpreters/SystemLog.h index 1a4283fae8e..4cb2bb76b4f 100644 --- a/dbms/src/Interpreters/SystemLog.h +++ b/dbms/src/Interpreters/SystemLog.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -135,7 +136,7 @@ protected: /** In this thread, data is pulled from 'queue' and stored in 'data', and then written into table. */ - std::thread saving_thread; + ThreadFromGlobalPool saving_thread; void threadFunction(); @@ -161,7 +162,7 @@ SystemLog::SystemLog(Context & context_, log = &Logger::get("SystemLog (" + database_name + "." + table_name + ")"); data.reserve(DBMS_SYSTEM_LOG_QUEUE_SIZE); - saving_thread = std::thread([this] { threadFunction(); }); + saving_thread = ThreadFromGlobalPool([this] { threadFunction(); }); } diff --git a/dbms/src/Storages/Distributed/DirectoryMonitor.h b/dbms/src/Storages/Distributed/DirectoryMonitor.h index 484b0ac3f23..d7858d3af40 100644 --- a/dbms/src/Storages/Distributed/DirectoryMonitor.h +++ b/dbms/src/Storages/Distributed/DirectoryMonitor.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -55,7 +56,7 @@ private: std::mutex mutex; std::condition_variable cond; Logger * log; - std::thread thread {&StorageDistributedDirectoryMonitor::run, this}; + ThreadFromGlobalPool thread{&StorageDistributedDirectoryMonitor::run, this}; }; } diff --git a/dbms/src/Storages/MergeTree/BackgroundProcessingPool.cpp b/dbms/src/Storages/MergeTree/BackgroundProcessingPool.cpp index c0911ac4d5e..b60d860ec6c 100644 --- a/dbms/src/Storages/MergeTree/BackgroundProcessingPool.cpp +++ b/dbms/src/Storages/MergeTree/BackgroundProcessingPool.cpp @@ -67,7 +67,7 @@ BackgroundProcessingPool::BackgroundProcessingPool(int size_) : size(size_) threads.resize(size); for (auto & thread : threads) - thread = std::thread([this] { threadFunction(); }); + thread = ThreadFromGlobalPool([this] { threadFunction(); }); } @@ -110,7 +110,7 @@ BackgroundProcessingPool::~BackgroundProcessingPool() { shutdown = true; wake_event.notify_all(); - for (std::thread & thread : threads) + for (auto & thread : threads) thread.join(); } catch (...) diff --git a/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h b/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h index 4eb5d4cce56..fdf5251cb8a 100644 --- a/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h +++ b/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h @@ -13,6 +13,8 @@ #include #include #include +#include + namespace DB @@ -60,7 +62,7 @@ protected: friend class BackgroundProcessingPoolTaskInfo; using Tasks = std::multimap; /// key is desired next time to execute (priority). 
- using Threads = std::vector; + using Threads = std::vector; const size_t size; diff --git a/dbms/src/Storages/StorageBuffer.cpp b/dbms/src/Storages/StorageBuffer.cpp index 5d76279c95f..511364bc11f 100644 --- a/dbms/src/Storages/StorageBuffer.cpp +++ b/dbms/src/Storages/StorageBuffer.cpp @@ -420,7 +420,7 @@ void StorageBuffer::startup() << " Set apropriate system_profile to fix this."); } - flush_thread = std::thread(&StorageBuffer::flushThread, this); + flush_thread = ThreadFromGlobalPool(&StorageBuffer::flushThread, this); } diff --git a/dbms/src/Storages/StorageBuffer.h b/dbms/src/Storages/StorageBuffer.h index 9992d1b49bd..85ea3f086b5 100644 --- a/dbms/src/Storages/StorageBuffer.h +++ b/dbms/src/Storages/StorageBuffer.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -110,7 +111,7 @@ private: Poco::Event shutdown_event; /// Resets data by timeout. - std::thread flush_thread; + ThreadFromGlobalPool flush_thread; void flushAllBuffers(bool check_thresholds = true); /// Reset the buffer. If check_thresholds is set - resets only if thresholds are exceeded. diff --git a/libs/libdaemon/src/BaseDaemon.cpp b/libs/libdaemon/src/BaseDaemon.cpp index bad38c78529..e62ff31172f 100644 --- a/libs/libdaemon/src/BaseDaemon.cpp +++ b/libs/libdaemon/src/BaseDaemon.cpp @@ -998,8 +998,6 @@ void BaseDaemon::initialize(Application & self) } initializeTerminationAndSignalProcessing(); - - DB::CurrentThread::get(); /// TODO Why do we need this? logRevision(); for (const auto & key : DB::getMultipleKeysFromConfig(config(), "", "graphite")) From 026a30092ebe13c1c3a9f8e2b1a90680a28d10c7 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 15 Jan 2019 17:49:22 +0300 Subject: [PATCH 068/586] added alter --- dbms/src/Storages/AlterCommands.cpp | 33 ++++++++++++++++++++++---- dbms/src/Storages/StorageMergeTree.cpp | 2 +- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index a32d37dcea3..ae36da3cb0e 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -321,7 +321,7 @@ void AlterCommand::apply(ColumnsDescription & columns_description, ASTPtr & orde if (std::any_of( indexes_decl_ast->children.cbegin(), indexes_decl_ast->children.cend(), - [this](const ASTPtr & index_ast){ + [this](const ASTPtr & index_ast) { return typeid_cast(*index_ast).name == index_name; })) { @@ -332,12 +332,37 @@ void AlterCommand::apply(ColumnsDescription & columns_description, ASTPtr & orde ErrorCodes::ILLEGAL_COLUMN}; } - //auto insert_it = indexes_decl_ast->children.end(); - // TODO: implementation + auto insert_it = indexes_decl_ast->children.end(); + + if (!after_index_name.empty()) + { + insert_it = std::find_if( + indexes_decl_ast->children.begin(), + indexes_decl_ast->children.end(), + [this](const ASTPtr & index_ast) { + return typeid_cast(*index_ast).name == after_index_name; + }); + if (insert_it == indexes_decl_ast->children.end()) { + throw Exception("Wrong index name. 
Cannot find index `" + after_index_name + "` to insert after.", + ErrorCodes::LOGICAL_ERROR); + } + } + indexes_decl_ast->children.emplace(insert_it, index_decl); } else if (type == DROP_INDEX) { - // TODO: implementation + auto erase_it = std::find_if( + indexes_decl_ast->children.begin(), + indexes_decl_ast->children.end(), + [this](const ASTPtr & index_ast) { + return typeid_cast(*index_ast).name == index_name; + }); + if (erase_it == indexes_decl_ast->children.end()) + { + throw Exception("Wrong index name. Cannot find index `" + index_name + "` to drop.", + ErrorCodes::LOGICAL_ERROR); + } + indexes_decl_ast->children.erase(erase_it); } else throw Exception("Wrong parameter type in ALTER query", ErrorCodes::LOGICAL_ERROR); diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 1fd0f422975..39dba4ed109 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -245,7 +245,7 @@ void StorageMergeTree::alter( /// Reinitialize primary key because primary key column types might have changed. data.setPrimaryKeyAndColumns(new_order_by_ast, new_primary_key_ast, new_columns); - data.setSkipIndexes(data.skip_indexes_ast); + data.setSkipIndexes(new_indexes_ast); for (auto & transaction : transactions) transaction->commit(); From 2f07e31903397bd0a44543133511aeaf1d793291 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 15 Jan 2019 20:39:10 +0300 Subject: [PATCH 069/586] fix --- dbms/src/Storages/AlterCommands.cpp | 3 +-- dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp | 2 +- dbms/src/Storages/StorageMergeTree.cpp | 3 +++ 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index ae36da3cb0e..e410dcddf4a 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -117,6 +117,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ else if (command_ast->type == ASTAlterCommand::ADD_INDEX) { AlterCommand command; + command.index_decl = command_ast->index_decl; command.type = AlterCommand::ADD_INDEX; const auto & ast_index_decl = typeid_cast(*command_ast->index_decl); @@ -128,7 +129,6 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ command.if_not_exists = command_ast->if_not_exists; - throw Exception("\"ALTER TABLE table ADD/DROP INDEX ...\" queries are not supported yet.", ErrorCodes::NOT_IMPLEMENTED); return command; } else if (command_ast->type == ASTAlterCommand::DROP_INDEX) @@ -141,7 +141,6 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ command.index_name = typeid_cast(*(command_ast->index)).name; command.if_exists = command_ast->if_exists; - throw Exception("\"ALTER TABLE table ADD/DROP INDEX ...\" queries are not supported yet.", ErrorCodes::NOT_IMPLEMENTED); return command; } else diff --git a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp index 993dd7acc02..a79af5a52d6 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp @@ -174,7 +174,7 @@ std::unique_ptr MergeTreeMinMaxIndexCreator( DataTypes data_types; Poco::Logger * log = &Poco::Logger::get("minmax_idx"); - LOG_DEBUG(log, "new minmax index"); + LOG_DEBUG(log, "new minmax index" << node->name); for (size_t i = 0; i < expr_list->children.size(); ++i) { const auto & column = sample.getByPosition(i); diff --git 
a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp
index 39dba4ed109..688fadaa909 100644
--- a/dbms/src/Storages/StorageMergeTree.cpp
+++ b/dbms/src/Storages/StorageMergeTree.cpp
@@ -239,6 +239,9 @@ void StorageMergeTree::alter(
 
 if (new_primary_key_ast.get() != data.primary_key_ast.get())
 storage_ast.set(storage_ast.primary_key, new_primary_key_ast);
+
+ if (new_indexes_ast.get() != data.skip_indexes_ast.get())
+ storage_ast.set(storage_ast.indexes, new_indexes_ast);
 };
 
 context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, storage_modifier);

From a98e822a100d5d2967afbd22c2cc19d422a499cf Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Tue, 15 Jan 2019 21:39:54 +0300
Subject: [PATCH 070/586] Attempt to implement global thread pool #4018

---
 dbms/programs/server/HTTPHandler.cpp | 1 +
 dbms/programs/server/Server.cpp | 4 +++-
 dbms/programs/server/TCPHandler.cpp | 1 +
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/dbms/programs/server/HTTPHandler.cpp b/dbms/programs/server/HTTPHandler.cpp
index d86c526784b..5881314bea7 100644
--- a/dbms/programs/server/HTTPHandler.cpp
+++ b/dbms/programs/server/HTTPHandler.cpp
@@ -647,6 +647,7 @@ void HTTPHandler::trySendExceptionToClient(const std::string & s, int exception_
 void HTTPHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response)
 {
 setThreadName("HTTPHandler");
+ ThreadStatus thread_status;
 
 Output used_output;
 
diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp
index 499f233ff28..176150297f2 100644
--- a/dbms/programs/server/Server.cpp
+++ b/dbms/programs/server/Server.cpp
@@ -27,6 +27,7 @@
 #include
 #include
 #include
+#include <Common/ThreadStatus.h>
 #include
 #include
 #include
@@ -129,9 +130,10 @@ std::string Server::getDefaultCorePath() const
 int Server::main(const std::vector<std::string> & /*args*/)
 {
 Logger * log = &logger();
-
 UseSSL use_ssl;
 
+ ThreadStatus thread_status;
+
 registerFunctions();
 registerAggregateFunctions();
 registerTableFunctions();
diff --git a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp
index c3dff11146e..9eccf1a819e 100644
--- a/dbms/programs/server/TCPHandler.cpp
+++ b/dbms/programs/server/TCPHandler.cpp
@@ -55,6 +55,7 @@ namespace ErrorCodes
 void TCPHandler::runImpl()
 {
 setThreadName("TCPHandler");
+ ThreadStatus thread_status;
 
 connection_context = server.context();
 connection_context.setSessionContext(connection_context);

From b110738f619c9bcbe8a8ed1655df1300b20bdc2e Mon Sep 17 00:00:00 2001
From: Nikita Vasilev
Date: Tue, 15 Jan 2019 22:14:47 +0300
Subject: [PATCH 071/586] alters fix

---
 dbms/src/Storages/AlterCommands.cpp | 3 +--
 dbms/src/Storages/MergeTree/MergeTreeData.cpp | 2 +-
 dbms/src/Storages/StorageMergeTree.cpp | 2 +-
 dbms/src/Storages/StorageReplicatedMergeTree.cpp | 2 +-
 4 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp
index e410dcddf4a..406ded9339d 100644
--- a/dbms/src/Storages/AlterCommands.cpp
+++ b/dbms/src/Storages/AlterCommands.cpp
@@ -383,12 +383,11 @@ void AlterCommands::apply(ColumnsDescription & columns_description, ASTPtr & ord
 auto new_columns_description = columns_description;
 auto new_order_by_ast = order_by_ast;
 auto new_primary_key_ast = primary_key_ast;
- auto new_indexes_decl_ast = indexes_decl_ast;
+ auto new_indexes_decl_ast = indexes_decl_ast->clone();
 
 for (const AlterCommand & command : *this)
 if (!command.ignore)
command.apply(new_columns_description, new_order_by_ast, new_primary_key_ast, new_indexes_decl_ast); - columns_description = std::move(new_columns_description); order_by_ast = std::move(new_order_by_ast); primary_key_ast = std::move(new_primary_key_ast); diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 3e51efa808d..eed6e25d06b 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -1042,7 +1042,7 @@ void MergeTreeData::checkAlter(const AlterCommands & commands) auto new_columns = getColumns(); ASTPtr new_order_by_ast = order_by_ast; ASTPtr new_primary_key_ast = primary_key_ast; - ASTPtr new_indexes_ast = skip_indexes_ast; + ASTPtr new_indexes_ast = skip_indexes_ast->clone(); commands.apply(new_columns, new_order_by_ast, new_primary_key_ast, new_indexes_ast); /// Set of columns that shouldn't be altered. diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 688fadaa909..c3aa533e6be 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -216,7 +216,7 @@ void StorageMergeTree::alter( auto new_columns = data.getColumns(); ASTPtr new_order_by_ast = data.order_by_ast; ASTPtr new_primary_key_ast = data.primary_key_ast; - ASTPtr new_indexes_ast = data.skip_indexes_ast; + ASTPtr new_indexes_ast = data.skip_indexes_ast->clone(); params.apply(new_columns, new_order_by_ast, new_primary_key_ast, new_indexes_ast); auto parts = data.getDataParts({MergeTreeDataPartState::PreCommitted, MergeTreeDataPartState::Committed, MergeTreeDataPartState::Outdated}); diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 732ad0405cc..34ac516edca 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -3068,7 +3068,7 @@ void StorageReplicatedMergeTree::alter(const AlterCommands & params, ColumnsDescription new_columns = data.getColumns(); ASTPtr new_order_by_ast = data.order_by_ast; ASTPtr new_primary_key_ast = data.primary_key_ast; - ASTPtr new_indexes_ast = data.skip_indexes_ast; + ASTPtr new_indexes_ast = data.skip_indexes_ast->clone(); params.apply(new_columns, new_order_by_ast, new_primary_key_ast, new_indexes_ast); String new_columns_str = new_columns.toString(); From 8c35ccdd1d7798a0e968cefd7047caa32285dc05 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 16 Jan 2019 15:29:26 +0300 Subject: [PATCH 072/586] fix alters --- dbms/src/Storages/AlterCommands.cpp | 27 ++++++++++------- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 29 ++++++++++++++++--- dbms/src/Storages/MergeTree/MergeTreeData.h | 4 ++- .../ReplicatedMergeTreeAlterThread.cpp | 2 +- dbms/src/Storages/StorageMergeTree.cpp | 6 ++-- .../Storages/StorageReplicatedMergeTree.cpp | 6 ++-- 6 files changed, 53 insertions(+), 21 deletions(-) diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index 406ded9339d..f70e22a2dcd 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -331,37 +331,44 @@ void AlterCommand::apply(ColumnsDescription & columns_description, ASTPtr & orde ErrorCodes::ILLEGAL_COLUMN}; } - auto insert_it = indexes_decl_ast->children.end(); + auto new_indexes_decl_ast = indexes_decl_ast->clone(); + auto insert_it = new_indexes_decl_ast->children.end(); if (!after_index_name.empty()) { insert_it = std::find_if( - 
indexes_decl_ast->children.begin(),
- indexes_decl_ast->children.end(),
+ new_indexes_decl_ast->children.begin(),
+ new_indexes_decl_ast->children.end(),
 [this](const ASTPtr & index_ast) {
 return typeid_cast<const ASTIndexDeclaration &>(*index_ast).name == after_index_name;
 });
- if (insert_it == indexes_decl_ast->children.end()) {
+ if (insert_it == new_indexes_decl_ast->children.end()) {
 throw Exception("Wrong index name. Cannot find index `" + after_index_name + "` to insert after.",
 ErrorCodes::LOGICAL_ERROR);
 }
 }
 
- indexes_decl_ast->children.emplace(insert_it, index_decl);
+
+ new_indexes_decl_ast->children.emplace(insert_it, index_decl);
+ indexes_decl_ast = new_indexes_decl_ast;
 }
 else if (type == DROP_INDEX)
 {
+ auto new_indexes_decl_ast = indexes_decl_ast->clone();
+
 auto erase_it = std::find_if(
- indexes_decl_ast->children.begin(),
- indexes_decl_ast->children.end(),
+ new_indexes_decl_ast->children.begin(),
+ new_indexes_decl_ast->children.end(),
 [this](const ASTPtr & index_ast) {
 return typeid_cast<const ASTIndexDeclaration &>(*index_ast).name == index_name;
 });
- if (erase_it == indexes_decl_ast->children.end())
+ if (erase_it == new_indexes_decl_ast->children.end())
 {
 throw Exception("Wrong index name. Cannot find index `" + index_name + "` to drop.",
 ErrorCodes::LOGICAL_ERROR);
 }
-
- indexes_decl_ast->children.erase(erase_it);
+
+ new_indexes_decl_ast->children.erase(erase_it);
+ indexes_decl_ast = new_indexes_decl_ast;
 }
 else
 throw Exception("Wrong parameter type in ALTER query", ErrorCodes::LOGICAL_ERROR);
@@ -383,7 +390,7 @@ void AlterCommands::apply(ColumnsDescription & columns_description, ASTPtr & ord
 auto new_columns_description = columns_description;
 auto new_order_by_ast = order_by_ast;
 auto new_primary_key_ast = primary_key_ast;
- auto new_indexes_decl_ast = indexes_decl_ast->clone();
+ auto new_indexes_decl_ast = indexes_decl_ast;
 
 for (const AlterCommand & command : *this)
 if (!command.ignore)
 command.apply(new_columns_description, new_order_by_ast, new_primary_key_ast, new_indexes_decl_ast);
diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
index eed6e25d06b..b30dd4c5bf1 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
@@ -1042,7 +1042,7 @@ void MergeTreeData::checkAlter(const AlterCommands & commands)
 auto new_columns = getColumns();
 ASTPtr new_order_by_ast = order_by_ast;
 ASTPtr new_primary_key_ast = primary_key_ast;
- ASTPtr new_indexes_ast = skip_indexes_ast->clone();
+ ASTPtr new_indexes_ast = skip_indexes_ast;
 commands.apply(new_columns, new_order_by_ast, new_primary_key_ast, new_indexes_ast);
 
 /// Set of columns that shouldn't be altered.
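
A subtlety in the ADD INDEX ... AFTER handling above: std::find_if yields an iterator pointing at the named index, while emplace inserts before the given iterator, so the position must be advanced by one to land after the match — patch 073 below adds exactly that ++insert_it. A standalone sketch of the intended semantics (generic types, not the project's AST classes):

    #include <algorithm>
    #include <stdexcept>
    #include <string>
    #include <vector>

    /// Insert `name` after `after`, or append when `after` is empty.
    void insertIndexAfter(std::vector<std::string> & indices,
                          const std::string & name, const std::string & after)
    {
        auto pos = indices.end();
        if (!after.empty())
        {
            pos = std::find(indices.begin(), indices.end(), after);
            if (pos == indices.end())
                throw std::runtime_error("Cannot find index `" + after + "` to insert after.");
            ++pos;  /// find() points at `after`; insert() inserts before `pos`.
        }
        indices.insert(pos, name);
    }
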
@@ -1130,11 +1130,13 @@ void MergeTreeData::checkAlter(const AlterCommands & commands) NameToNameMap unused_map; bool unused_bool; - createConvertExpression(nullptr, getColumns().getAllPhysical(), new_columns.getAllPhysical(), unused_expression, unused_map, unused_bool); + createConvertExpression(nullptr, getColumns().getAllPhysical(), new_columns.getAllPhysical(), + skip_indexes_ast, new_indexes_ast,unused_expression, unused_map, unused_bool); } void MergeTreeData::createConvertExpression(const DataPartPtr & part, const NamesAndTypesList & old_columns, const NamesAndTypesList & new_columns, - ExpressionActionsPtr & out_expression, NameToNameMap & out_rename_map, bool & out_force_update_metadata) const + const ASTPtr & old_indices_ast, const ASTPtr & new_indices_ast, ExpressionActionsPtr & out_expression, + NameToNameMap & out_rename_map, bool & out_force_update_metadata) const { out_expression = nullptr; out_rename_map = {}; @@ -1148,6 +1150,22 @@ void MergeTreeData::createConvertExpression(const DataPartPtr & part, const Name /// For every column that need to be converted: source column name, column name of calculated expression for conversion. std::vector> conversions; + + /// Remove old indices + std::set new_indices; + for (const auto & index_decl : new_indices_ast->children) + new_indices.emplace(dynamic_cast(*index_decl.get()).name); + + for (const auto & index_decl : old_indices_ast->children) + { + const auto & index = dynamic_cast(*index_decl.get()); + if (!new_indices.count(index.name)) + { + out_rename_map["skp_idx_" + index.name + ".idx"] = ""; + out_rename_map["skp_idx_" + index.name + ".mrk"] = ""; + } + } + /// Collect counts for shared streams of different columns. As an example, Nested columns have shared stream with array sizes. std::map stream_counts; for (const NameAndTypePair & column : old_columns) @@ -1278,12 +1296,15 @@ void MergeTreeData::createConvertExpression(const DataPartPtr & part, const Name MergeTreeData::AlterDataPartTransactionPtr MergeTreeData::alterDataPart( const DataPartPtr & part, const NamesAndTypesList & new_columns, + const ASTPtr & new_indices_ast, bool skip_sanity_checks) { ExpressionActionsPtr expression; AlterDataPartTransactionPtr transaction(new AlterDataPartTransaction(part)); /// Blocks changes to the part. bool force_update_metadata; - createConvertExpression(part, part->columns, new_columns, expression, transaction->rename_map, force_update_metadata); + createConvertExpression(part, part->columns, new_columns, + skip_indexes_ast, new_indices_ast, + expression, transaction->rename_map, force_update_metadata); size_t num_files_to_modify = transaction->rename_map.size(); size_t num_files_to_remove = 0; diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 70ae8b25c67..900005baa2f 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -487,6 +487,7 @@ public: AlterDataPartTransactionPtr alterDataPart( const DataPartPtr & part, const NamesAndTypesList & new_columns, + const ASTPtr & new_indices_ast, bool skip_sanity_checks); /// Freezes all parts. @@ -739,7 +740,8 @@ private: /// Files to be deleted are mapped to an empty string in out_rename_map. /// If part == nullptr, just checks that all type conversions are possible. 
void createConvertExpression(const DataPartPtr & part, const NamesAndTypesList & old_columns, const NamesAndTypesList & new_columns, - ExpressionActionsPtr & out_expression, NameToNameMap & out_rename_map, bool & out_force_update_metadata) const; + const ASTPtr & old_indices_ast, const ASTPtr & new_indices_ast, + ExpressionActionsPtr & out_expression, NameToNameMap & out_rename_map, bool & out_force_update_metadata) const; /// Calculates column sizes in compressed form for the current state of data_parts. Call with data_parts mutex locked. void calculateColumnSizesImpl(); diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp index 438bca365ea..d0d8f26a03b 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp @@ -150,7 +150,7 @@ void ReplicatedMergeTreeAlterThread::run() /// Update the part and write result to temporary files. /// TODO: You can skip checking for too large changes if ZooKeeper has, for example, /// node /flags/force_alter. - auto transaction = storage.data.alterDataPart(part, columns_for_parts, false); + auto transaction = storage.data.alterDataPart(part, columns_for_parts, storage.data.skip_indexes_ast, false); if (!transaction) continue; diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index c3aa533e6be..94ab7eae324 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -216,7 +216,7 @@ void StorageMergeTree::alter( auto new_columns = data.getColumns(); ASTPtr new_order_by_ast = data.order_by_ast; ASTPtr new_primary_key_ast = data.primary_key_ast; - ASTPtr new_indexes_ast = data.skip_indexes_ast->clone(); + ASTPtr new_indexes_ast = data.skip_indexes_ast; params.apply(new_columns, new_order_by_ast, new_primary_key_ast, new_indexes_ast); auto parts = data.getDataParts({MergeTreeDataPartState::PreCommitted, MergeTreeDataPartState::Committed, MergeTreeDataPartState::Outdated}); @@ -224,7 +224,7 @@ void StorageMergeTree::alter( std::vector transactions; for (const MergeTreeData::DataPartPtr & part : parts) { - if (auto transaction = data.alterDataPart(part, columns_for_parts, false)) + if (auto transaction = data.alterDataPart(part, columns_for_parts, new_indexes_ast, false)) transactions.push_back(std::move(transaction)); } @@ -715,7 +715,7 @@ void StorageMergeTree::clearColumnInPartition(const ASTPtr & partition, const Fi if (part->info.partition_id != partition_id) throw Exception("Unexpected partition ID " + part->info.partition_id + ". 
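
The file-level convention referenced in this comment is what makes DROP INDEX work in createConvertExpression above: an entry mapping a file name to an empty string means the file is deleted, which is how the skp_idx_*.idx and skp_idx_*.mrk files of a dropped index disappear. A sketch of a consumer of that convention (hypothetical helper, not the project's code; error handling omitted):

    #include <cstdio>
    #include <map>
    #include <string>

    /// Apply a rename map in the convention of createConvertExpression:
    /// an empty target name means "delete the source file".
    void applyRenameMap(const std::map<std::string, std::string> & rename_map)
    {
        for (const auto & [from, to] : rename_map)
        {
            if (to.empty())
                std::remove(from.c_str());              /// dropped index or column stream
            else
                std::rename(from.c_str(), to.c_str());  /// converted column file
        }
    }
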
This is a bug.", ErrorCodes::LOGICAL_ERROR); - if (auto transaction = data.alterDataPart(part, columns_for_parts, false)) + if (auto transaction = data.alterDataPart(part, columns_for_parts, ignored_indexes_ast, false)) transactions.push_back(std::move(transaction)); LOG_DEBUG(log, "Removing column " << get(column_name) << " from part " << part->name); diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 34ac516edca..21a4aad03cc 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -1510,7 +1510,7 @@ void StorageReplicatedMergeTree::executeClearColumnInPartition(const LogEntry & LOG_DEBUG(log, "Clearing column " << entry.column_name << " in part " << part->name); - auto transaction = data.alterDataPart(part, columns_for_parts, false); + auto transaction = data.alterDataPart(part, columns_for_parts, ignored_indexes_ast, false); if (!transaction) continue; @@ -3068,7 +3068,7 @@ void StorageReplicatedMergeTree::alter(const AlterCommands & params, ColumnsDescription new_columns = data.getColumns(); ASTPtr new_order_by_ast = data.order_by_ast; ASTPtr new_primary_key_ast = data.primary_key_ast; - ASTPtr new_indexes_ast = data.skip_indexes_ast->clone(); + ASTPtr new_indexes_ast = data.skip_indexes_ast; params.apply(new_columns, new_order_by_ast, new_primary_key_ast, new_indexes_ast); String new_columns_str = new_columns.toString(); @@ -3078,6 +3078,8 @@ void StorageReplicatedMergeTree::alter(const AlterCommands & params, ReplicatedMergeTreeTableMetadata new_metadata(data); if (new_order_by_ast.get() != data.order_by_ast.get()) new_metadata.sorting_key = serializeAST(*MergeTreeData::extractKeyExpressionList(new_order_by_ast)); + if (new_indexes_ast.get() != data.skip_indexes_ast.get()) + new_metadata.skip_indexes = serializeAST(*new_indexes_ast.get()); String new_metadata_str = new_metadata.toString(); if (new_metadata_str != ReplicatedMergeTreeTableMetadata(data).toString()) From 65539bfea0371f2bcea9e4a090521dbe7da70cf4 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 16 Jan 2019 16:05:27 +0300 Subject: [PATCH 073/586] fix "after" --- dbms/src/Storages/AlterCommands.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index f70e22a2dcd..e4f950830d1 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -346,6 +346,7 @@ void AlterCommand::apply(ColumnsDescription & columns_description, ASTPtr & orde throw Exception("Wrong index name. 
Cannot find index `" + after_index_name + "` to insert after.", ErrorCodes::LOGICAL_ERROR); } + ++insert_it; } new_indexes_decl_ast->children.emplace(insert_it, index_decl); From 2de2b6f32e2c277273efd4bd79700ae75f3f5152 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 16 Jan 2019 19:53:38 +0300 Subject: [PATCH 074/586] fixed alter --- dbms/src/Storages/AlterCommands.cpp | 17 +- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 23 +- dbms/src/Storages/MergeTree/MergeTreeData.h | 2 +- .../ReplicatedMergeTreeTableMetadata.cpp | 17 +- .../ReplicatedMergeTreeTableMetadata.h | 5 +- .../Storages/StorageReplicatedMergeTree.cpp | 40 +- dbms/tests/queries/clickhouse-functions | 558 ++++++++++++++++++ dbms/tests/queries/clickhouse-table_functions | 10 + 8 files changed, 637 insertions(+), 35 deletions(-) create mode 100644 dbms/tests/queries/clickhouse-functions create mode 100644 dbms/tests/queries/clickhouse-table_functions diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index e4f950830d1..a4519800534 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -317,9 +317,15 @@ void AlterCommand::apply(ColumnsDescription & columns_description, ASTPtr & orde } else if (type == ADD_INDEX) { + ASTPtr new_indexes_decl_ast; + if (indexes_decl_ast) + new_indexes_decl_ast = indexes_decl_ast->clone(); + else + new_indexes_decl_ast = ASTExpressionList().ptr(); + if (std::any_of( - indexes_decl_ast->children.cbegin(), - indexes_decl_ast->children.cend(), + new_indexes_decl_ast->children.cbegin(), + new_indexes_decl_ast->children.cend(), [this](const ASTPtr & index_ast) { return typeid_cast(*index_ast).name == index_name; })) @@ -331,7 +337,6 @@ void AlterCommand::apply(ColumnsDescription & columns_description, ASTPtr & orde ErrorCodes::ILLEGAL_COLUMN}; } - auto new_indexes_decl_ast = indexes_decl_ast->clone(); auto insert_it = new_indexes_decl_ast->children.end(); if (!after_index_name.empty()) @@ -354,7 +359,11 @@ void AlterCommand::apply(ColumnsDescription & columns_description, ASTPtr & orde } else if (type == DROP_INDEX) { - auto new_indexes_decl_ast = indexes_decl_ast->clone(); + ASTPtr new_indexes_decl_ast; + if (indexes_decl_ast) + new_indexes_decl_ast = indexes_decl_ast->clone(); + else + new_indexes_decl_ast = ASTExpressionList().ptr(); auto erase_it = std::find_if( new_indexes_decl_ast->children.begin(), diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index b30dd4c5bf1..987026d60a6 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -1131,7 +1131,7 @@ void MergeTreeData::checkAlter(const AlterCommands & commands) bool unused_bool; createConvertExpression(nullptr, getColumns().getAllPhysical(), new_columns.getAllPhysical(), - skip_indexes_ast, new_indexes_ast,unused_expression, unused_map, unused_bool); + skip_indexes_ast, new_indexes_ast, unused_expression, unused_map, unused_bool); } void MergeTreeData::createConvertExpression(const DataPartPtr & part, const NamesAndTypesList & old_columns, const NamesAndTypesList & new_columns, @@ -1153,18 +1153,19 @@ void MergeTreeData::createConvertExpression(const DataPartPtr & part, const Name /// Remove old indices std::set new_indices; - for (const auto & index_decl : new_indices_ast->children) - new_indices.emplace(dynamic_cast(*index_decl.get()).name); - - for (const auto & index_decl : old_indices_ast->children) - { - const auto & index = 
diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index b30dd4c5bf1..987026d60a6 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
@@ -1131,7 +1131,7 @@ void MergeTreeData::checkAlter(const AlterCommands & commands) bool unused_bool; createConvertExpression(nullptr, getColumns().getAllPhysical(), new_columns.getAllPhysical(), - skip_indexes_ast, new_indexes_ast,unused_expression, unused_map, unused_bool); + skip_indexes_ast, new_indexes_ast, unused_expression, unused_map, unused_bool); } void MergeTreeData::createConvertExpression(const DataPartPtr & part, const NamesAndTypesList & old_columns, const NamesAndTypesList & new_columns, /// Remove old indices std::set<String> new_indices; - for (const auto & index_decl : new_indices_ast->children) - new_indices.emplace(dynamic_cast<const ASTIndexDeclaration &>(*index_decl.get()).name); - - for (const auto & index_decl : old_indices_ast->children) - { - const auto & index = dynamic_cast<const ASTIndexDeclaration &>(*index_decl.get()); - if (!new_indices.count(index.name)) + if (new_indices_ast) + for (const auto & index_decl : new_indices_ast->children) + new_indices.emplace(dynamic_cast<const ASTIndexDeclaration &>(*index_decl.get()).name); + if (old_indices_ast) + for (const auto & index_decl : old_indices_ast->children) { - out_rename_map["skp_idx_" + index.name + ".idx"] = ""; - out_rename_map["skp_idx_" + index.name + ".mrk"] = ""; + const auto & index = dynamic_cast<const ASTIndexDeclaration &>(*index_decl.get()); + if (!new_indices.count(index.name)) + { + out_rename_map["skp_idx_" + index.name + ".idx"] = ""; + out_rename_map["skp_idx_" + index.name + ".mrk"] = ""; + } } - } /// Collect counts for shared streams of different columns. As an example, Nested columns have shared stream with array sizes. std::map<String, size_t> stream_counts;
diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 900005baa2f..bdfe3247e3c 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h
@@ -476,7 +476,7 @@ public: /// Check if the ALTER can be performed: /// - all needed columns are present. /// - all type conversions can be done. - /// - columns corresponding to primary key, sign, sampling expression and date are not affected. + /// - columns corresponding to primary key, indices, sign, sampling expression and date are not affected. /// If something is wrong, throws an exception. void checkAlter(const AlterCommands & commands);
diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index aaabc6901ae..15d343924ac 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp
@@ -184,10 +184,19 @@ ReplicatedMergeTreeTableMetadata::checkAndFindDiff(const ReplicatedMergeTreeTabl } if (skip_indexes != from_zk.skip_indexes) - throw Exception("Existing table metadata in ZooKeeper differs in skip indexes." - " Stored in ZooKeeper: " + from_zk.skip_indexes + - ", local: " + skip_indexes, - ErrorCodes::METADATA_MISMATCH); + { + if (allow_alter) + { + diff.skip_indices_changed = true; + diff.new_skip_indices = from_zk.skip_indexes; + } + else + throw Exception( + "Existing table metadata in ZooKeeper differs in skip indexes." + " Stored in ZooKeeper: " + from_zk.skip_indexes + + ", local: " + skip_indexes, + ErrorCodes::METADATA_MISMATCH); + } return diff; }
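With this change a replica whose local skip indexes differ from the copy stored in ZooKeeper records the difference when allow_alter is set, instead of refusing to start; setTableStructure below then applies it. A rough sketch of the intended flow, modelled on the replicated tests added later in this series (table, index, and replica names are illustrative):

ALTER TABLE test.minmax_idx ADD INDEX idx2 BY u64 + i32 TYPE minmax GRANULARITY 10; -- executed on replica r1
SHOW CREATE TABLE test.minmax_idx_r; -- replica r2 picks the new index up via the shared ZooKeeper metadata

When allow_alter is false, the METADATA_MISMATCH exception is still thrown, as before.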
+ " Stored in ZooKeeper: " + from_zk.skip_indexes + + ", local: " + skip_indexes, + ErrorCodes::METADATA_MISMATCH); + } return diff; } diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h index 5fd863046e4..19524e1b09a 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h @@ -41,7 +41,10 @@ struct ReplicatedMergeTreeTableMetadata bool sorting_key_changed = false; String new_sorting_key; - bool empty() const { return !sorting_key_changed; } + bool skip_indices_changed = false; + String new_skip_indices; + + bool empty() const { return !sorting_key_changed && !skip_indices_changed; } }; Diff checkAndFindDiff(const ReplicatedMergeTreeTableMetadata & from_zk, bool allow_alter) const; diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 21a4aad03cc..318bf634e3f 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -418,26 +418,36 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column { ASTPtr new_primary_key_ast = data.primary_key_ast; ASTPtr new_order_by_ast = data.order_by_ast; + ASTPtr new_indices_ast = data.skip_indexes_ast; IDatabase::ASTModifier storage_modifier; if (!metadata_diff.empty()) { - ParserNotEmptyExpressionList parser(false); - auto new_sorting_key_expr_list = parseQuery(parser, metadata_diff.new_sorting_key, 0); - - if (new_sorting_key_expr_list->children.size() == 1) - new_order_by_ast = new_sorting_key_expr_list->children[0]; - else + if (metadata_diff.sorting_key_changed) { - auto tuple = makeASTFunction("tuple"); - tuple->arguments->children = new_sorting_key_expr_list->children; - new_order_by_ast = tuple; + ParserNotEmptyExpressionList parser(false); + auto new_sorting_key_expr_list = parseQuery(parser, metadata_diff.new_sorting_key, 0); + + if (new_sorting_key_expr_list->children.size() == 1) + new_order_by_ast = new_sorting_key_expr_list->children[0]; + else + { + auto tuple = makeASTFunction("tuple"); + tuple->arguments->children = new_sorting_key_expr_list->children; + new_order_by_ast = tuple; + } + + if (!data.primary_key_ast) + { + /// Primary and sorting key become independent after this ALTER so we have to + /// save the old ORDER BY expression as the new primary key. + new_primary_key_ast = data.order_by_ast->clone(); + } } - if (!data.primary_key_ast) + if (metadata_diff.skip_indices_changed) { - /// Primary and sorting key become independent after this ALTER so we have to - /// save the old ORDER BY expression as the new primary key. - new_primary_key_ast = data.order_by_ast->clone(); + ParserIndexDeclaration parser; + new_indices_ast = parseQuery(parser, metadata_diff.new_skip_indices, 0); } storage_modifier = [&](IAST & ast) @@ -453,6 +463,8 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column storage_ast.set(storage_ast.primary_key, new_primary_key_ast); storage_ast.set(storage_ast.order_by, new_order_by_ast); + + storage_ast.set(storage_ast.indexes, new_indices_ast); }; } @@ -461,7 +473,7 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column /// Even if the primary/sorting keys didn't change we must reinitialize it /// because primary key column types might have changed. 
data.setPrimaryKeyAndColumns(new_order_by_ast, new_primary_key_ast, new_columns); - data.setSkipIndexes(data.skip_indexes_ast); + data.setSkipIndexes(new_indices_ast); } diff --git a/dbms/tests/queries/clickhouse-functions b/dbms/tests/queries/clickhouse-functions new file mode 100644 index 00000000000..87ae1c21f88 --- /dev/null +++ b/dbms/tests/queries/clickhouse-functions @@ -0,0 +1,558 @@ +convertCharset +transform +findClusterValue +findClusterIndex +toNullable +coalesce +isNotNull +pointInEllipses +pow +acos +asin +tan +cos +lgamma +erfc +erf +sqrt +log10 +exp10 +e +visitParamExtractFloat +visitParamExtractUInt +decodeURLComponent +cutURLParameter +cutQueryStringAndFragment +cutFragment +cutWWW +URLPathHierarchy +URLHierarchy +extractURLParameterNames +extractURLParameter +queryStringAndFragment +pathFull +sin +topLevelDomain +domainWithoutWWW +domain +protocol +greatCircleDistance +extract +match +positionCaseInsensitiveUTF8 +positionUTF8 +position +replaceRegexpAll +replaceRegexpOne +arrayStringConcat +splitByString +alphaTokens +tgamma +base64Decode +regexpQuoteMeta +positionCaseInsensitive +trimBoth +trimRight +endsWith +startsWith +appendTrailingCharIfAbsent +substringUTF8 +concatAssumeInjective +reverseUTF8 +upperUTF8 +upper +lower +length +notEmpty +trunc +round +reinterpretAsString +reinterpretAsDateTime +reinterpretAsDate +reinterpretAsFloat64 +reinterpretAsFloat32 +reinterpretAsInt64 +reinterpretAsInt8 +reinterpretAsUInt32 +generateUUIDv4 +rand +toISOYear +toISOWeek +concat +toDecimal64 +ifNull +toStartOfDay +toSecond +addSeconds +sleepEachRow +toDayOfWeek +toDayOfMonth +reinterpretAsUInt8 +UUIDNumToString +bitShiftLeft +toDate +sleep +emptyArrayUInt8 +parseDateTimeBestEffort +toFloat32OrNull +toInt16 +IPv6NumToString +atan +substring +arrayIntersect +isInfinite +visitParamExtractString +globalNotIn +toDateTimeOrZero +toRelativeHourNum +toIntervalYear +toFloat64OrZero +dateDiff +hex +arrayEnumerateDense +toUInt8OrZero +toRelativeSecondNum +toUInt64OrNull +MACNumToString +toInt32OrNull +toDayOfYear +toUnixTimestamp +toString +toDateOrZero +subtractDays +murmurHash2_32 +toUInt64 +toUInt8 +dictGetDateTime +empty +isFinite +caseWithExpression +caseWithoutExpression +caseWithoutExpr +visitParamExtractRaw +queryString +dictGetInt32OrDefault +tryBase64Decode +toInt8OrZero +multiIf +if +intExp10 +bitShiftRight +less +roundToExp2 +toUInt8OrNull +dictGetUInt16 +like +regionToPopulation +MACStringToOUI +parseDateTimeBestEffortOrNull +not +toInt32OrZero +arrayFilter +toInt16OrZero +range +equals +now +toInt8OrNull +bitmaskToArray +roundAge +toIntervalMonth +toUUID +notEquals +toInt16OrNull +murmurHash2_64 +hasAny +joinGet +toIntervalMinute +notLike +lcm +toStringCutToZero +isNull +tupleElement +replaceAll +parseDateTimeBestEffortOrZero +toFloat32OrZero +lowerUTF8 +notIn +gcd +murmurHash3_64 +toMinute +toDateTimeOrNull +toTime +materialize +roundDuration +gccMurmurHash +ceil +toStartOfQuarter +dictGetInt8OrDefault +MACStringToNum +toTypeName +toUInt32OrNull +emptyArrayString +dictGetDateTimeOrDefault +bitRotateRight +cutIPv6 +toUInt32OrZero +timezone +arrayPopFront +toInt32 +intHash64 +extractURLParameters +lowCardinalityIndices +toStartOfMonth +toYear +hasAll +rowNumberInAllBlocks +bitTestAll +arrayCount +arraySort +abs +bitNot +intDiv +intDivOrZero +firstSignificantSubdomain +reinterpretAsUInt16 +dictGetFloat32OrDefault +toHour +minus +regionToArea +unhex +IPv4StringToNum +toIntervalHour +toInt8 +dictGetFloat32 +log +IPv4NumToString +modulo +arrayEnumerate +reinterpretAsUInt64 +toMonth 
+visitParamExtractBool +emptyArrayUInt64 +replaceOne +arrayReverseSort +toFloat32 +trimLeft +toRelativeMonthNum +emptyArrayInt32 +randConstant +CAST +bitRotateLeft +toRelativeYearNum +negate +toUInt64OrZero +emptyArrayFloat64 +bitTest +toDecimal128 +plus +or +cutQueryString +reinterpretAsFixedString +countEqual +arrayPopBack +arrayElement +log2 +array +arrayReverse +arrayPushBack +subtractWeeks +bitTestAny +bitAnd +base64Encode +toDecimal32 +arrayPushFront +lessOrEquals +intExp2 +toUInt16OrZero +arrayConcat +arrayCumSum +arraySlice +addDays +dictGetUInt8 +toUInt32 +bitOr +caseWithExpr +UUIDStringToNum +emptyArrayUInt16 +toIntervalDay +MD5 +emptyArrayUInt32 +emptyArrayInt8 +toMonday +addMonths +CHAR_LENGTH +least +divide +arrayUniq +SHA256 +arrayExists +multiply +toUInt16OrNull +dictGetInt8 +visitParamHas +emptyArrayInt64 +toIntervalSecond +emptyArrayToSingle +path +toInt64OrZero +SHA1 +extractAll +roundDown +emptyArrayDate +dumpColumnStructure +lengthUTF8 +greatest +arrayEnumerateUniq +arrayDistinct +javaHash +arrayFirst +toFixedString +IPv4NumToStringClassC +toFloat64OrNull +IPv4ToIPv6 +identity +toStartOfYear +visitParamExtractInt +toStartOfMinute +toStartOfFiveMinute +toStartOfFifteenMinutes +toStartOfHour +sumburConsistentHash +toStartOfISOYear +toRelativeQuarterNum +toRelativeWeekNum +toRelativeDayNum +cbrt +yesterday +bitXor +timeSlot +timeSlots +emptyArrayInt16 +dictGetInt16 +toYYYYMM +toYYYYMMDDhhmmss +toUInt16 +addMinutes +addHours +addWeeks +nullIf +subtractSeconds +subtractMinutes +toIntervalWeek +subtractHours +isNaN +subtractMonths +reverse +runningDifferenceStartingWithFirstValue +toDateTime +subtractQuarters +tuple +arrayCumSumNonNegative +rowNumberInBlock +toDateOrNull +subtractYears +toTimeZone +formatDateTime +has +cityHash64 +intHash32 +fragment +regionToCity +dictGetOrDefault +indexOf +regionToDistrict +regionToCountry +visibleWidth +regionToContinent +regionToTopContinent +toColumnTypeName +regionHierarchy +dictGetDate +dictHas +dictGetUInt64 +cutToFirstSignificantSubdomain +dictGetInt32 +pointInPolygon +dictGetInt64 +blockNumber +IPv6StringToNum +dictGetString +dictGetFloat64 +dictGetUUID +CHARACTER_LENGTH +toQuarter +dictGetHierarchy +toFloat64 +arraySum +toInt64OrNull +dictIsIn +exp +floor +dictGetUInt8OrDefault +dictGetUInt16OrDefault +dictGetUInt32OrDefault +emptyArrayDateTime +greater +jumpConsistentHash +dictGetUInt64OrDefault +dictGetInt16OrDefault +dictGetInt64OrDefault +reinterpretAsInt32 +dictGetUInt32 +murmurHash3_32 +rand64 +dictGetDateOrDefault +bar +dictGetUUIDOrDefault +regionToName +dictGetStringOrDefault +splitByChar +dictGet +modelEvaluate +arrayReduce +farmHash64 +bitmaskToList +formatReadableSize +halfMD5 +SHA224 +arrayMap +sipHash64 +dictGetFloat64OrDefault +sipHash128 +metroHash64 +hiveHash +murmurHash3_128 +toInt64 +xxHash32 +xxHash64 +yandexConsistentHash +emptyArrayFloat32 +arrayAll +toYYYYMMDD +today +arrayFirstIndex +greaterOrEquals +arrayDifference +toIntervalQuarter +throwIf +and +xor +addQuarters +currentDatabase +hostName +URLHash +getSizeOfEnumType +defaultValueOfArgumentType +blockSize +arrayResize +ignore +toRelativeMinuteNum +indexHint +reinterpretAsInt16 +addYears +arrayJoin +replicate +hasColumnInTable +version +regionIn +uptime +runningAccumulate +runningDifference +assumeNotNull +pi +finalizeAggregation +toLowCardinality +exp2 +lowCardinalityKeys +in +globalIn +power +ln +replace +locate +ceiling +truncate +lcase +ucase +substr +mid +retention +maxIntersections +groupBitXor +groupBitOr +uniqUpTo +uniqExact +uniq +covarPop 
+stddevPop +varPop +covarSamp +varSamp +sumMap +corrStable +corr +quantileTiming +quantileDeterministic +quantilesExact +uniqHLL12 +quantilesTiming +covarPopStable +stddevSampStable +quantilesExactWeighted +quantileExactWeighted +quantileTimingWeighted +quantileExact +quantilesDeterministic +quantiles +topK +sumWithOverflow +count +groupArray +stddevSamp +groupArrayInsertAt +quantile +quantilesTimingWeighted +sum +covarSampStable +anyLast +quantileTDigest +quantilesTDigest +windowFunnel +min +argMax +varSampStable +maxIntersectionsPosition +quantilesTDigestWeighted +groupUniqArray +sequenceCount +uniqCombined +boundingRatio +sumKahan +any +anyHeavy +histogram +quantileTDigestWeighted +max +groupBitAnd +argMin +varPopStable +avg +sequenceMatch +stddevPopStable +BIT_XOR +medianExactWeighted +medianTiming +medianExact +median +medianDeterministic +VAR_SAMP +STDDEV_POP +medianTDigest +VAR_POP +medianTDigestWeighted +BIT_OR +STDDEV_SAMP +medianTimingWeighted +COVAR_SAMP +COVAR_POP +BIT_AND diff --git a/dbms/tests/queries/clickhouse-table_functions b/dbms/tests/queries/clickhouse-table_functions new file mode 100644 index 00000000000..0ee110959df --- /dev/null +++ b/dbms/tests/queries/clickhouse-table_functions @@ -0,0 +1,10 @@ +jdbc +odbc +hdfs +remote +catBoostPool +merge +file +cluster +url +numbers From b4ce03ea7e9e0ff37495902f77966e7d68619ac3 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 16 Jan 2019 20:59:47 +0300 Subject: [PATCH 075/586] alter fix + test --- dbms/src/Storages/AlterCommands.cpp | 4 +- .../0_stateless/00824_indices_alter.reference | 0 .../0_stateless/00824_indices_alter.sql | 68 +++++++++++++++++++ 3 files changed, 70 insertions(+), 2 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00824_indices_alter.reference create mode 100644 dbms/tests/queries/0_stateless/00824_indices_alter.sql diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index a4519800534..2d51afa1c46 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -321,7 +321,7 @@ void AlterCommand::apply(ColumnsDescription & columns_description, ASTPtr & orde if (indexes_decl_ast) new_indexes_decl_ast = indexes_decl_ast->clone(); else - new_indexes_decl_ast = ASTExpressionList().ptr(); + new_indexes_decl_ast = std::make_shared(); if (std::any_of( new_indexes_decl_ast->children.cbegin(), @@ -363,7 +363,7 @@ void AlterCommand::apply(ColumnsDescription & columns_description, ASTPtr & orde if (indexes_decl_ast) new_indexes_decl_ast = indexes_decl_ast->clone(); else - new_indexes_decl_ast = ASTExpressionList().ptr(); + new_indexes_decl_ast = std::make_shared(); auto erase_it = std::find_if( new_indexes_decl_ast->children.begin(), diff --git a/dbms/tests/queries/0_stateless/00824_indices_alter.reference b/dbms/tests/queries/0_stateless/00824_indices_alter.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/queries/0_stateless/00824_indices_alter.sql b/dbms/tests/queries/0_stateless/00824_indices_alter.sql new file mode 100644 index 00000000000..40ef6bb4fc8 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00824_indices_alter.sql @@ -0,0 +1,68 @@ +DROP TABLE IF EXISTS test.minmax_idx; +DROP TABLE IF EXISTS test.minmax_idx2; + +CREATE TABLE test.minmax_idx +( + u64 UInt64, + i32 Int32 +) ENGINE = MergeTree() +ORDER BY u64; + +INSERT INTO test.minmax_idx VALUES (1, 2); + +ALTER TABLE test.minmax_idx ADD INDEX idx1 BY u64 * i32 TYPE minmax GRANULARITY 10; +ALTER TABLE test.minmax_idx ADD INDEX 
idx2 BY u64 * i32 TYPE minmax GRANULARITY 10; +ALTER TABLE test.minmax_idx ADD INDEX idx3 BY u64 * i32 TYPE minmax GRANULARITY 10 AFTER idx1; + +SHOW CREATE TABLE test.minmax_idx; + +SELECT * FROM test.minmax_idx WHERE u64 * i32 = 3; + +INSERT INTO test.minmax_idx VALUES (1, 2); +INSERT INTO test.minmax_idx VALUES (1, 2); +INSERT INTO test.minmax_idx VALUES (1, 2); +INSERT INTO test.minmax_idx VALUES (1, 2); +INSERT INTO test.minmax_idx VALUES (1, 2); + +SELECT * FROM test.minmax_idx WHERE u64 * i32 = 3; + +ALTER TABLE test.minmax_idx DROP INDEX idx1 + +SHOW CREATE TABLE test.minmax_idx; + +SELECT * FROM test.minmax_idx WHERE u64 * i32 = 3; + +ALTER TABLE test.minmax_idx DROP INDEX idx3; +ALTER TABLE test.minmax_idx DROP INDEX idx2; + +SHOW CREATE TABLE test.minmax_idx; + +ALTER TABLE test.minmax_idx ADD INDEX idx1 BY u64 * i32 TYPE minmax GRANULARITY 10; + +SHOW CREATE TABLE test.minmax_idx; + +SELECT * FROM test.minmax_idx WHERE u64 * i32 = 3; + + +CREATE TABLE test.minmax_idx2 +( + u64 UInt64, + i32 Int32 +) ENGINE = MergeTree() + idx2 BY u64 * i32 TYPE minmax GRANULARITY 10 +INDEXES idx1 BY u64 * i32 TYPE minmax GRANULARITY 10, +ORDER BY u64; + +INSERT INTO test.minmax_idx2 VALUES (1, 2); +INSERT INTO test.minmax_idx2 VALUES (1, 2); + +SELECT * FROM test.minmax_idx2 WHERE u64 * i32 = 3; + +ALTER TABLE test.minmax_idx2 DROP INDEX idx1, DROP INDEX idx2; + +SHOW CREATE TABLE test.minmax_idx2; + +SELECT * FROM test.minmax_idx2 WHERE u64 * i32 = 3; + +DROP TABLE test.minmax_idx; +DROP TABLE test.minmax_idx2; \ No newline at end of file From 08d23c16d10c2edd2306359481ce9fb316ee0919 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 16 Jan 2019 21:24:38 +0300 Subject: [PATCH 076/586] fixes --- dbms/src/Storages/StorageMergeTree.cpp | 7 ++++++- dbms/src/Storages/StorageReplicatedMergeTree.cpp | 7 ++++++- .../queries/0_stateless/00824_indices_alter.sql | 12 ++++++------ 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 94ab7eae324..a3dfd5ba6ea 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -241,7 +241,12 @@ void StorageMergeTree::alter( storage_ast.set(storage_ast.primary_key, new_primary_key_ast); if (new_indexes_ast.get() != data.skip_indexes_ast.get()) - storage_ast.set(storage_ast.indexes, new_indexes_ast); + { + if (new_indexes_ast && !new_indexes_ast->children.empty()) + storage_ast.set(storage_ast.indexes, new_indexes_ast); + else + storage_ast.set(storage_ast.indexes, nullptr); + } }; context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, storage_modifier); diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 318bf634e3f..610b4737810 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -3091,7 +3091,12 @@ void StorageReplicatedMergeTree::alter(const AlterCommands & params, if (new_order_by_ast.get() != data.order_by_ast.get()) new_metadata.sorting_key = serializeAST(*MergeTreeData::extractKeyExpressionList(new_order_by_ast)); if (new_indexes_ast.get() != data.skip_indexes_ast.get()) - new_metadata.skip_indexes = serializeAST(*new_indexes_ast.get()); + { + if (new_indexes_ast && !new_indexes_ast->children.empty()) + new_metadata.skip_indexes = serializeAST(*new_indexes_ast.get()); + else + new_metadata.skip_indexes = {}; + } String new_metadata_str = 
new_metadata.toString(); if (new_metadata_str != ReplicatedMergeTreeTableMetadata(data).toString()) diff --git a/dbms/tests/queries/0_stateless/00824_indices_alter.sql b/dbms/tests/queries/0_stateless/00824_indices_alter.sql index 40ef6bb4fc8..2b18ab115e7 100644 --- a/dbms/tests/queries/0_stateless/00824_indices_alter.sql +++ b/dbms/tests/queries/0_stateless/00824_indices_alter.sql @@ -11,8 +11,8 @@ ORDER BY u64; INSERT INTO test.minmax_idx VALUES (1, 2); ALTER TABLE test.minmax_idx ADD INDEX idx1 BY u64 * i32 TYPE minmax GRANULARITY 10; -ALTER TABLE test.minmax_idx ADD INDEX idx2 BY u64 * i32 TYPE minmax GRANULARITY 10; -ALTER TABLE test.minmax_idx ADD INDEX idx3 BY u64 * i32 TYPE minmax GRANULARITY 10 AFTER idx1; +ALTER TABLE test.minmax_idx ADD INDEX idx2 BY u64 + i32 TYPE minmax GRANULARITY 10; +ALTER TABLE test.minmax_idx ADD INDEX idx3 BY u64 - i32 TYPE minmax GRANULARITY 10 AFTER idx1; SHOW CREATE TABLE test.minmax_idx; @@ -26,7 +26,7 @@ INSERT INTO test.minmax_idx VALUES (1, 2); SELECT * FROM test.minmax_idx WHERE u64 * i32 = 3; -ALTER TABLE test.minmax_idx DROP INDEX idx1 +ALTER TABLE test.minmax_idx DROP INDEX idx1; SHOW CREATE TABLE test.minmax_idx; @@ -49,9 +49,9 @@ CREATE TABLE test.minmax_idx2 u64 UInt64, i32 Int32 ) ENGINE = MergeTree() - idx2 BY u64 * i32 TYPE minmax GRANULARITY 10 -INDEXES idx1 BY u64 * i32 TYPE minmax GRANULARITY 10, -ORDER BY u64; +ORDER BY u64 +INDEXES idx1 BY u64 + i32 TYPE minmax GRANULARITY 10, + idx2 BY u64 * i32 TYPE minmax GRANULARITY 10; INSERT INTO test.minmax_idx2 VALUES (1, 2); INSERT INTO test.minmax_idx2 VALUES (1, 2); From 579832ff9bc9e16cd01a70872092e14250c5fd80 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 16 Jan 2019 21:35:03 +0300 Subject: [PATCH 077/586] upd setSkipIndexes --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 5 ++++ .../0_stateless/00824_indices_alter.reference | 28 +++++++++++++++++++ .../0_stateless/00824_indices_alter.sql | 14 +++++----- 3 files changed, 40 insertions(+), 7 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 987026d60a6..a199e1bda7a 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -353,6 +353,11 @@ void MergeTreeData::setSkipIndexes(const ASTPtr & indexes_asts, bool only_check) { if (!indexes_asts) { + if (!only_check) + { + skip_indexes_ast = indexes_asts; + indexes.clear(); + } return; } diff --git a/dbms/tests/queries/0_stateless/00824_indices_alter.reference b/dbms/tests/queries/0_stateless/00824_indices_alter.reference index e69de29bb2d..a70cc705f86 100644 --- a/dbms/tests/queries/0_stateless/00824_indices_alter.reference +++ b/dbms/tests/queries/0_stateless/00824_indices_alter.reference @@ -0,0 +1,28 @@ +CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = MergeTree() ORDER BY u64 INDEXES idx1 BY u64 * i32 TYPE minmax GRANULARITY 10, idx3 BY u64 - i32 TYPE minmax GRANULARITY 10, idx2 BY u64 + i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 +1 2 +1 2 +1 2 +1 2 +1 2 +1 2 +1 2 +CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = MergeTree() ORDER BY u64 INDEXES idx3 BY u64 - i32 TYPE minmax GRANULARITY 10, idx2 BY u64 + i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 +1 2 +1 2 +1 2 +1 2 +1 2 +1 2 +CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = MergeTree() ORDER BY u64 INDEXES idx2 BY u64 + i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 +CREATE TABLE 
test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = MergeTree() ORDER BY u64 INDEXES idx1 BY u64 * i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 +1 2 +1 2 +1 2 +1 2 +1 2 +1 2 +1 2 +1 2 +CREATE TABLE test.minmax_idx2 ( u64 UInt64, i32 Int32) ENGINE = MergeTree() ORDER BY u64 INDEXES idx1 BY u64 + i32 TYPE minmax GRANULARITY 10, idx2 BY u64 * i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 +1 2 +1 2 \ No newline at end of file diff --git a/dbms/tests/queries/0_stateless/00824_indices_alter.sql b/dbms/tests/queries/0_stateless/00824_indices_alter.sql index 2b18ab115e7..1963ae3afcd 100644 --- a/dbms/tests/queries/0_stateless/00824_indices_alter.sql +++ b/dbms/tests/queries/0_stateless/00824_indices_alter.sql @@ -16,7 +16,7 @@ ALTER TABLE test.minmax_idx ADD INDEX idx3 BY u64 - i32 TYPE minmax GRANULARITY SHOW CREATE TABLE test.minmax_idx; -SELECT * FROM test.minmax_idx WHERE u64 * i32 = 3; +SELECT * FROM test.minmax_idx WHERE u64 * i32 = 2; INSERT INTO test.minmax_idx VALUES (1, 2); INSERT INTO test.minmax_idx VALUES (1, 2); @@ -24,16 +24,16 @@ INSERT INTO test.minmax_idx VALUES (1, 2); INSERT INTO test.minmax_idx VALUES (1, 2); INSERT INTO test.minmax_idx VALUES (1, 2); -SELECT * FROM test.minmax_idx WHERE u64 * i32 = 3; +SELECT * FROM test.minmax_idx WHERE u64 * i32 = 2; ALTER TABLE test.minmax_idx DROP INDEX idx1; SHOW CREATE TABLE test.minmax_idx; -SELECT * FROM test.minmax_idx WHERE u64 * i32 = 3; +SELECT * FROM test.minmax_idx WHERE u64 * i32 = 2; -ALTER TABLE test.minmax_idx DROP INDEX idx3; ALTER TABLE test.minmax_idx DROP INDEX idx2; +ALTER TABLE test.minmax_idx DROP INDEX idx3; SHOW CREATE TABLE test.minmax_idx; @@ -41,7 +41,7 @@ ALTER TABLE test.minmax_idx ADD INDEX idx1 BY u64 * i32 TYPE minmax GRANULARITY SHOW CREATE TABLE test.minmax_idx; -SELECT * FROM test.minmax_idx WHERE u64 * i32 = 3; +SELECT * FROM test.minmax_idx WHERE u64 * i32 = 2; CREATE TABLE test.minmax_idx2 @@ -56,13 +56,13 @@ INDEXES idx1 BY u64 + i32 TYPE minmax GRANULARITY 10, INSERT INTO test.minmax_idx2 VALUES (1, 2); INSERT INTO test.minmax_idx2 VALUES (1, 2); -SELECT * FROM test.minmax_idx2 WHERE u64 * i32 = 3; +SELECT * FROM test.minmax_idx2 WHERE u64 * i32 = 2; ALTER TABLE test.minmax_idx2 DROP INDEX idx1, DROP INDEX idx2; SHOW CREATE TABLE test.minmax_idx2; -SELECT * FROM test.minmax_idx2 WHERE u64 * i32 = 3; +SELECT * FROM test.minmax_idx2 WHERE u64 * i32 = 2; DROP TABLE test.minmax_idx; DROP TABLE test.minmax_idx2; \ No newline at end of file From 62dde8c32731848c9fca4e0fe6e28e5b6bd17f96 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 16 Jan 2019 22:31:04 +0300 Subject: [PATCH 078/586] fixed alter bug with drop all indices --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 2 +- dbms/src/Storages/StorageMergeTree.cpp | 11 ++++++++--- .../queries/0_stateless/00824_indices_alter.reference | 4 ++-- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index a199e1bda7a..6522257b30a 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -355,7 +355,7 @@ void MergeTreeData::setSkipIndexes(const ASTPtr & indexes_asts, bool only_check) { if (!only_check) { - skip_indexes_ast = indexes_asts; + skip_indexes_ast = nullptr; indexes.clear(); } return; diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index a3dfd5ba6ea..b98b395a77c 100644 --- 
a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -219,6 +219,11 @@ void StorageMergeTree::alter( ASTPtr new_indexes_ast = data.skip_indexes_ast; params.apply(new_columns, new_order_by_ast, new_primary_key_ast, new_indexes_ast); + if (new_indexes_ast && new_indexes_ast->children.empty()) + { + new_indexes_ast.reset(); + } + auto parts = data.getDataParts({MergeTreeDataPartState::PreCommitted, MergeTreeDataPartState::Committed, MergeTreeDataPartState::Outdated}); auto columns_for_parts = new_columns.getAllPhysical(); std::vector transactions; @@ -242,10 +247,10 @@ void StorageMergeTree::alter( if (new_indexes_ast.get() != data.skip_indexes_ast.get()) { - if (new_indexes_ast && !new_indexes_ast->children.empty()) - storage_ast.set(storage_ast.indexes, new_indexes_ast); + if (new_indexes_ast == nullptr) + storage_ast.indexes = nullptr; else - storage_ast.set(storage_ast.indexes, nullptr); + storage_ast.set(storage_ast.indexes, new_indexes_ast); } }; diff --git a/dbms/tests/queries/0_stateless/00824_indices_alter.reference b/dbms/tests/queries/0_stateless/00824_indices_alter.reference index a70cc705f86..993d68db73b 100644 --- a/dbms/tests/queries/0_stateless/00824_indices_alter.reference +++ b/dbms/tests/queries/0_stateless/00824_indices_alter.reference @@ -13,7 +13,7 @@ CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = MergeTree() ORDE 1 2 1 2 1 2 -CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = MergeTree() ORDER BY u64 INDEXES idx2 BY u64 + i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192 CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = MergeTree() ORDER BY u64 INDEXES idx1 BY u64 * i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 1 2 1 2 @@ -23,6 +23,6 @@ CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = MergeTree() ORDE 1 2 1 2 1 2 -CREATE TABLE test.minmax_idx2 ( u64 UInt64, i32 Int32) ENGINE = MergeTree() ORDER BY u64 INDEXES idx1 BY u64 + i32 TYPE minmax GRANULARITY 10, idx2 BY u64 * i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx2 ( u64 UInt64, i32 Int32) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192 1 2 1 2 \ No newline at end of file From f8fa36c6bd79ee3f0e8f8640d4bb340001fba3b7 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 16 Jan 2019 22:47:21 +0300 Subject: [PATCH 079/586] fix metadata editing --- dbms/src/Storages/StorageReplicatedMergeTree.cpp | 16 +++++++++++++--- .../0_stateless/00824_indices_alter.reference | 2 +- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 610b4737810..c4defb9c773 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -447,7 +447,10 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column if (metadata_diff.skip_indices_changed) { ParserIndexDeclaration parser; - new_indices_ast = parseQuery(parser, metadata_diff.new_skip_indices, 0); + if (metadata_diff.new_skip_indices.empty()) + new_indices_ast.reset(); + else + new_indices_ast = parseQuery(parser, metadata_diff.new_skip_indices, 0); } storage_modifier = [&](IAST & ast) @@ -464,7 +467,10 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column 
storage_ast.set(storage_ast.order_by, new_order_by_ast); - storage_ast.set(storage_ast.indexes, new_indices_ast); + if (new_indices_ast) + storage_ast.set(storage_ast.indexes, new_indices_ast); + else + storage_ast.indexes = nullptr; }; } @@ -3082,6 +3088,10 @@ void StorageReplicatedMergeTree::alter(const AlterCommands & params, ASTPtr new_primary_key_ast = data.primary_key_ast; ASTPtr new_indexes_ast = data.skip_indexes_ast; params.apply(new_columns, new_order_by_ast, new_primary_key_ast, new_indexes_ast); + if (new_indexes_ast && new_indexes_ast->children.empty()) + { + new_indexes_ast.reset(); + } String new_columns_str = new_columns.toString(); if (new_columns_str != data.getColumns().toString()) @@ -3092,7 +3102,7 @@ void StorageReplicatedMergeTree::alter(const AlterCommands & params, new_metadata.sorting_key = serializeAST(*MergeTreeData::extractKeyExpressionList(new_order_by_ast)); if (new_indexes_ast.get() != data.skip_indexes_ast.get()) { - if (new_indexes_ast && !new_indexes_ast->children.empty()) + if (new_indexes_ast) new_metadata.skip_indexes = serializeAST(*new_indexes_ast.get()); else new_metadata.skip_indexes = {}; diff --git a/dbms/tests/queries/0_stateless/00824_indices_alter.reference b/dbms/tests/queries/0_stateless/00824_indices_alter.reference index 993d68db73b..30968fbe9de 100644 --- a/dbms/tests/queries/0_stateless/00824_indices_alter.reference +++ b/dbms/tests/queries/0_stateless/00824_indices_alter.reference @@ -25,4 +25,4 @@ CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = MergeTree() ORDE 1 2 CREATE TABLE test.minmax_idx2 ( u64 UInt64, i32 Int32) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192 1 2 -1 2 \ No newline at end of file +1 2 From 062819f0a20f178ca50538cebc03cd798b96083b Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Thu, 17 Jan 2019 11:05:26 +0300 Subject: [PATCH 080/586] new test and repl fix --- .../ReplicatedMergeTreeTableMetadata.cpp | 4 +- ...inmax_index_replicated_zookeeper.reference | 16 +++++ ...0823_minmax_index_replicated_zookeeper.sql | 65 +++++++++++++++++++ 3 files changed, 83 insertions(+), 2 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00823_minmax_index_replicated_zookeeper.reference create mode 100644 dbms/tests/queries/0_stateless/00823_minmax_index_replicated_zookeeper.sql diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index 15d343924ac..2d2c8e65d66 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -68,7 +68,7 @@ void ReplicatedMergeTreeTableMetadata::write(WriteBuffer & out) const out << "sorting key: " << sorting_key << "\n"; if (!skip_indexes.empty()) - out << "skip indexes: " << skip_indexes << "\n"; + out << "indices: " << skip_indexes << "\n"; } String ReplicatedMergeTreeTableMetadata::toString() const @@ -99,7 +99,7 @@ void ReplicatedMergeTreeTableMetadata::read(ReadBuffer & in) if (checkString("sorting key: ", in)) in >> sorting_key >> "\n"; - if (checkString("skip indexes: ", in)) + if (checkString("indices: ", in)) in >> skip_indexes >> "\n"; } diff --git a/dbms/tests/queries/0_stateless/00823_minmax_index_replicated_zookeeper.reference b/dbms/tests/queries/0_stateless/00823_minmax_index_replicated_zookeeper.reference new file mode 100644 index 00000000000..3b0b10a96f1 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00823_minmax_index_replicated_zookeeper.reference @@ 
-0,0 +1,16 @@ +0 5 4.7 6.50 cba b 2014-01-04 +0 5 4.7 6.50 cba b 2014-03-11 +2 5 4.7 6.50 cba b 2014-06-11 +2 5 4.7 6.50 cba b 2015-01-01 +0 5 4.7 6.50 cba b 2014-01-04 +0 5 4.7 6.50 cba b 2014-03-11 +2 5 4.7 6.50 cba b 2014-06-11 +2 5 4.7 6.50 cba b 2015-01-01 +0 5 4.7 6.50 cba b 2014-01-04 +0 5 4.7 6.50 cba b 2014-03-11 +2 5 4.7 6.50 cba b 2014-06-11 +2 5 4.7 6.50 cba b 2015-01-01 +0 5 4.7 6.50 cba b 2014-01-04 +0 5 4.7 6.50 cba b 2014-03-11 +2 5 4.7 6.50 cba b 2014-06-11 +2 5 4.7 6.50 cba b 2015-01-01 diff --git a/dbms/tests/queries/0_stateless/00823_minmax_index_replicated_zookeeper.sql b/dbms/tests/queries/0_stateless/00823_minmax_index_replicated_zookeeper.sql new file mode 100644 index 00000000000..32d5a776aca --- /dev/null +++ b/dbms/tests/queries/0_stateless/00823_minmax_index_replicated_zookeeper.sql @@ -0,0 +1,65 @@ +DROP TABLE IF EXISTS test.minmax_idx1; +DROP TABLE IF EXISTS test.minmax_idx2; + +CREATE TABLE test.minmax_idx1 +( + u64 UInt64, + i32 Int32, + f64 Float64, + d Decimal(10, 2), + s String, + e Enum8('a' = 1, 'b' = 2, 'c' = 3), + dt Date +) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/minmax', 'r1') +ORDER BY u64 +INDEXES idx_all BY (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 2, + idx_2 BY (u64 + toYear(dt), substring(s, 2, 4)) TYPE minmax GRANULARITY 3 +SETTINGS index_granularity = 2; + +CREATE TABLE test.minmax_idx2 +( + u64 UInt64, + i32 Int32, + f64 Float64, + d Decimal(10, 2), + s String, + e Enum8('a' = 1, 'b' = 2, 'c' = 3), + dt Date +) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/minmax', 'r2') +ORDER BY u64 +INDEXES idx_all BY (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 2, + idx_2 BY (u64 + toYear(dt), substring(s, 2, 4)) TYPE minmax GRANULARITY 3 +SETTINGS index_granularity = 2; + + +/* many small inserts => table will make merges */ +INSERT INTO test.minmax_idx1 VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-01-01'); +INSERT INTO test.minmax_idx1 VALUES (0, 5, 4.7, 6.5, 'cba', 'b', '2014-01-04'); +INSERT INTO test.minmax_idx2 VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2017-01-01'); +INSERT INTO test.minmax_idx2 VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2016-01-01'); +INSERT INTO test.minmax_idx2 VALUES (2, 5, 4.7, 6.5, 'cba', 'b', '2015-01-01'); +INSERT INTO test.minmax_idx1 VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-11-11'); + +SYSTEM SYNC REPLICA test.minmax_idx1; +SYSTEM SYNC REPLICA test.minmax_idx2; + +INSERT INTO test.minmax_idx1 VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-02-11'); +INSERT INTO test.minmax_idx1 VALUES (0, 5, 4.7, 6.5, 'cba', 'b', '2014-03-11'); +INSERT INTO test.minmax_idx1 VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-04-11'); +INSERT INTO test.minmax_idx1 VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-05-11'); +INSERT INTO test.minmax_idx2 VALUES (2, 5, 4.7, 6.5, 'cba', 'b', '2014-06-11'); +INSERT INTO test.minmax_idx2 VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-07-11'); + +SYSTEM SYNC REPLICA test.minmax_idx1; +SYSTEM SYNC REPLICA test.minmax_idx2; + +/* simple select */ +SELECT * FROM test.minmax_idx1 WHERE i32 = 5 AND i32 + f64 < 12 AND 3 < d AND d < 7 AND (s = 'bac' OR s = 'cba') ORDER BY dt; +SELECT * FROM test.minmax_idx2 WHERE i32 = 5 AND i32 + f64 < 12 AND 3 < d AND d < 7 AND (s = 'bac' OR s = 'cba') ORDER BY dt; + +/* select with hole made by primary key */ +SELECT * FROM test.minmax_idx1 WHERE u64 != 1 AND e = 'b' ORDER BY dt; +SELECT * FROM test.minmax_idx2 WHERE u64 != 1 AND e = 'b' ORDER BY dt; + +DROP TABLE test.minmax_idx1; +DROP TABLE test.minmax_idx2; \ No newline at end of file From 
59cb1cbcefbab78ebc92293f7e403a61bb12cb15 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Thu, 17 Jan 2019 11:09:00 +0300 Subject: [PATCH 081/586] rm test files --- dbms/tests/queries/clickhouse-functions | 558 ------------------ dbms/tests/queries/clickhouse-table_functions | 10 - 2 files changed, 568 deletions(-) delete mode 100644 dbms/tests/queries/clickhouse-functions delete mode 100644 dbms/tests/queries/clickhouse-table_functions diff --git a/dbms/tests/queries/clickhouse-functions b/dbms/tests/queries/clickhouse-functions deleted file mode 100644 index 87ae1c21f88..00000000000 --- a/dbms/tests/queries/clickhouse-functions +++ /dev/null @@ -1,558 +0,0 @@ -convertCharset -transform -findClusterValue -findClusterIndex -toNullable -coalesce -isNotNull -pointInEllipses -pow -acos -asin -tan -cos -lgamma -erfc -erf -sqrt -log10 -exp10 -e -visitParamExtractFloat -visitParamExtractUInt -decodeURLComponent -cutURLParameter -cutQueryStringAndFragment -cutFragment -cutWWW -URLPathHierarchy -URLHierarchy -extractURLParameterNames -extractURLParameter -queryStringAndFragment -pathFull -sin -topLevelDomain -domainWithoutWWW -domain -protocol -greatCircleDistance -extract -match -positionCaseInsensitiveUTF8 -positionUTF8 -position -replaceRegexpAll -replaceRegexpOne -arrayStringConcat -splitByString -alphaTokens -tgamma -base64Decode -regexpQuoteMeta -positionCaseInsensitive -trimBoth -trimRight -endsWith -startsWith -appendTrailingCharIfAbsent -substringUTF8 -concatAssumeInjective -reverseUTF8 -upperUTF8 -upper -lower -length -notEmpty -trunc -round -reinterpretAsString -reinterpretAsDateTime -reinterpretAsDate -reinterpretAsFloat64 -reinterpretAsFloat32 -reinterpretAsInt64 -reinterpretAsInt8 -reinterpretAsUInt32 -generateUUIDv4 -rand -toISOYear -toISOWeek -concat -toDecimal64 -ifNull -toStartOfDay -toSecond -addSeconds -sleepEachRow -toDayOfWeek -toDayOfMonth -reinterpretAsUInt8 -UUIDNumToString -bitShiftLeft -toDate -sleep -emptyArrayUInt8 -parseDateTimeBestEffort -toFloat32OrNull -toInt16 -IPv6NumToString -atan -substring -arrayIntersect -isInfinite -visitParamExtractString -globalNotIn -toDateTimeOrZero -toRelativeHourNum -toIntervalYear -toFloat64OrZero -dateDiff -hex -arrayEnumerateDense -toUInt8OrZero -toRelativeSecondNum -toUInt64OrNull -MACNumToString -toInt32OrNull -toDayOfYear -toUnixTimestamp -toString -toDateOrZero -subtractDays -murmurHash2_32 -toUInt64 -toUInt8 -dictGetDateTime -empty -isFinite -caseWithExpression -caseWithoutExpression -caseWithoutExpr -visitParamExtractRaw -queryString -dictGetInt32OrDefault -tryBase64Decode -toInt8OrZero -multiIf -if -intExp10 -bitShiftRight -less -roundToExp2 -toUInt8OrNull -dictGetUInt16 -like -regionToPopulation -MACStringToOUI -parseDateTimeBestEffortOrNull -not -toInt32OrZero -arrayFilter -toInt16OrZero -range -equals -now -toInt8OrNull -bitmaskToArray -roundAge -toIntervalMonth -toUUID -notEquals -toInt16OrNull -murmurHash2_64 -hasAny -joinGet -toIntervalMinute -notLike -lcm -toStringCutToZero -isNull -tupleElement -replaceAll -parseDateTimeBestEffortOrZero -toFloat32OrZero -lowerUTF8 -notIn -gcd -murmurHash3_64 -toMinute -toDateTimeOrNull -toTime -materialize -roundDuration -gccMurmurHash -ceil -toStartOfQuarter -dictGetInt8OrDefault -MACStringToNum -toTypeName -toUInt32OrNull -emptyArrayString -dictGetDateTimeOrDefault -bitRotateRight -cutIPv6 -toUInt32OrZero -timezone -arrayPopFront -toInt32 -intHash64 -extractURLParameters -lowCardinalityIndices -toStartOfMonth -toYear -hasAll -rowNumberInAllBlocks -bitTestAll -arrayCount 
-arraySort -abs -bitNot -intDiv -intDivOrZero -firstSignificantSubdomain -reinterpretAsUInt16 -dictGetFloat32OrDefault -toHour -minus -regionToArea -unhex -IPv4StringToNum -toIntervalHour -toInt8 -dictGetFloat32 -log -IPv4NumToString -modulo -arrayEnumerate -reinterpretAsUInt64 -toMonth -visitParamExtractBool -emptyArrayUInt64 -replaceOne -arrayReverseSort -toFloat32 -trimLeft -toRelativeMonthNum -emptyArrayInt32 -randConstant -CAST -bitRotateLeft -toRelativeYearNum -negate -toUInt64OrZero -emptyArrayFloat64 -bitTest -toDecimal128 -plus -or -cutQueryString -reinterpretAsFixedString -countEqual -arrayPopBack -arrayElement -log2 -array -arrayReverse -arrayPushBack -subtractWeeks -bitTestAny -bitAnd -base64Encode -toDecimal32 -arrayPushFront -lessOrEquals -intExp2 -toUInt16OrZero -arrayConcat -arrayCumSum -arraySlice -addDays -dictGetUInt8 -toUInt32 -bitOr -caseWithExpr -UUIDStringToNum -emptyArrayUInt16 -toIntervalDay -MD5 -emptyArrayUInt32 -emptyArrayInt8 -toMonday -addMonths -CHAR_LENGTH -least -divide -arrayUniq -SHA256 -arrayExists -multiply -toUInt16OrNull -dictGetInt8 -visitParamHas -emptyArrayInt64 -toIntervalSecond -emptyArrayToSingle -path -toInt64OrZero -SHA1 -extractAll -roundDown -emptyArrayDate -dumpColumnStructure -lengthUTF8 -greatest -arrayEnumerateUniq -arrayDistinct -javaHash -arrayFirst -toFixedString -IPv4NumToStringClassC -toFloat64OrNull -IPv4ToIPv6 -identity -toStartOfYear -visitParamExtractInt -toStartOfMinute -toStartOfFiveMinute -toStartOfFifteenMinutes -toStartOfHour -sumburConsistentHash -toStartOfISOYear -toRelativeQuarterNum -toRelativeWeekNum -toRelativeDayNum -cbrt -yesterday -bitXor -timeSlot -timeSlots -emptyArrayInt16 -dictGetInt16 -toYYYYMM -toYYYYMMDDhhmmss -toUInt16 -addMinutes -addHours -addWeeks -nullIf -subtractSeconds -subtractMinutes -toIntervalWeek -subtractHours -isNaN -subtractMonths -reverse -runningDifferenceStartingWithFirstValue -toDateTime -subtractQuarters -tuple -arrayCumSumNonNegative -rowNumberInBlock -toDateOrNull -subtractYears -toTimeZone -formatDateTime -has -cityHash64 -intHash32 -fragment -regionToCity -dictGetOrDefault -indexOf -regionToDistrict -regionToCountry -visibleWidth -regionToContinent -regionToTopContinent -toColumnTypeName -regionHierarchy -dictGetDate -dictHas -dictGetUInt64 -cutToFirstSignificantSubdomain -dictGetInt32 -pointInPolygon -dictGetInt64 -blockNumber -IPv6StringToNum -dictGetString -dictGetFloat64 -dictGetUUID -CHARACTER_LENGTH -toQuarter -dictGetHierarchy -toFloat64 -arraySum -toInt64OrNull -dictIsIn -exp -floor -dictGetUInt8OrDefault -dictGetUInt16OrDefault -dictGetUInt32OrDefault -emptyArrayDateTime -greater -jumpConsistentHash -dictGetUInt64OrDefault -dictGetInt16OrDefault -dictGetInt64OrDefault -reinterpretAsInt32 -dictGetUInt32 -murmurHash3_32 -rand64 -dictGetDateOrDefault -bar -dictGetUUIDOrDefault -regionToName -dictGetStringOrDefault -splitByChar -dictGet -modelEvaluate -arrayReduce -farmHash64 -bitmaskToList -formatReadableSize -halfMD5 -SHA224 -arrayMap -sipHash64 -dictGetFloat64OrDefault -sipHash128 -metroHash64 -hiveHash -murmurHash3_128 -toInt64 -xxHash32 -xxHash64 -yandexConsistentHash -emptyArrayFloat32 -arrayAll -toYYYYMMDD -today -arrayFirstIndex -greaterOrEquals -arrayDifference -toIntervalQuarter -throwIf -and -xor -addQuarters -currentDatabase -hostName -URLHash -getSizeOfEnumType -defaultValueOfArgumentType -blockSize -arrayResize -ignore -toRelativeMinuteNum -indexHint -reinterpretAsInt16 -addYears -arrayJoin -replicate -hasColumnInTable -version -regionIn -uptime -runningAccumulate 
-runningDifference -assumeNotNull -pi -finalizeAggregation -toLowCardinality -exp2 -lowCardinalityKeys -in -globalIn -power -ln -replace -locate -ceiling -truncate -lcase -ucase -substr -mid -retention -maxIntersections -groupBitXor -groupBitOr -uniqUpTo -uniqExact -uniq -covarPop -stddevPop -varPop -covarSamp -varSamp -sumMap -corrStable -corr -quantileTiming -quantileDeterministic -quantilesExact -uniqHLL12 -quantilesTiming -covarPopStable -stddevSampStable -quantilesExactWeighted -quantileExactWeighted -quantileTimingWeighted -quantileExact -quantilesDeterministic -quantiles -topK -sumWithOverflow -count -groupArray -stddevSamp -groupArrayInsertAt -quantile -quantilesTimingWeighted -sum -covarSampStable -anyLast -quantileTDigest -quantilesTDigest -windowFunnel -min -argMax -varSampStable -maxIntersectionsPosition -quantilesTDigestWeighted -groupUniqArray -sequenceCount -uniqCombined -boundingRatio -sumKahan -any -anyHeavy -histogram -quantileTDigestWeighted -max -groupBitAnd -argMin -varPopStable -avg -sequenceMatch -stddevPopStable -BIT_XOR -medianExactWeighted -medianTiming -medianExact -median -medianDeterministic -VAR_SAMP -STDDEV_POP -medianTDigest -VAR_POP -medianTDigestWeighted -BIT_OR -STDDEV_SAMP -medianTimingWeighted -COVAR_SAMP -COVAR_POP -BIT_AND diff --git a/dbms/tests/queries/clickhouse-table_functions b/dbms/tests/queries/clickhouse-table_functions deleted file mode 100644 index 0ee110959df..00000000000 --- a/dbms/tests/queries/clickhouse-table_functions +++ /dev/null @@ -1,10 +0,0 @@ -jdbc -odbc -hdfs -remote -catBoostPool -merge -file -cluster -url -numbers From d514d3739af26b963c3829ce052e0cfcc71efd27 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Thu, 17 Jan 2019 12:16:39 +0300 Subject: [PATCH 082/586] fixed repl alter --- .../Storages/StorageReplicatedMergeTree.cpp | 2 +- ...dices_alter_replicated_zookeeper.reference | 58 +++++++++ ...824_indices_alter_replicated_zookeeper.sql | 111 ++++++++++++++++++ 3 files changed, 170 insertions(+), 1 deletion(-) create mode 100644 dbms/tests/queries/0_stateless/00824_indices_alter_replicated_zookeeper.reference create mode 100644 dbms/tests/queries/0_stateless/00824_indices_alter_replicated_zookeeper.sql diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index c4defb9c773..8f0e629f9c9 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -446,7 +446,7 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column if (metadata_diff.skip_indices_changed) { - ParserIndexDeclaration parser; + ParserIndexDeclarationList parser; if (metadata_diff.new_skip_indices.empty()) new_indices_ast.reset(); else diff --git a/dbms/tests/queries/0_stateless/00824_indices_alter_replicated_zookeeper.reference b/dbms/tests/queries/0_stateless/00824_indices_alter_replicated_zookeeper.reference new file mode 100644 index 00000000000..6d97145c07f --- /dev/null +++ b/dbms/tests/queries/0_stateless/00824_indices_alter_replicated_zookeeper.reference @@ -0,0 +1,58 @@ +CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\') ORDER BY u64 INDEXES idx1 BY u64 * i32 TYPE minmax GRANULARITY 10, idx3 BY u64 - i32 TYPE minmax GRANULARITY 10, idx2 BY u64 + i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx_r ( u64 UInt64, i32 Int32) ENGINE = 
ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\') ORDER BY u64 INDEXES idx1 BY u64 * i32 TYPE minmax GRANULARITY 10, idx3 BY u64 - i32 TYPE minmax GRANULARITY 10, idx2 BY u64 + i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 +1 2 +1 2 +1 2 +1 4 +1 5 +3 2 +19 9 +65 75 +1 2 +1 4 +1 5 +3 2 +19 9 +65 75 +CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\') ORDER BY u64 INDEXES idx3 BY u64 - i32 TYPE minmax GRANULARITY 10, idx2 BY u64 + i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx_r ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\') ORDER BY u64 INDEXES idx3 BY u64 - i32 TYPE minmax GRANULARITY 10, idx2 BY u64 + i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 +1 2 +1 4 +1 5 +3 2 +19 9 +65 75 +1 2 +1 4 +1 5 +3 2 +19 9 +65 75 +CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\') ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx_r ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\') ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\') ORDER BY u64 INDEXES idx1 BY u64 * i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx_r ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\') ORDER BY u64 INDEXES idx1 BY u64 * i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 +1 2 +1 4 +1 5 +3 2 +19 9 +65 75 +1 2 +1 4 +1 5 +3 2 +19 9 +65 75 +CREATE TABLE test.minmax_idx2 ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r1\') ORDER BY u64 INDEXES idx1 BY u64 + i32 TYPE minmax GRANULARITY 10, idx2 BY u64 * i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx2_r ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r2\') ORDER BY u64 INDEXES idx1 BY u64 + i32 TYPE minmax GRANULARITY 10, idx2 BY u64 * i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 +1 2 +1 3 +1 2 +1 3 +CREATE TABLE test.minmax_idx2 ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r1\') ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx2_r ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r2\') ORDER BY u64 SETTINGS index_granularity = 8192 +1 2 +1 3 +1 2 +1 3 diff --git a/dbms/tests/queries/0_stateless/00824_indices_alter_replicated_zookeeper.sql b/dbms/tests/queries/0_stateless/00824_indices_alter_replicated_zookeeper.sql new file mode 100644 index 00000000000..0a452cdf755 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00824_indices_alter_replicated_zookeeper.sql @@ -0,0 +1,111 @@ +DROP TABLE IF EXISTS test.minmax_idx; +DROP TABLE IF EXISTS test.minmax_idx_r; +DROP TABLE IF EXISTS test.minmax_idx2; +DROP TABLE IF EXISTS test.minmax_idx2_r; + +CREATE TABLE test.minmax_idx +( + u64 UInt64, + i32 Int32 +) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/indices_alter1', 'r1') +ORDER BY u64; + +CREATE TABLE test.minmax_idx_r +( + u64 
UInt64, + i32 Int32 +) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/indices_alter1', 'r2') +ORDER BY u64; + +INSERT INTO test.minmax_idx VALUES (1, 2); + +SYSTEM SYNC REPLICA test.minmax_idx_r; + +ALTER TABLE test.minmax_idx ADD INDEX idx1 BY u64 * i32 TYPE minmax GRANULARITY 10; +ALTER TABLE test.minmax_idx_r ADD INDEX idx2 BY u64 + i32 TYPE minmax GRANULARITY 10; +ALTER TABLE test.minmax_idx ADD INDEX idx3 BY u64 - i32 TYPE minmax GRANULARITY 10 AFTER idx1; + +SHOW CREATE TABLE test.minmax_idx; +SHOW CREATE TABLE test.minmax_idx_r; + +SELECT * FROM test.minmax_idx WHERE u64 * i32 = 2 ORDER BY (u64, i32); +SELECT * FROM test.minmax_idx_r WHERE u64 * i32 = 2 ORDER BY (u64, i32); + +INSERT INTO test.minmax_idx VALUES (1, 4); +INSERT INTO test.minmax_idx_r VALUES (3, 2); +INSERT INTO test.minmax_idx VALUES (1, 5); +INSERT INTO test.minmax_idx_r VALUES (65, 75); +INSERT INTO test.minmax_idx VALUES (19, 9); + +SYSTEM SYNC REPLICA test.minmax_idx; +SYSTEM SYNC REPLICA test.minmax_idx_r; + +SELECT * FROM test.minmax_idx WHERE u64 * i32 > 1 ORDER BY (u64, i32); +SELECT * FROM test.minmax_idx_r WHERE u64 * i32 > 1 ORDER BY (u64, i32); + +ALTER TABLE test.minmax_idx DROP INDEX idx1; + +SHOW CREATE TABLE test.minmax_idx; +SHOW CREATE TABLE test.minmax_idx_r; + +SELECT * FROM test.minmax_idx WHERE u64 * i32 > 1 ORDER BY (u64, i32); +SELECT * FROM test.minmax_idx_r WHERE u64 * i32 > 1 ORDER BY (u64, i32); + +ALTER TABLE test.minmax_idx DROP INDEX idx2; +ALTER TABLE test.minmax_idx_r DROP INDEX idx3; + +SHOW CREATE TABLE test.minmax_idx; +SHOW CREATE TABLE test.minmax_idx_r; + +ALTER TABLE test.minmax_idx ADD INDEX idx1 BY u64 * i32 TYPE minmax GRANULARITY 10; + +SHOW CREATE TABLE test.minmax_idx; +SHOW CREATE TABLE test.minmax_idx_r; + +SELECT * FROM test.minmax_idx WHERE u64 * i32 > 1 ORDER BY (u64, i32); +SELECT * FROM test.minmax_idx_r WHERE u64 * i32 > 1 ORDER BY (u64, i32); + + +CREATE TABLE test.minmax_idx2 +( + u64 UInt64, + i32 Int32 +) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/indices_alter2', 'r1') +ORDER BY u64 +INDEXES idx1 BY u64 + i32 TYPE minmax GRANULARITY 10, + idx2 BY u64 * i32 TYPE minmax GRANULARITY 10; + +CREATE TABLE test.minmax_idx2_r +( + u64 UInt64, + i32 Int32 +) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/indices_alter2', 'r2') +ORDER BY u64 +INDEXES idx1 BY u64 + i32 TYPE minmax GRANULARITY 10, + idx2 BY u64 * i32 TYPE minmax GRANULARITY 10; + + +SHOW CREATE TABLE test.minmax_idx2; +SHOW CREATE TABLE test.minmax_idx2_r; + +INSERT INTO test.minmax_idx2 VALUES (1, 2); +INSERT INTO test.minmax_idx2_r VALUES (1, 3); + +SYSTEM SYNC REPLICA test.minmax_idx2; +SYSTEM SYNC REPLICA test.minmax_idx2_r; + +SELECT * FROM test.minmax_idx2 WHERE u64 * i32 >= 2 ORDER BY (u64, i32); +SELECT * FROM test.minmax_idx2_r WHERE u64 * i32 >= 2 ORDER BY (u64, i32); + +ALTER TABLE test.minmax_idx2_r DROP INDEX idx1, DROP INDEX idx2; + +SHOW CREATE TABLE test.minmax_idx2; +SHOW CREATE TABLE test.minmax_idx2_r; + +SELECT * FROM test.minmax_idx2 WHERE u64 * i32 >= 2 ORDER BY (u64, i32); +SELECT * FROM test.minmax_idx2_r WHERE u64 * i32 >= 2 ORDER BY (u64, i32); + +DROP TABLE test.minmax_idx; +DROP TABLE test.minmax_idx_r; +DROP TABLE test.minmax_idx2; +DROP TABLE test.minmax_idx2_r; \ No newline at end of file From 8d478724365c166872007a6875785aec508401c8 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Thu, 17 Jan 2019 13:09:51 +0300 Subject: [PATCH 083/586] fix --- dbms/src/Parsers/ParserCreateQuery.cpp | 1 + 1 file changed, 1 insertion(+) diff --git 
a/dbms/src/Parsers/ParserCreateQuery.cpp b/dbms/src/Parsers/ParserCreateQuery.cpp index e08ddbd10ef..38bc71aab0f 100644 --- a/dbms/src/Parsers/ParserCreateQuery.cpp +++ b/dbms/src/Parsers/ParserCreateQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include #include From 6b8c7dadb941a39ffe0e6e3f7be2b9b244525a44 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Thu, 17 Jan 2019 14:10:43 +0300 Subject: [PATCH 084/586] fix --- dbms/src/Parsers/ASTCreateQuery.h | 2 +- dbms/src/Parsers/ParserCreateQuery.cpp | 2 +- .../queries/0_stateless/00823_minmax_index.sql | 2 +- .../00823_minmax_index_replicated_zookeeper.sql | 4 ++-- .../0_stateless/00824_indices_alter.reference | 6 +++--- .../queries/0_stateless/00824_indices_alter.sql | 2 +- ..._indices_alter_replicated_zookeeper.reference | 16 ++++++++-------- .../00824_indices_alter_replicated_zookeeper.sql | 4 ++-- 8 files changed, 19 insertions(+), 19 deletions(-) diff --git a/dbms/src/Parsers/ASTCreateQuery.h b/dbms/src/Parsers/ASTCreateQuery.h index 8c1787f9bb4..0fdd27902d6 100644 --- a/dbms/src/Parsers/ASTCreateQuery.h +++ b/dbms/src/Parsers/ASTCreateQuery.h @@ -77,7 +77,7 @@ public: } if (indexes) { - s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << "INDEXES " << (s.hilite ? hilite_none : ""); + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << "INDICES " << (s.hilite ? hilite_none : ""); indexes->formatImpl(s, state, frame); } if (settings) diff --git a/dbms/src/Parsers/ParserCreateQuery.cpp b/dbms/src/Parsers/ParserCreateQuery.cpp index 38bc71aab0f..d6eaf00d33c 100644 --- a/dbms/src/Parsers/ParserCreateQuery.cpp +++ b/dbms/src/Parsers/ParserCreateQuery.cpp @@ -154,7 +154,7 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserKeyword s_primary_key("PRIMARY KEY"); ParserKeyword s_order_by("ORDER BY"); ParserKeyword s_sample_by("SAMPLE BY"); - ParserKeyword s_indexes("INDEXES"); + ParserKeyword s_indexes("INDICES"); ParserKeyword s_settings("SETTINGS"); ParserIdentifierWithOptionalParameters ident_with_optional_params_p; diff --git a/dbms/tests/queries/0_stateless/00823_minmax_index.sql b/dbms/tests/queries/0_stateless/00823_minmax_index.sql index a1ca97f3a7b..00d89fcfcd9 100644 --- a/dbms/tests/queries/0_stateless/00823_minmax_index.sql +++ b/dbms/tests/queries/0_stateless/00823_minmax_index.sql @@ -11,7 +11,7 @@ CREATE TABLE test.minmax_idx dt Date ) ENGINE = MergeTree() ORDER BY u64 -INDEXES idx_all BY (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 2, +INDICES idx_all BY (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 2, idx_2 BY (u64 + toYear(dt), substring(s, 2, 4)) TYPE minmax GRANULARITY 3 SETTINGS index_granularity = 2; diff --git a/dbms/tests/queries/0_stateless/00823_minmax_index_replicated_zookeeper.sql b/dbms/tests/queries/0_stateless/00823_minmax_index_replicated_zookeeper.sql index 32d5a776aca..d264d769874 100644 --- a/dbms/tests/queries/0_stateless/00823_minmax_index_replicated_zookeeper.sql +++ b/dbms/tests/queries/0_stateless/00823_minmax_index_replicated_zookeeper.sql @@ -12,7 +12,7 @@ CREATE TABLE test.minmax_idx1 dt Date ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/minmax', 'r1') ORDER BY u64 -INDEXES idx_all BY (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 2, +INDICES idx_all BY (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 2, idx_2 BY (u64 + toYear(dt), substring(s, 2, 4)) TYPE minmax GRANULARITY 3 SETTINGS index_granularity = 2; @@ -27,7 +27,7 @@ CREATE TABLE test.minmax_idx2 dt Date ) ENGINE = 
ReplicatedMergeTree('/clickhouse/tables/test/minmax', 'r2') ORDER BY u64 -INDEXES idx_all BY (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 2, +INDICES idx_all BY (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 2, idx_2 BY (u64 + toYear(dt), substring(s, 2, 4)) TYPE minmax GRANULARITY 3 SETTINGS index_granularity = 2; diff --git a/dbms/tests/queries/0_stateless/00824_indices_alter.reference b/dbms/tests/queries/0_stateless/00824_indices_alter.reference index 30968fbe9de..275413608bb 100644 --- a/dbms/tests/queries/0_stateless/00824_indices_alter.reference +++ b/dbms/tests/queries/0_stateless/00824_indices_alter.reference @@ -1,4 +1,4 @@ -CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = MergeTree() ORDER BY u64 INDEXES idx1 BY u64 * i32 TYPE minmax GRANULARITY 10, idx3 BY u64 - i32 TYPE minmax GRANULARITY 10, idx2 BY u64 + i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = MergeTree() ORDER BY u64 INDICES idx1 BY u64 * i32 TYPE minmax GRANULARITY 10, idx3 BY u64 - i32 TYPE minmax GRANULARITY 10, idx2 BY u64 + i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 1 2 1 2 1 2 @@ -6,7 +6,7 @@ CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = MergeTree() ORDE 1 2 1 2 1 2 -CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = MergeTree() ORDER BY u64 INDEXES idx3 BY u64 - i32 TYPE minmax GRANULARITY 10, idx2 BY u64 + i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = MergeTree() ORDER BY u64 INDICES idx3 BY u64 - i32 TYPE minmax GRANULARITY 10, idx2 BY u64 + i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 1 2 1 2 1 2 @@ -14,7 +14,7 @@ CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = MergeTree() ORDE 1 2 1 2 CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192 -CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = MergeTree() ORDER BY u64 INDEXES idx1 BY u64 * i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = MergeTree() ORDER BY u64 INDICES idx1 BY u64 * i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 1 2 1 2 1 2 diff --git a/dbms/tests/queries/0_stateless/00824_indices_alter.sql b/dbms/tests/queries/0_stateless/00824_indices_alter.sql index 1963ae3afcd..f345d923351 100644 --- a/dbms/tests/queries/0_stateless/00824_indices_alter.sql +++ b/dbms/tests/queries/0_stateless/00824_indices_alter.sql @@ -50,7 +50,7 @@ CREATE TABLE test.minmax_idx2 i32 Int32 ) ENGINE = MergeTree() ORDER BY u64 -INDEXES idx1 BY u64 + i32 TYPE minmax GRANULARITY 10, +INDICES idx1 BY u64 + i32 TYPE minmax GRANULARITY 10, idx2 BY u64 * i32 TYPE minmax GRANULARITY 10; INSERT INTO test.minmax_idx2 VALUES (1, 2); diff --git a/dbms/tests/queries/0_stateless/00824_indices_alter_replicated_zookeeper.reference b/dbms/tests/queries/0_stateless/00824_indices_alter_replicated_zookeeper.reference index 6d97145c07f..a56117e915b 100644 --- a/dbms/tests/queries/0_stateless/00824_indices_alter_replicated_zookeeper.reference +++ b/dbms/tests/queries/0_stateless/00824_indices_alter_replicated_zookeeper.reference @@ -1,5 +1,5 @@ -CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\') ORDER BY u64 INDEXES idx1 BY u64 * i32 TYPE minmax GRANULARITY 10, 
idx3 BY u64 - i32 TYPE minmax GRANULARITY 10, idx2 BY u64 + i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 -CREATE TABLE test.minmax_idx_r ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\') ORDER BY u64 INDEXES idx1 BY u64 * i32 TYPE minmax GRANULARITY 10, idx3 BY u64 - i32 TYPE minmax GRANULARITY 10, idx2 BY u64 + i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\') ORDER BY u64 INDICES idx1 BY u64 * i32 TYPE minmax GRANULARITY 10, idx3 BY u64 - i32 TYPE minmax GRANULARITY 10, idx2 BY u64 + i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx_r ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\') ORDER BY u64 INDICES idx1 BY u64 * i32 TYPE minmax GRANULARITY 10, idx3 BY u64 - i32 TYPE minmax GRANULARITY 10, idx2 BY u64 + i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 1 2 1 2 1 2 @@ -14,8 +14,8 @@ CREATE TABLE test.minmax_idx_r ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMerg 3 2 19 9 65 75 -CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\') ORDER BY u64 INDEXES idx3 BY u64 - i32 TYPE minmax GRANULARITY 10, idx2 BY u64 + i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 -CREATE TABLE test.minmax_idx_r ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\') ORDER BY u64 INDEXES idx3 BY u64 - i32 TYPE minmax GRANULARITY 10, idx2 BY u64 + i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\') ORDER BY u64 INDICES idx3 BY u64 - i32 TYPE minmax GRANULARITY 10, idx2 BY u64 + i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx_r ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\') ORDER BY u64 INDICES idx3 BY u64 - i32 TYPE minmax GRANULARITY 10, idx2 BY u64 + i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 1 2 1 4 1 5 @@ -30,8 +30,8 @@ CREATE TABLE test.minmax_idx_r ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMerg 65 75 CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\') ORDER BY u64 SETTINGS index_granularity = 8192 CREATE TABLE test.minmax_idx_r ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\') ORDER BY u64 SETTINGS index_granularity = 8192 -CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\') ORDER BY u64 INDEXES idx1 BY u64 * i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 -CREATE TABLE test.minmax_idx_r ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\') ORDER BY u64 INDEXES idx1 BY u64 * i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\') ORDER BY u64 INDICES idx1 BY u64 * i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity 
= 8192 +CREATE TABLE test.minmax_idx_r ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\') ORDER BY u64 INDICES idx1 BY u64 * i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 1 2 1 4 1 5 @@ -44,8 +44,8 @@ CREATE TABLE test.minmax_idx_r ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMerg 3 2 19 9 65 75 -CREATE TABLE test.minmax_idx2 ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r1\') ORDER BY u64 INDEXES idx1 BY u64 + i32 TYPE minmax GRANULARITY 10, idx2 BY u64 * i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 -CREATE TABLE test.minmax_idx2_r ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r2\') ORDER BY u64 INDEXES idx1 BY u64 + i32 TYPE minmax GRANULARITY 10, idx2 BY u64 * i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx2 ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r1\') ORDER BY u64 INDICES idx1 BY u64 + i32 TYPE minmax GRANULARITY 10, idx2 BY u64 * i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx2_r ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r2\') ORDER BY u64 INDICES idx1 BY u64 + i32 TYPE minmax GRANULARITY 10, idx2 BY u64 * i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 1 2 1 3 1 2 diff --git a/dbms/tests/queries/0_stateless/00824_indices_alter_replicated_zookeeper.sql b/dbms/tests/queries/0_stateless/00824_indices_alter_replicated_zookeeper.sql index 0a452cdf755..0b4a524464f 100644 --- a/dbms/tests/queries/0_stateless/00824_indices_alter_replicated_zookeeper.sql +++ b/dbms/tests/queries/0_stateless/00824_indices_alter_replicated_zookeeper.sql @@ -72,7 +72,7 @@ CREATE TABLE test.minmax_idx2 i32 Int32 ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/indices_alter2', 'r1') ORDER BY u64 -INDEXES idx1 BY u64 + i32 TYPE minmax GRANULARITY 10, +INDICES idx1 BY u64 + i32 TYPE minmax GRANULARITY 10, idx2 BY u64 * i32 TYPE minmax GRANULARITY 10; CREATE TABLE test.minmax_idx2_r @@ -81,7 +81,7 @@ CREATE TABLE test.minmax_idx2_r i32 Int32 ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/indices_alter2', 'r2') ORDER BY u64 -INDEXES idx1 BY u64 + i32 TYPE minmax GRANULARITY 10, +INDICES idx1 BY u64 + i32 TYPE minmax GRANULARITY 10, idx2 BY u64 * i32 TYPE minmax GRANULARITY 10; From c4a725a496142c7cb509a6029349c93338004872 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Thu, 17 Jan 2019 15:11:36 +0300 Subject: [PATCH 085/586] indices --- .../Interpreters/InterpreterCreateQuery.cpp | 2 +- dbms/src/Parsers/ASTCreateQuery.h | 10 ++--- dbms/src/Parsers/ParserCreateQuery.cpp | 6 +-- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 32 +++++++-------- dbms/src/Storages/MergeTree/MergeTreeData.h | 12 +++--- .../MergeTree/MergeTreeDataMergerMutator.cpp | 10 ++--- .../Storages/MergeTree/MergeTreeDataPart.h | 2 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 4 +- .../MergeTree/MergeTreeDataWriter.cpp | 2 +- .../Storages/MergeTree/MergeTreeIndexReader.h | 2 +- ...geTreeIndexes.cpp => MergeTreeIndices.cpp} | 2 +- ...{MergeTreeIndexes.h => MergeTreeIndices.h} | 2 +- .../Storages/MergeTree/MergeTreeMinMaxIndex.h | 2 +- .../MergeTree/MergedBlockOutputStream.cpp | 40 +++++++++---------- .../MergeTree/MergedBlockOutputStream.h | 4 +- .../ReplicatedMergeTreeAlterThread.cpp | 2 +- 
.../ReplicatedMergeTreePartCheckThread.cpp | 2 +- .../ReplicatedMergeTreeTableMetadata.cpp | 16 ++++---- .../ReplicatedMergeTreeTableMetadata.h | 2 +- dbms/src/Storages/MergeTree/checkDataPart.cpp | 4 +- dbms/src/Storages/MergeTree/checkDataPart.h | 2 +- .../MergeTree/registerStorageMergeTree.cpp | 20 +++++----- dbms/src/Storages/StorageFactory.cpp | 2 +- dbms/src/Storages/StorageMergeTree.cpp | 20 +++++----- .../Storages/StorageReplicatedMergeTree.cpp | 16 ++++---- 25 files changed, 109 insertions(+), 109 deletions(-) rename dbms/src/Storages/MergeTree/{MergeTreeIndexes.cpp => MergeTreeIndices.cpp} (97%) rename dbms/src/Storages/MergeTree/{MergeTreeIndexes.h => MergeTreeIndices.h} (98%) diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index 15ca21ee5e1..eeb1af65d47 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -106,7 +106,7 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) const ASTFunction & engine = *storage.engine; /// Currently, there are no database engines, that support any arguments. if (engine.arguments || engine.parameters || storage.partition_by || storage.primary_key - || storage.order_by || storage.sample_by || (storage.indexes && !storage.indexes->children.empty()) || storage.settings) + || storage.order_by || storage.sample_by || (storage.indices && !storage.indices->children.empty()) || storage.settings) { std::stringstream ostr; formatAST(storage, ostr, false, false); diff --git a/dbms/src/Parsers/ASTCreateQuery.h b/dbms/src/Parsers/ASTCreateQuery.h index 0fdd27902d6..c052b3c9c60 100644 --- a/dbms/src/Parsers/ASTCreateQuery.h +++ b/dbms/src/Parsers/ASTCreateQuery.h @@ -19,7 +19,7 @@ public: IAST * primary_key = nullptr; IAST * order_by = nullptr; IAST * sample_by = nullptr; - ASTExpressionList * indexes = nullptr; + ASTExpressionList * indices = nullptr; ASTSetQuery * settings = nullptr; String getID(char) const override { return "Storage definition"; } @@ -39,8 +39,8 @@ public: res->set(res->order_by, order_by->clone()); if (sample_by) res->set(res->sample_by, sample_by->clone()); - if (indexes) - res->set(res->indexes, indexes->clone()); + if (indices) + res->set(res->indices, indices->clone()); if (settings) res->set(res->settings, settings->clone()); @@ -75,10 +75,10 @@ public: s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << "SAMPLE BY " << (s.hilite ? hilite_none : ""); sample_by->formatImpl(s, state, frame); } - if (indexes) + if (indices) { s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << "INDICES " << (s.hilite ? 
hilite_none : ""); - indexes->formatImpl(s, state, frame); + indices->formatImpl(s, state, frame); } if (settings) { diff --git a/dbms/src/Parsers/ParserCreateQuery.cpp b/dbms/src/Parsers/ParserCreateQuery.cpp index d6eaf00d33c..891bccaa45a 100644 --- a/dbms/src/Parsers/ParserCreateQuery.cpp +++ b/dbms/src/Parsers/ParserCreateQuery.cpp @@ -167,7 +167,7 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ASTPtr primary_key; ASTPtr order_by; ASTPtr sample_by; - ASTPtr indexes; + ASTPtr indices; ASTPtr settings; if (!s_engine.ignore(pos, expected)) @@ -213,7 +213,7 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } if (s_indexes.ignore(pos, expected)) { - if (indexes_p.parse(pos, indexes, expected)) + if (indexes_p.parse(pos, indices, expected)) continue; else return false; @@ -234,7 +234,7 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) storage->set(storage->primary_key, primary_key); storage->set(storage->order_by, order_by); storage->set(storage->sample_by, sample_by); - storage->set(storage->indexes, indexes); + storage->set(storage->indices, indices); storage->set(storage->settings, settings); diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 6522257b30a..2278b22eb30 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -95,7 +95,7 @@ MergeTreeData::MergeTreeData( const ASTPtr & order_by_ast_, const ASTPtr & primary_key_ast_, const ASTPtr & sample_by_ast_, - const ASTPtr & indexes_ast_, + const ASTPtr & indices_ast_, const MergingParams & merging_params_, const MergeTreeSettings & settings_, bool require_part_metadata_, @@ -116,7 +116,7 @@ MergeTreeData::MergeTreeData( data_parts_by_state_and_info(data_parts_indexes.get()) { setPrimaryKeyAndColumns(order_by_ast_, primary_key_ast_, columns_); - setSkipIndexes(indexes_ast_); + setSkipIndices(indices_ast_); /// NOTE: using the same columns list as is read when performing actual merges. merging_params.check(getColumns().getAllPhysical()); @@ -349,21 +349,21 @@ void MergeTreeData::setPrimaryKeyAndColumns( } -void MergeTreeData::setSkipIndexes(const ASTPtr & indexes_asts, bool only_check) +void MergeTreeData::setSkipIndices(const ASTPtr &indices_asts, bool only_check) { - if (!indexes_asts) + if (!indices_asts) { if (!only_check) { - skip_indexes_ast = nullptr; - indexes.clear(); + skip_indices_ast = nullptr; + skip_indices.clear(); } return; } - MergeTreeIndexes new_indexes; + MergeTreeIndices new_indexes; std::set names; - auto index_list = std::dynamic_pointer_cast(indexes_asts); + auto index_list = std::dynamic_pointer_cast(indices_asts); for (const auto &index_ast : index_list->children) { @@ -384,8 +384,8 @@ void MergeTreeData::setSkipIndexes(const ASTPtr & indexes_asts, bool only_check) if (!only_check) { - skip_indexes_ast = indexes_asts; - indexes = std::move(new_indexes); + skip_indices_ast = indices_asts; + skip_indices = std::move(new_indexes); } } @@ -1047,7 +1047,7 @@ void MergeTreeData::checkAlter(const AlterCommands & commands) auto new_columns = getColumns(); ASTPtr new_order_by_ast = order_by_ast; ASTPtr new_primary_key_ast = primary_key_ast; - ASTPtr new_indexes_ast = skip_indexes_ast; + ASTPtr new_indexes_ast = skip_indices_ast; commands.apply(new_columns, new_order_by_ast, new_primary_key_ast, new_indexes_ast); /// Set of columns that shouldn't be altered. 
@@ -1066,7 +1066,7 @@ void MergeTreeData::checkAlter(const AlterCommands & commands) columns_alter_forbidden.insert(col); } - for (auto index : indexes) + for (auto index : skip_indices) { /// TODO: some special error telling about "drop index" for (const String & col : index->expr->getRequiredColumns()) @@ -1128,7 +1128,7 @@ void MergeTreeData::checkAlter(const AlterCommands & commands) } setPrimaryKeyAndColumns(new_order_by_ast, new_primary_key_ast, new_columns, /* only_check = */ true); - setSkipIndexes(new_indexes_ast, /* only_check = */ true); + setSkipIndices(new_indexes_ast, /* only_check = */ true); /// Check that type conversions are possible. ExpressionActionsPtr unused_expression; @@ -1136,7 +1136,7 @@ void MergeTreeData::checkAlter(const AlterCommands & commands) bool unused_bool; createConvertExpression(nullptr, getColumns().getAllPhysical(), new_columns.getAllPhysical(), - skip_indexes_ast, new_indexes_ast, unused_expression, unused_map, unused_bool); + skip_indices_ast, new_indexes_ast, unused_expression, unused_map, unused_bool); } void MergeTreeData::createConvertExpression(const DataPartPtr & part, const NamesAndTypesList & old_columns, const NamesAndTypesList & new_columns, @@ -1309,7 +1309,7 @@ MergeTreeData::AlterDataPartTransactionPtr MergeTreeData::alterDataPart( AlterDataPartTransactionPtr transaction(new AlterDataPartTransaction(part)); /// Blocks changes to the part. bool force_update_metadata; createConvertExpression(part, part->columns, new_columns, - skip_indexes_ast, new_indices_ast, + skip_indices_ast, new_indices_ast, expression, transaction->rename_map, force_update_metadata); size_t num_files_to_modify = transaction->rename_map.size(); @@ -2137,7 +2137,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::loadPartAndFixMetadata(const St /// Check the data while we are at it. if (part->checksums.empty()) { - part->checksums = checkDataPart(full_part_path, index_granularity, false, primary_key_data_types, indexes); + part->checksums = checkDataPart(full_part_path, index_granularity, false, primary_key_data_types, skip_indices); { WriteBufferFromFile out(full_part_path + "checksums.txt.tmp", 4096); diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index bdfe3247e3c..e7db445689b 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include @@ -308,7 +308,7 @@ public: const ASTPtr & order_by_ast_, const ASTPtr & primary_key_ast_, const ASTPtr & sample_by_ast_, /// nullptr, if sampling is not supported. - const ASTPtr & indexes_ast_, + const ASTPtr & indices_ast_, const MergingParams & merging_params_, const MergeTreeSettings & settings_, bool require_part_metadata_, @@ -582,9 +582,9 @@ public: Int64 minmax_idx_date_column_pos = -1; /// In a common case minmax index includes a date column. Int64 minmax_idx_time_column_pos = -1; /// In other cases, minmax index often includes a dateTime column. - /// Secondary (data skipping) indexes for MergeTree - MergeTreeIndexes indexes; - ASTPtr skip_indexes_ast; + /// Secondary (data skipping) indices for MergeTree + MergeTreeIndices skip_indices; + ASTPtr skip_indices_ast; /// Names of columns for primary key + secondary sorting columns. 
Names sorting_key_columns; @@ -728,7 +728,7 @@ private: void setPrimaryKeyAndColumns(const ASTPtr & new_order_by_ast, ASTPtr new_primary_key_ast, const ColumnsDescription & new_columns, bool only_check = false); - void setSkipIndexes(const ASTPtr & indexes_asts, bool only_check = false); + void setSkipIndices(const ASTPtr &indices_asts, bool only_check = false); void initPartitionKey(); diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index cd635a36b8b..3acb5fa5e21 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -334,7 +334,7 @@ MergeTreeData::DataPartsVector MergeTreeDataMergerMutator::selectAllPartsFromPar static void extractMergingAndGatheringColumns( const NamesAndTypesList & all_columns, const ExpressionActionsPtr & sorting_key_expr, - const MergeTreeIndexes & indexes, + const MergeTreeIndices & indexes, const MergeTreeData::MergingParams & merging_params, NamesAndTypesList & gathering_columns, Names & gathering_column_names, NamesAndTypesList & merging_columns, Names & merging_column_names) @@ -556,7 +556,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor NamesAndTypesList gathering_columns, merging_columns; Names gathering_column_names, merging_column_names; extractMergingAndGatheringColumns( - all_columns, data.sorting_key_expr, data.indexes, + all_columns, data.sorting_key_expr, data.skip_indices, data.merging_params, gathering_columns, gathering_column_names, merging_columns, merging_column_names); MergeTreeData::MutableDataPartPtr new_data_part = std::make_shared( @@ -636,7 +636,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor merge_entry, sum_input_rows_upper_bound, column_sizes, watch_prev_elapsed, merge_alg)); BlockInputStreamPtr stream = std::move(input); - for (const auto & index : data.indexes) { + for (const auto & index : data.skip_indices) { stream = std::make_shared( std::make_shared(stream, index->expr)); } @@ -911,7 +911,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor { /// All columns are modified, proceed to write a new part from scratch. 
- for (const auto & index : data.indexes) + for (const auto & index : data.skip_indices) in = std::make_shared( std::make_shared(in, index->expr)); @@ -946,7 +946,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor /// Checks if columns used in skipping indexes are modified. for (const auto & col : in_header.getNames()) { - for (const auto index : data.indexes) { + for (const auto index : data.skip_indices) { const auto & index_cols = index->expr->getRequiredColumns(); auto it = find(cbegin(index_cols), cend(index_cols), col); if (it != cend(index_cols)) { diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h index 90ea11e4141..faebbfd9459 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 78856992dec..c40432a5ba1 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include @@ -534,7 +534,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::readFromParts( /// It can be done in multiple threads (one thread for each part). /// Maybe it should be moved to BlockInputStream, but it can cause some problems. - for (auto index : data.indexes) { + for (auto index : data.skip_indices) { auto condition = index->createIndexCondition(query_info, context); if (!condition->alwaysUnknownOrTrue()) { ranges.ranges = filterMarksUsingIndex(index, condition, part, ranges.ranges, settings); diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 18df3905846..05a5253323c 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -214,7 +214,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa NamesAndTypesList columns = data.getColumns().getAllPhysical().filter(block.getNames()); MergedBlockOutputStream out(data, new_data_part->getFullPath(), columns, compression_codec); - for (auto index : data.indexes) + for (auto index : data.skip_indices) index->expr->execute(block); out.writePrefix(); diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexReader.h b/dbms/src/Storages/MergeTree/MergeTreeIndexReader.h index 5eb2caf62cd..f9e17887ff3 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexReader.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexReader.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include namespace DB { diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndices.cpp similarity index 97% rename from dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp rename to dbms/src/Storages/MergeTree/MergeTreeIndices.cpp index a0bc956ea65..bb651c56d17 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndices.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h b/dbms/src/Storages/MergeTree/MergeTreeIndices.h similarity index 98% rename from dbms/src/Storages/MergeTree/MergeTreeIndexes.h rename to
dbms/src/Storages/MergeTree/MergeTreeIndices.h index fab61d07785..9b754cb58b8 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexes.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndices.h @@ -88,7 +88,7 @@ public: }; -using MergeTreeIndexes = std::vector; +using MergeTreeIndices = std::vector; class MergeTreeIndexFactory : public ext::singleton diff --git a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h index 198263ae243..5551b69058c 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h +++ b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include diff --git a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp index b90817c7272..a1cc4250e99 100644 --- a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -327,12 +327,12 @@ void MergedBlockOutputStream::writeSuffixAndFinalizePart( } /// Finish skip index serialization - for (size_t i = 0; i < storage.indexes.size(); ++i) + for (size_t i = 0; i < storage.skip_indices.size(); ++i) { - auto & stream = *skip_indexes_streams[i]; - if (skip_indexes_granules[i] && !skip_indexes_granules[i]->empty()) { - skip_indexes_granules[i]->serializeBinary(stream.compressed); - skip_indexes_granules[i].reset(); + auto & stream = *skip_indices_streams[i]; + if (skip_indices_granules[i] && !skip_indices_granules[i]->empty()) { + skip_indices_granules[i]->serializeBinary(stream.compressed); + skip_indices_granules[i].reset(); } } @@ -354,14 +354,14 @@ void MergedBlockOutputStream::writeSuffixAndFinalizePart( index_stream = nullptr; } - for (auto & stream : skip_indexes_streams) + for (auto & stream : skip_indices_streams) { stream->finalize(); stream->addToChecksums(checksums); } - skip_indexes_streams.clear(); - skip_indexes_granules.clear(); + skip_indices_streams.clear(); + skip_indices_granules.clear(); skip_index_filling.clear(); for (ColumnStreams::iterator it = column_streams.begin(); it != column_streams.end(); ++it) @@ -421,9 +421,9 @@ void MergedBlockOutputStream::init() index_stream = std::make_unique(*index_file_stream); } - for (const auto index : storage.indexes) { + for (const auto index : storage.skip_indices) { String stream_name = index->getFileName(); - skip_indexes_streams.emplace_back( + skip_indices_streams.emplace_back( std::move(std::make_unique( stream_name, part_path + stream_name, INDEX_FILE_EXTENSION, @@ -431,7 +431,7 @@ void MergedBlockOutputStream::init() codec, max_compress_block_size, 0, aio_threshold))); - skip_indexes_granules.emplace_back(nullptr); + skip_indices_granules.emplace_back(nullptr); skip_index_filling.push_back(0); } } @@ -447,7 +447,7 @@ void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Perm auto primary_key_column_names = storage.primary_key_columns; Names skip_indexes_column_names; - for (const auto index : storage.indexes) { + for (const auto index : storage.skip_indices) { std::copy(index->columns.cbegin(), index->columns.cend(), std::back_inserter(skip_indexes_column_names)); } @@ -542,10 +542,10 @@ void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Perm { /// Filling and writing skip indexes like in IMergedBlockOutputStream::writeData - for (size_t i = 0; i < storage.indexes.size(); ++i) + for (size_t i = 0; i < storage.skip_indices.size(); ++i) { - const auto index = storage.indexes[i]; - auto & 
stream = *skip_indexes_streams[i]; + const auto index = storage.skip_indices[i]; + auto & stream = *skip_indices_streams[i]; size_t prev_pos = 0; while (prev_pos < rows) { @@ -557,8 +557,8 @@ void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Perm else { limit = storage.index_granularity; - if (!skip_indexes_granules[i]) { - skip_indexes_granules[i] = index->createIndexGranule(); + if (!skip_indices_granules[i]) { + skip_indices_granules[i] = index->createIndexGranule(); skip_index_filling[i] = 0; if (stream.compressed.offset() >= min_compress_block_size) @@ -570,15 +570,15 @@ void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Perm } size_t pos = prev_pos; - skip_indexes_granules[i]->update(block, &pos, limit); + skip_indices_granules[i]->update(block, &pos, limit); if (pos == prev_pos + limit) { ++skip_index_filling[i]; /// write index if it is filled if (skip_index_filling[i] == index->granularity) { - skip_indexes_granules[i]->serializeBinary(stream.compressed); - skip_indexes_granules[i].reset(); + skip_indices_granules[i]->serializeBinary(stream.compressed); + skip_indices_granules[i].reset(); skip_index_filling[i] = 0; } } diff --git a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.h b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.h index 0b3429743c4..6bc6e90e887 100644 --- a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.h +++ b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.h @@ -150,8 +150,8 @@ private: std::unique_ptr index_stream; MutableColumns index_columns; - std::vector> skip_indexes_streams; - MergeTreeIndexGranules skip_indexes_granules; + std::vector> skip_indices_streams; + MergeTreeIndexGranules skip_indices_granules; std::vector skip_index_filling; }; diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp index d0d8f26a03b..dee895d31d6 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp @@ -150,7 +150,7 @@ void ReplicatedMergeTreeAlterThread::run() /// Update the part and write result to temporary files. /// TODO: You can skip checking for too large changes if ZooKeeper has, for example, /// node /flags/force_alter. 
- auto transaction = storage.data.alterDataPart(part, columns_for_parts, storage.data.skip_indexes_ast, false); + auto transaction = storage.data.alterDataPart(part, columns_for_parts, storage.data.skip_indices_ast, false); if (!transaction) continue; diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index 3c77772bccb..d6a620ac17c 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -225,7 +225,7 @@ void ReplicatedMergeTreePartCheckThread::checkPart(const String & part_name) storage.data.index_granularity, true, storage.data.primary_key_data_types, - storage.data.indexes, + storage.data.skip_indices, [this] { return need_stop.load(); }); if (need_stop) diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index 2d2c8e65d66..6dc2f511eaa 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -45,7 +45,7 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr if (data.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) partition_key = formattedAST(MergeTreeData::extractKeyExpressionList(data.partition_by_ast)); - skip_indexes = formattedAST(data.skip_indexes_ast); + skip_indices = formattedAST(data.skip_indices_ast); } void ReplicatedMergeTreeTableMetadata::write(WriteBuffer & out) const @@ -67,8 +67,8 @@ void ReplicatedMergeTreeTableMetadata::write(WriteBuffer & out) const if (!sorting_key.empty()) out << "sorting key: " << sorting_key << "\n"; - if (!skip_indexes.empty()) - out << "indices: " << skip_indexes << "\n"; + if (!skip_indices.empty()) + out << "indices: " << skip_indices << "\n"; } String ReplicatedMergeTreeTableMetadata::toString() const @@ -100,7 +100,7 @@ void ReplicatedMergeTreeTableMetadata::read(ReadBuffer & in) in >> sorting_key >> "\n"; if (checkString("indices: ", in)) - in >> skip_indexes >> "\n"; + in >> skip_indices >> "\n"; } ReplicatedMergeTreeTableMetadata ReplicatedMergeTreeTableMetadata::parse(const String & s) @@ -183,18 +183,18 @@ ReplicatedMergeTreeTableMetadata::checkAndFindDiff(const ReplicatedMergeTreeTabl ErrorCodes::METADATA_MISMATCH); } - if (skip_indexes != from_zk.skip_indexes) + if (skip_indices != from_zk.skip_indices) { if (allow_alter) { diff.skip_indices_changed = true; - diff.new_skip_indices = from_zk.skip_indexes; + diff.new_skip_indices = from_zk.skip_indices; } else throw Exception( "Existing table metadata in ZooKeeper differs in skip indexes." 
- " Stored in ZooKeeper: " + from_zk.skip_indexes + - ", local: " + skip_indexes, + " Stored in ZooKeeper: " + from_zk.skip_indices + + ", local: " + skip_indices, ErrorCodes::METADATA_MISMATCH); } diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h index 19524e1b09a..e350058473b 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h @@ -25,7 +25,7 @@ struct ReplicatedMergeTreeTableMetadata MergeTreeDataFormatVersion data_format_version; String partition_key; String sorting_key; - String skip_indexes; + String skip_indices; ReplicatedMergeTreeTableMetadata() = default; explicit ReplicatedMergeTreeTableMetadata(const MergeTreeData & data); diff --git a/dbms/src/Storages/MergeTree/checkDataPart.cpp b/dbms/src/Storages/MergeTree/checkDataPart.cpp index dc145ef55ca..d0301b8fc27 100644 --- a/dbms/src/Storages/MergeTree/checkDataPart.cpp +++ b/dbms/src/Storages/MergeTree/checkDataPart.cpp @@ -137,7 +137,7 @@ MergeTreeData::DataPart::Checksums checkDataPart( size_t index_granularity, bool require_checksums, const DataTypes & primary_key_data_types, - const MergeTreeIndexes & indexes, + const MergeTreeIndices & indices, std::function is_cancelled) { Logger * log = &Logger::get("checkDataPart"); @@ -243,7 +243,7 @@ MergeTreeData::DataPart::Checksums checkDataPart( } /// Read and check skip indexes - for (const auto index : indexes) + for (const auto index : indices) { LOG_DEBUG(log, "Checking index " << index->name << " in " << path); Stream stream(path, index->getFileName(), ".idx"); diff --git a/dbms/src/Storages/MergeTree/checkDataPart.h b/dbms/src/Storages/MergeTree/checkDataPart.h index 30aa2ebf68e..2037b1334e6 100644 --- a/dbms/src/Storages/MergeTree/checkDataPart.h +++ b/dbms/src/Storages/MergeTree/checkDataPart.h @@ -17,7 +17,7 @@ MergeTreeData::DataPart::Checksums checkDataPart( size_t index_granularity, bool require_checksums, const DataTypes & primary_key_data_types, /// Check the primary key. If it is not necessary, pass an empty array. 
- const MergeTreeIndexes & indexes = {}, /// Check skip indexes + const MergeTreeIndices & indices = {}, /// Check skip indexes std::function is_cancelled = []{ return false; }); } diff --git a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp index 9f464c40941..0572df4ae14 100644 --- a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include @@ -334,13 +334,13 @@ static StoragePtr create(const StorageFactory::Arguments & args) * - Sorting key in the ORDER BY clause; * - Primary key (if it is different from the sorting key) in the PRIMARY KEY clause; * - Sampling expression in the SAMPLE BY clause; - * - Secondary indexes + * - Secondary indices in the INDICES clause; * - Additional MergeTreeSettings in the SETTINGS clause; */ bool is_extended_storage_def = args.storage_def->partition_by || args.storage_def->primary_key || args.storage_def->order_by - || args.storage_def->sample_by || (args.storage_def->indexes && !args.storage_def->indexes->children.empty()) || args.storage_def->settings; + || args.storage_def->sample_by || (args.storage_def->indices && !args.storage_def->indices->children.empty()) || args.storage_def->settings; String name_part = args.engine_name.substr(0, args.engine_name.size() - strlen("MergeTree")); @@ -555,7 +555,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) ASTPtr order_by_ast; ASTPtr primary_key_ast; ASTPtr sample_by_ast; - ASTPtr indexes_ast; + ASTPtr indices_ast; MergeTreeSettings storage_settings = args.context.getMergeTreeSettings(); if (is_extended_storage_def) @@ -576,8 +576,8 @@ static StoragePtr create(const StorageFactory::Arguments & args) if (args.storage_def->sample_by) sample_by_ast = args.storage_def->sample_by->ptr(); - if (args.storage_def->indexes) { - indexes_ast = args.storage_def->indexes->ptr(); + if (args.storage_def->indices) { + indices_ast = args.storage_def->indices->ptr(); } storage_settings.loadFromQuery(*args.storage_def); @@ -614,18 +614,18 @@ static StoragePtr create(const StorageFactory::Arguments & args) zookeeper_path, replica_name, args.attach, args.data_path, args.database_name, args.table_name, args.columns, args.context, date_column_name, partition_by_ast, order_by_ast, primary_key_ast, - sample_by_ast, indexes_ast, merging_params, storage_settings, + sample_by_ast, indices_ast, merging_params, storage_settings, args.has_force_restore_data_flag); else return StorageMergeTree::create( args.data_path, args.database_name, args.table_name, args.columns, args.attach, args.context, date_column_name, partition_by_ast, order_by_ast, primary_key_ast, - sample_by_ast, indexes_ast, merging_params, storage_settings, + sample_by_ast, indices_ast, merging_params, storage_settings, args.has_force_restore_data_flag); } -static void registerMergeTreeSkipIndexes() { +static void registerMergeTreeSkipIndices() { auto & factory = MergeTreeIndexFactory::instance(); factory.registerIndex("minmax", MergeTreeMinMaxIndexCreator); } @@ -649,7 +649,7 @@ void registerStorageMergeTree(StorageFactory & factory) factory.registerStorage("ReplicatedGraphiteMergeTree", create); factory.registerStorage("ReplicatedVersionedCollapsingMergeTree", create); - registerMergeTreeSkipIndexes(); + registerMergeTreeSkipIndices(); } } diff --git a/dbms/src/Storages/StorageFactory.cpp index
0830638b13d..93f5ed16abc 100644 --- a/dbms/src/Storages/StorageFactory.cpp +++ b/dbms/src/Storages/StorageFactory.cpp @@ -95,7 +95,7 @@ StoragePtr StorageFactory::get( } if ((storage_def->partition_by || storage_def->primary_key || storage_def->order_by - || storage_def->sample_by || (storage_def->indexes && !storage_def->indexes->children.empty())) + || storage_def->sample_by || (storage_def->indices && !storage_def->indices->children.empty())) && !endsWith(name, "MergeTree")) { throw Exception( diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index b98b395a77c..0ec7eaee753 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -216,12 +216,12 @@ void StorageMergeTree::alter( auto new_columns = data.getColumns(); ASTPtr new_order_by_ast = data.order_by_ast; ASTPtr new_primary_key_ast = data.primary_key_ast; - ASTPtr new_indexes_ast = data.skip_indexes_ast; - params.apply(new_columns, new_order_by_ast, new_primary_key_ast, new_indexes_ast); + ASTPtr new_indices_ast = data.skip_indices_ast; + params.apply(new_columns, new_order_by_ast, new_primary_key_ast, new_indices_ast); - if (new_indexes_ast && new_indexes_ast->children.empty()) + if (new_indices_ast && new_indices_ast->children.empty()) { - new_indexes_ast.reset(); + new_indices_ast.reset(); } auto parts = data.getDataParts({MergeTreeDataPartState::PreCommitted, MergeTreeDataPartState::Committed, MergeTreeDataPartState::Outdated}); @@ -229,7 +229,7 @@ void StorageMergeTree::alter( std::vector transactions; for (const MergeTreeData::DataPartPtr & part : parts) { - if (auto transaction = data.alterDataPart(part, columns_for_parts, new_indexes_ast, false)) + if (auto transaction = data.alterDataPart(part, columns_for_parts, new_indices_ast, false)) transactions.push_back(std::move(transaction)); } @@ -245,12 +245,12 @@ void StorageMergeTree::alter( if (new_primary_key_ast.get() != data.primary_key_ast.get()) storage_ast.set(storage_ast.primary_key, new_primary_key_ast); - if (new_indexes_ast.get() != data.skip_indexes_ast.get()) + if (new_indices_ast.get() != data.skip_indices_ast.get()) { - if (new_indexes_ast == nullptr) - storage_ast.indexes = nullptr; + if (new_indices_ast == nullptr) + storage_ast.indices = nullptr; else - storage_ast.set(storage_ast.indexes, new_indexes_ast); + storage_ast.set(storage_ast.indices, new_indices_ast); } }; @@ -258,7 +258,7 @@ void StorageMergeTree::alter( /// Reinitialize primary key because primary key column types might have changed. 
data.setPrimaryKeyAndColumns(new_order_by_ast, new_primary_key_ast, new_columns); - data.setSkipIndexes(new_indexes_ast); + data.setSkipIndices(new_indices_ast); for (auto & transaction : transactions) transaction->commit(); diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 8f0e629f9c9..609b56f0935 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -418,7 +418,7 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column { ASTPtr new_primary_key_ast = data.primary_key_ast; ASTPtr new_order_by_ast = data.order_by_ast; - ASTPtr new_indices_ast = data.skip_indexes_ast; + ASTPtr new_indices_ast = data.skip_indices_ast; IDatabase::ASTModifier storage_modifier; if (!metadata_diff.empty()) { @@ -468,9 +468,9 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column storage_ast.set(storage_ast.order_by, new_order_by_ast); if (new_indices_ast) - storage_ast.set(storage_ast.indexes, new_indices_ast); + storage_ast.set(storage_ast.indices, new_indices_ast); else - storage_ast.indexes = nullptr; + storage_ast.indices = nullptr; }; } @@ -479,7 +479,7 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column /// Even if the primary/sorting keys didn't change we must reinitialize it /// because primary key column types might have changed. data.setPrimaryKeyAndColumns(new_order_by_ast, new_primary_key_ast, new_columns); - data.setSkipIndexes(new_indices_ast); + data.setSkipIndices(new_indices_ast); } @@ -3086,7 +3086,7 @@ void StorageReplicatedMergeTree::alter(const AlterCommands & params, ColumnsDescription new_columns = data.getColumns(); ASTPtr new_order_by_ast = data.order_by_ast; ASTPtr new_primary_key_ast = data.primary_key_ast; - ASTPtr new_indexes_ast = data.skip_indexes_ast; + ASTPtr new_indexes_ast = data.skip_indices_ast; params.apply(new_columns, new_order_by_ast, new_primary_key_ast, new_indexes_ast); if (new_indexes_ast && new_indexes_ast->children.empty()) { @@ -3100,12 +3100,12 @@ void StorageReplicatedMergeTree::alter(const AlterCommands & params, ReplicatedMergeTreeTableMetadata new_metadata(data); if (new_order_by_ast.get() != data.order_by_ast.get()) new_metadata.sorting_key = serializeAST(*MergeTreeData::extractKeyExpressionList(new_order_by_ast)); - if (new_indexes_ast.get() != data.skip_indexes_ast.get()) + if (new_indexes_ast.get() != data.skip_indices_ast.get()) { if (new_indexes_ast) - new_metadata.skip_indexes = serializeAST(*new_indexes_ast.get()); + new_metadata.skip_indices = serializeAST(*new_indexes_ast.get()); else - new_metadata.skip_indexes = {}; + new_metadata.skip_indices = {}; } String new_metadata_str = new_metadata.toString(); From 8c2a23a129cb6715e6231810cb1732f45e0e9a67 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Thu, 17 Jan 2019 17:23:12 +0300 Subject: [PATCH 086/586] MTReadStream --- .../Storages/MergeTree/MergeTreeIndexReader.h | 4 +- .../Storages/MergeTree/MergeTreeReader.cpp | 203 +---------------- dbms/src/Storages/MergeTree/MergeTreeReader.h | 47 +--- .../MergeTree/MergeTreeReaderStream.cpp | 215 ++++++++++++++++++ .../MergeTree/MergeTreeReaderStream.h | 49 ++++ 5 files changed, 270 insertions(+), 248 deletions(-) create mode 100644 dbms/src/Storages/MergeTree/MergeTreeReaderStream.cpp create mode 100644 dbms/src/Storages/MergeTree/MergeTreeReaderStream.h diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexReader.h 
b/dbms/src/Storages/MergeTree/MergeTreeIndexReader.h index f9e17887ff3..f96cf81e96f 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexReader.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexReader.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include @@ -20,7 +20,7 @@ public: private: MergeTreeIndexPtr index; - MergeTreeReader::Stream stream; + MergeTreeReaderStream stream; }; } \ No newline at end of file diff --git a/dbms/src/Storages/MergeTree/MergeTreeReader.cpp b/dbms/src/Storages/MergeTree/MergeTreeReader.cpp index de39ca1b7e7..eca9c5818e0 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeReader.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeReader.cpp @@ -154,205 +154,6 @@ size_t MergeTreeReader::readRows(size_t from_mark, bool continue_reading, size_t return read_rows; } - -MergeTreeReader::Stream::Stream( - const String & path_prefix_, const String & extension_, size_t marks_count_, - const MarkRanges & all_mark_ranges, - MarkCache * mark_cache_, bool save_marks_in_cache_, - UncompressedCache * uncompressed_cache, - size_t aio_threshold, size_t max_read_buffer_size, - const ReadBufferFromFileBase::ProfileCallback & profile_callback, clockid_t clock_type) - : path_prefix(path_prefix_), extension(extension_), marks_count(marks_count_) - , mark_cache(mark_cache_), save_marks_in_cache(save_marks_in_cache_) -{ - /// Compute the size of the buffer. - size_t max_mark_range = 0; - - for (size_t i = 0; i < all_mark_ranges.size(); ++i) - { - size_t right = all_mark_ranges[i].end; - /// NOTE: if we are reading the whole file, then right == marks_count - /// and we will use max_read_buffer_size for buffer size, thus avoiding the need to load marks. - - /// If the end of range is inside the block, we will need to read it too. - if (right < marks_count && getMark(right).offset_in_decompressed_block > 0) - { - while (right < marks_count - && getMark(right).offset_in_compressed_file - == getMark(all_mark_ranges[i].end).offset_in_compressed_file) - { - ++right; - } - } - - /// If there are no marks after the end of range, just use max_read_buffer_size - if (right >= marks_count - || (right + 1 == marks_count - && getMark(right).offset_in_compressed_file - == getMark(all_mark_ranges[i].end).offset_in_compressed_file)) - { - max_mark_range = max_read_buffer_size; - break; - } - - max_mark_range = std::max(max_mark_range, - getMark(right).offset_in_compressed_file - getMark(all_mark_ranges[i].begin).offset_in_compressed_file); - } - - /// Avoid empty buffer. May happen while reading dictionary for DataTypeLowCardinality. - /// For example: part has single dictionary and all marks point to the same position. - if (max_mark_range == 0) - max_mark_range = max_read_buffer_size; - - size_t buffer_size = std::min(max_read_buffer_size, max_mark_range); - - /// Estimate size of the data to be read. - size_t estimated_size = 0; - if (aio_threshold > 0) - { - for (const auto & mark_range : all_mark_ranges) - { - size_t offset_begin = (mark_range.begin > 0) - ? getMark(mark_range.begin).offset_in_compressed_file - : 0; - - size_t offset_end = (mark_range.end < marks_count) - ? getMark(mark_range.end).offset_in_compressed_file - : Poco::File(path_prefix + extension).getSize(); - - if (offset_end > offset_begin) - estimated_size += offset_end - offset_begin; - } - } - - /// Initialize the objects that shall be used to perform read operations. 
- if (uncompressed_cache) - { - auto buffer = std::make_unique( - path_prefix + extension, uncompressed_cache, estimated_size, aio_threshold, buffer_size); - - if (profile_callback) - buffer->setProfileCallback(profile_callback, clock_type); - - cached_buffer = std::move(buffer); - data_buffer = cached_buffer.get(); - } - else - { - auto buffer = std::make_unique( - path_prefix + extension, estimated_size, aio_threshold, buffer_size); - - if (profile_callback) - buffer->setProfileCallback(profile_callback, clock_type); - - non_cached_buffer = std::move(buffer); - data_buffer = non_cached_buffer.get(); - } -} - - -const MarkInCompressedFile & MergeTreeReader::Stream::getMark(size_t index) -{ - if (!marks) - loadMarks(); - return (*marks)[index]; -} - - -void MergeTreeReader::Stream::loadMarks() -{ - std::string mrk_path = path_prefix + ".mrk"; - - auto load = [&]() -> MarkCache::MappedPtr - { - /// Memory for marks must not be accounted as memory usage for query, because they are stored in shared cache. - auto temporarily_disable_memory_tracker = getCurrentMemoryTrackerActionLock(); - - size_t file_size = Poco::File(mrk_path).getSize(); - size_t expected_file_size = sizeof(MarkInCompressedFile) * marks_count; - if (expected_file_size != file_size) - throw Exception( - "bad size of marks file `" + mrk_path + "':" + std::to_string(file_size) + ", must be: " + std::to_string(expected_file_size), - ErrorCodes::CORRUPTED_DATA); - - auto res = std::make_shared(marks_count); - - /// Read directly to marks. - ReadBufferFromFile buffer(mrk_path, file_size, -1, reinterpret_cast(res->data())); - - if (buffer.eof() || buffer.buffer().size() != file_size) - throw Exception("Cannot read all marks from file " + mrk_path, ErrorCodes::CANNOT_READ_ALL_DATA); - - return res; - }; - - if (mark_cache) - { - auto key = mark_cache->hash(mrk_path); - if (save_marks_in_cache) - { - marks = mark_cache->getOrSet(key, load); - } - else - { - marks = mark_cache->get(key); - if (!marks) - marks = load(); - } - } - else - marks = load(); - - if (!marks) - throw Exception("Failed to load marks: " + mrk_path, ErrorCodes::LOGICAL_ERROR); -} - - -void MergeTreeReader::Stream::seekToMark(size_t index) -{ - MarkInCompressedFile mark = getMark(index); - - try - { - if (cached_buffer) - cached_buffer->seek(mark.offset_in_compressed_file, mark.offset_in_decompressed_block); - if (non_cached_buffer) - non_cached_buffer->seek(mark.offset_in_compressed_file, mark.offset_in_decompressed_block); - } - catch (Exception & e) - { - /// Better diagnostics. - if (e.code() == ErrorCodes::ARGUMENT_OUT_OF_BOUND) - e.addMessage("(while seeking to mark " + toString(index) - + " of column " + path_prefix + "; offsets are: " - + toString(mark.offset_in_compressed_file) + " " - + toString(mark.offset_in_decompressed_block) + ")"); - - throw; - } -} - - -void MergeTreeReader::Stream::seekToStart() -{ - try - { - if (cached_buffer) - cached_buffer->seek(0, 0); - if (non_cached_buffer) - non_cached_buffer->seek(0, 0); - } - catch (Exception & e) - { - /// Better diagnostics. 
- if (e.code() == ErrorCodes::ARGUMENT_OUT_OF_BOUND) - e.addMessage("(while seeking to start of column " + path_prefix + ")"); - - throw; - } -} - - void MergeTreeReader::addStreams(const String & name, const IDataType & type, const ReadBufferFromFileBase::ProfileCallback & profile_callback, clockid_t clock_type) { @@ -371,7 +172,7 @@ void MergeTreeReader::addStreams(const String & name, const IDataType & type, if (!data_file_exists) return; - streams.emplace(stream_name, std::make_unique( + streams.emplace(stream_name, std::make_unique( path + stream_name, DATA_FILE_EXTENSION, data_part->marks_count, all_mark_ranges, mark_cache, save_marks_in_cache, uncompressed_cache, aio_threshold, max_read_buffer_size, profile_callback, clock_type)); @@ -401,7 +202,7 @@ void MergeTreeReader::readData( if (it == streams.end()) return nullptr; - Stream & stream = *it->second; + MergeTreeReaderStream & stream = *it->second; if (stream_for_prefix) { diff --git a/dbms/src/Storages/MergeTree/MergeTreeReader.h b/dbms/src/Storages/MergeTree/MergeTreeReader.h index 744f1c0dbe4..74bef7ac118 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeReader.h +++ b/dbms/src/Storages/MergeTree/MergeTreeReader.h @@ -1,12 +1,7 @@ #pragma once -#include -#include -#include -#include -#include -#include #include +#include #include @@ -14,7 +9,6 @@ namespace DB { class IDataType; -class CachedCompressedReadBuffer; /// Reads the data between pairs of marks in the same part. When reading consecutive ranges, avoids unnecessary seeks. /// When ranges are almost consecutive, seeks are fast because they are performed inside the buffer. @@ -57,45 +51,8 @@ public: /// If continue_reading is true, continue reading from last state, otherwise seek to from_mark size_t readRows(size_t from_mark, bool continue_reading, size_t max_rows_to_read, Block & res); - class Stream - { - public: - Stream( - const String & path_prefix_, const String & extension_, size_t marks_count_, - const MarkRanges & all_mark_ranges, - MarkCache * mark_cache, bool save_marks_in_cache, - UncompressedCache * uncompressed_cache, - size_t aio_threshold, size_t max_read_buffer_size, - const ReadBufferFromFileBase::ProfileCallback & profile_callback, clockid_t clock_type); - - void seekToMark(size_t index); - void seekToStart(); - - ReadBuffer * data_buffer; - - private: - Stream() = default; - - /// NOTE: lazily loads marks from the marks cache. - const MarkInCompressedFile & getMark(size_t index); - - void loadMarks(); - - std::string path_prefix; - std::string extension; - - size_t marks_count; - - MarkCache * mark_cache; - bool save_marks_in_cache; - MarkCache::MappedPtr marks; - - std::unique_ptr cached_buffer; - std::unique_ptr non_cached_buffer; - }; - private: - using FileStreams = std::map>; + using FileStreams = std::map>; /// avg_value_size_hints are used to reduce the number of reallocations when creating columns of variable size. 
ValueSizeMap avg_value_size_hints; diff --git a/dbms/src/Storages/MergeTree/MergeTreeReaderStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeReaderStream.cpp new file mode 100644 index 00000000000..890e7b13c6c --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeReaderStream.cpp @@ -0,0 +1,215 @@ +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int CORRUPTED_DATA; + extern const int CANNOT_READ_ALL_DATA; + extern const int ARGUMENT_OUT_OF_BOUND; +} + + +MergeTreeReaderStream::MergeTreeReaderStream( + const String & path_prefix_, const String & extension_, size_t marks_count_, + const MarkRanges & all_mark_ranges, + MarkCache * mark_cache_, bool save_marks_in_cache_, + UncompressedCache * uncompressed_cache, + size_t aio_threshold, size_t max_read_buffer_size, + const ReadBufferFromFileBase::ProfileCallback & profile_callback, clockid_t clock_type) + : path_prefix(path_prefix_), extension(extension_), marks_count(marks_count_) + , mark_cache(mark_cache_), save_marks_in_cache(save_marks_in_cache_) +{ + /// Compute the size of the buffer. + size_t max_mark_range = 0; + + for (size_t i = 0; i < all_mark_ranges.size(); ++i) + { + size_t right = all_mark_ranges[i].end; + /// NOTE: if we are reading the whole file, then right == marks_count + /// and we will use max_read_buffer_size for buffer size, thus avoiding the need to load marks. + + /// If the end of range is inside the block, we will need to read it too. + if (right < marks_count && getMark(right).offset_in_decompressed_block > 0) + { + while (right < marks_count + && getMark(right).offset_in_compressed_file + == getMark(all_mark_ranges[i].end).offset_in_compressed_file) + { + ++right; + } + } + + /// If there are no marks after the end of range, just use max_read_buffer_size + if (right >= marks_count + || (right + 1 == marks_count + && getMark(right).offset_in_compressed_file + == getMark(all_mark_ranges[i].end).offset_in_compressed_file)) + { + max_mark_range = max_read_buffer_size; + break; + } + + max_mark_range = std::max(max_mark_range, + getMark(right).offset_in_compressed_file - getMark(all_mark_ranges[i].begin).offset_in_compressed_file); + } + + /// Avoid empty buffer. May happen while reading dictionary for DataTypeLowCardinality. + /// For example: part has single dictionary and all marks point to the same position. + if (max_mark_range == 0) + max_mark_range = max_read_buffer_size; + + size_t buffer_size = std::min(max_read_buffer_size, max_mark_range); + + /// Estimate size of the data to be read. + size_t estimated_size = 0; + if (aio_threshold > 0) + { + for (const auto & mark_range : all_mark_ranges) + { + size_t offset_begin = (mark_range.begin > 0) + ? getMark(mark_range.begin).offset_in_compressed_file + : 0; + + size_t offset_end = (mark_range.end < marks_count) + ? getMark(mark_range.end).offset_in_compressed_file + : Poco::File(path_prefix + extension).getSize(); + + if (offset_end > offset_begin) + estimated_size += offset_end - offset_begin; + } + } + + /// Initialize the objects that shall be used to perform read operations. 
+    if (uncompressed_cache)
+    {
+        auto buffer = std::make_unique<CachedCompressedReadBuffer>(
+            path_prefix + extension, uncompressed_cache, estimated_size, aio_threshold, buffer_size);
+
+        if (profile_callback)
+            buffer->setProfileCallback(profile_callback, clock_type);
+
+        cached_buffer = std::move(buffer);
+        data_buffer = cached_buffer.get();
+    }
+    else
+    {
+        auto buffer = std::make_unique<CompressedReadBufferFromFile>(
+            path_prefix + extension, estimated_size, aio_threshold, buffer_size);
+
+        if (profile_callback)
+            buffer->setProfileCallback(profile_callback, clock_type);
+
+        non_cached_buffer = std::move(buffer);
+        data_buffer = non_cached_buffer.get();
+    }
+}
+
+
+const MarkInCompressedFile & MergeTreeReaderStream::getMark(size_t index)
+{
+    if (!marks)
+        loadMarks();
+    return (*marks)[index];
+}
+
+
+void MergeTreeReaderStream::loadMarks()
+{
+    std::string mrk_path = path_prefix + ".mrk";
+
+    auto load = [&]() -> MarkCache::MappedPtr
+    {
+        /// Memory for marks must not be accounted as memory usage for query, because they are stored in shared cache.
+        auto temporarily_disable_memory_tracker = getCurrentMemoryTrackerActionLock();
+
+        size_t file_size = Poco::File(mrk_path).getSize();
+        size_t expected_file_size = sizeof(MarkInCompressedFile) * marks_count;
+        if (expected_file_size != file_size)
+            throw Exception(
+                "bad size of marks file `" + mrk_path + "':" + std::to_string(file_size) + ", must be: " + std::to_string(expected_file_size),
+                ErrorCodes::CORRUPTED_DATA);
+
+        auto res = std::make_shared<MarksInCompressedFile>(marks_count);
+
+        /// Read directly to marks.
+        ReadBufferFromFile buffer(mrk_path, file_size, -1, reinterpret_cast<char *>(res->data()));
+
+        if (buffer.eof() || buffer.buffer().size() != file_size)
+            throw Exception("Cannot read all marks from file " + mrk_path, ErrorCodes::CANNOT_READ_ALL_DATA);
+
+        return res;
+    };
+
+    if (mark_cache)
+    {
+        auto key = mark_cache->hash(mrk_path);
+        if (save_marks_in_cache)
+        {
+            marks = mark_cache->getOrSet(key, load);
+        }
+        else
+        {
+            marks = mark_cache->get(key);
+            if (!marks)
+                marks = load();
+        }
+    }
+    else
+        marks = load();
+
+    if (!marks)
+        throw Exception("Failed to load marks: " + mrk_path, ErrorCodes::LOGICAL_ERROR);
+}
+
+
+void MergeTreeReaderStream::seekToMark(size_t index)
+{
+    MarkInCompressedFile mark = getMark(index);
+
+    try
+    {
+        if (cached_buffer)
+            cached_buffer->seek(mark.offset_in_compressed_file, mark.offset_in_decompressed_block);
+        if (non_cached_buffer)
+            non_cached_buffer->seek(mark.offset_in_compressed_file, mark.offset_in_decompressed_block);
+    }
+    catch (Exception & e)
+    {
+        /// Better diagnostics.
+        if (e.code() == ErrorCodes::ARGUMENT_OUT_OF_BOUND)
+            e.addMessage("(while seeking to mark " + toString(index)
+                + " of column " + path_prefix + "; offsets are: "
+                + toString(mark.offset_in_compressed_file) + " "
+                + toString(mark.offset_in_decompressed_block) + ")");
+
+        throw;
+    }
+}
+
+
+void MergeTreeReaderStream::seekToStart()
+{
+    try
+    {
+        if (cached_buffer)
+            cached_buffer->seek(0, 0);
+        if (non_cached_buffer)
+            non_cached_buffer->seek(0, 0);
+    }
+    catch (Exception & e)
+    {
+        /// Better diagnostics.
+        if (e.code() == ErrorCodes::ARGUMENT_OUT_OF_BOUND)
+            e.addMessage("(while seeking to start of column " + path_prefix + ")");
+
+        throw;
+    }
+}
+
+}
diff --git a/dbms/src/Storages/MergeTree/MergeTreeReaderStream.h b/dbms/src/Storages/MergeTree/MergeTreeReaderStream.h
new file mode 100644
index 00000000000..70ca3d452fa
--- /dev/null
+++ b/dbms/src/Storages/MergeTree/MergeTreeReaderStream.h
@@ -0,0 +1,49 @@
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+namespace DB
+{
+
+class MergeTreeReaderStream
+{
+public:
+    MergeTreeReaderStream(
+        const String &path_prefix_, const String &extension_, size_t marks_count_,
+        const MarkRanges &all_mark_ranges,
+        MarkCache *mark_cache, bool save_marks_in_cache,
+        UncompressedCache *uncompressed_cache,
+        size_t aio_threshold, size_t max_read_buffer_size,
+        const ReadBufferFromFileBase::ProfileCallback &profile_callback, clockid_t clock_type);
+
+    void seekToMark(size_t index);
+
+    void seekToStart();
+
+    ReadBuffer *data_buffer;
+
+private:
+    MergeTreeReaderStream() = default;
+
+    /// NOTE: lazily loads marks from the marks cache.
+    const MarkInCompressedFile &getMark(size_t index);
+
+    void loadMarks();
+
+    std::string path_prefix;
+    std::string extension;
+
+    size_t marks_count;
+
+    MarkCache *mark_cache;
+    bool save_marks_in_cache;
+    MarkCache::MappedPtr marks;
+
+    std::unique_ptr<CachedCompressedReadBuffer> cached_buffer;
+    std::unique_ptr<CompressedReadBufferFromFile> non_cached_buffer;
+};
+}
\ No newline at end of file
From 955b002d36af03fe9b95b4408dae9082d4b10e19 Mon Sep 17 00:00:00 2001
From: Nikita Vasilev
Date: Thu, 17 Jan 2019 18:09:49 +0300
Subject: [PATCH 087/586] upd test for bug

---
 dbms/tests/queries/0_stateless/00823_minmax_index.sql | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/dbms/tests/queries/0_stateless/00823_minmax_index.sql b/dbms/tests/queries/0_stateless/00823_minmax_index.sql
index 00d89fcfcd9..3278eac275d 100644
--- a/dbms/tests/queries/0_stateless/00823_minmax_index.sql
+++ b/dbms/tests/queries/0_stateless/00823_minmax_index.sql
@@ -11,7 +11,8 @@ CREATE TABLE test.minmax_idx
     dt Date
 ) ENGINE = MergeTree()
 ORDER BY u64
-INDICES idx_all BY (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 2,
+INDICES idx_all BY (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 4,
+        idx_all2 BY (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 2,
         idx_2 BY (u64 + toYear(dt), substring(s, 2, 4)) TYPE minmax GRANULARITY 3
 SETTINGS index_granularity = 2;

From 0e6f5b4356394653c01076f05360ba7f103ca4aa Mon Sep 17 00:00:00 2001
From: Nikita Vasilev
Date: Thu, 17 Jan 2019 21:22:45 +0300
Subject: [PATCH 088/586] fix

---
 dbms/src/Storages/MergeTree/MergeTreeData.cpp | 33 +++++++++++++------
 dbms/src/Storages/MergeTree/MergeTreeData.h   |  1 +
 .../MergeTree/MergeTreeDataMergerMutator.cpp  |  8 ++---
 .../MergeTree/MergeTreeDataWriter.cpp         |  4 +--
 4 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
index 2278b22eb30..012edf311eb 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
@@ -356,36 +356,49 @@ void MergeTreeData::setSkipIndices(const ASTPtr &indices_asts, bool only_check)
         if (!only_check)
         {
             skip_indices_ast = nullptr;
+            skip_indices_expr = nullptr;
             skip_indices.clear();
         }
         return;
     }

-    MergeTreeIndices new_indexes;
+    MergeTreeIndices new_indices;
     std::set<String> names;
     auto index_list = std::dynamic_pointer_cast<ASTExpressionList>(indices_asts);
+    ASTPtr indices_expr_list = std::make_shared<ASTExpressionList>();

-    for (const auto &index_ast :
index_list->children)
+    for (const auto & index_ast : index_list->children)
     {
-        new_indexes.push_back(
+        const auto & index_decl = std::dynamic_pointer_cast<ASTIndexDeclaration>(index_ast);
+
+        new_indices.push_back(
                 std::move(MergeTreeIndexFactory::instance().get(
                         *this,
-                        std::dynamic_pointer_cast<ASTIndexDeclaration>(index_ast),
+                        std::dynamic_pointer_cast<ASTIndexDeclaration>(index_decl->clone()),
                         global_context)));

-        if (names.find(new_indexes.back()->name) != names.end())
-        {
+        if (names.find(new_indices.back()->name) != names.end())
             throw Exception(
-                    "Index with name `" + new_indexes.back()->name + "` already exsists",
+                    "Index with name `" + new_indices.back()->name + "` already exists",
                     ErrorCodes::LOGICAL_ERROR);
-        }

-        names.insert(new_indexes.back()->name);
+
+        ASTPtr expr_list = MergeTreeData::extractKeyExpressionList(index_decl->expr->clone());
+        for (auto expr : expr_list->children)
+            indices_expr_list->children.push_back(expr->clone());
+
+        names.insert(new_indices.back()->name);
     }

+    auto syntax = SyntaxAnalyzer(global_context, {}).analyze(
+            indices_expr_list, getColumns().getAllPhysical());
+    auto new_skip_indices_expr = ExpressionAnalyzer(indices_expr_list, syntax, global_context)
+            .getActions(false);
+
     if (!only_check)
     {
         skip_indices_ast = indices_asts;
+        skip_indices_expr = new_skip_indices_expr;
         skip_indices = std::move(new_indices);
     }
 }
diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h
index e7db445689b..611bc4500bc 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeData.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeData.h
@@ -585,6 +585,7 @@ public:
     /// Secondary (data skipping) indices for MergeTree
     MergeTreeIndices skip_indices;
     ASTPtr skip_indices_ast;
+    ExpressionActionsPtr skip_indices_expr;

     /// Names of columns for primary key + secondary sorting columns.
     Names sorting_key_columns;
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
index 3acb5fa5e21..e2b75a503fc 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
@@ -636,9 +636,9 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor
         merge_entry, sum_input_rows_upper_bound, column_sizes, watch_prev_elapsed, merge_alg));

     BlockInputStreamPtr stream = std::move(input);
-    for (const auto & index : data.skip_indices) {
+    if (data.skip_indices_expr) {
         stream = std::make_shared<MaterializingBlockInputStream>(
-                std::make_shared<ExpressionBlockInputStream>(stream, index->expr));
+                std::make_shared<ExpressionBlockInputStream>(stream, data.skip_indices_expr));
     }

     if (data.hasPrimaryKey()) {
@@ -911,9 +911,9 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor
     {
         /// All columns are modified, proceed to write a new part from scratch.
-        for (const auto & index : data.skip_indices)
+        if (data.skip_indices_expr)
             in = std::make_shared<MaterializingBlockInputStream>(
-                std::make_shared<ExpressionBlockInputStream>(in, index->expr));
+                std::make_shared<ExpressionBlockInputStream>(in, data.skip_indices_expr));

         if (data.hasPrimaryKey())
             in = std::make_shared<MaterializingBlockInputStream>(
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
index 05a5253323c..7a853855ba7 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp
@@ -214,8 +214,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa
     NamesAndTypesList columns = data.getColumns().getAllPhysical().filter(block.getNames());
     MergedBlockOutputStream out(data, new_data_part->getFullPath(), columns, compression_codec);

-    for (auto index : data.skip_indices)
-        index->expr->execute(block);
+    if (data.skip_indices_expr)
+        data.skip_indices_expr->execute(block);

     out.writePrefix();
     out.writeWithPermutation(block, perm_ptr);

From 4bf0ac886befdaaacdc569706f0d22b5d5b11456 Mon Sep 17 00:00:00 2001
From: Nikita Vasilev
Date: Sun, 20 Jan 2019 12:19:34 +0300
Subject: [PATCH 089/586] added useful parsers and ast classes

---
 dbms/src/Parsers/ASTCreateQuery.h      | 50 +++++++++++++++++++++
 dbms/src/Parsers/ParserCreateQuery.cpp | 62 ++++++++++++++++++++++++++
 dbms/src/Parsers/ParserCreateQuery.h   | 17 +++++++
 3 files changed, 129 insertions(+)

diff --git a/dbms/src/Parsers/ASTCreateQuery.h b/dbms/src/Parsers/ASTCreateQuery.h
index c052b3c9c60..4f5e3faca50 100644
--- a/dbms/src/Parsers/ASTCreateQuery.h
+++ b/dbms/src/Parsers/ASTCreateQuery.h
@@ -90,6 +90,44 @@ public:
 };


+class ASTColumns : public IAST
+{
+public:
+    ASTExpressionList * columns = nullptr;
+    ASTExpressionList * indices = nullptr;
+
+    String getID(char) const override { return "Columns definition"; }
+
+    ASTPtr clone() const override
+    {
+        auto res = std::make_shared<ASTColumns>();
+
+        if (columns)
+            res->set(res->columns, columns->clone());
+        if (indices)
+            res->set(res->indices, indices->clone());
+
+        return res;
+    }
+
+    void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override
+    {
+        ASTExpressionList list;
+
+        if (columns)
+            for (const auto & column : columns->children)
+                list.children.push_back(column);
+
+        if (indices)
+            for (const auto & index : indices->children)
+                list.children.push_back(index);
+
+        if (!list.children.empty())
+            list.formatImpl(s, state, frame;
+    }
+};
+
+
 /// CREATE TABLE or ATTACH TABLE query
 class ASTCreateQuery : public ASTQueryWithTableAndOutput, public ASTQueryWithOnCluster
 {
@@ -100,6 +138,7 @@ public:
     bool is_materialized_view{false};
     bool is_populate{false};
     ASTExpressionList * columns = nullptr;
+    ASTColumns * columns_list = nullptr;
     String to_database;   /// For CREATE MATERIALIZED VIEW mv TO table.
     String to_table;
     ASTStorage * storage = nullptr;
@@ -115,6 +154,8 @@ public:
         auto res = std::make_shared<ASTCreateQuery>(*this);
         res->children.clear();

+        if (columns_list)
+            res->set(res->columns_list, columns_list->clone());
         if (columns)
             res->set(res->columns, columns->clone());
         if (storage)
@@ -184,6 +225,15 @@ protected:
                 << (!as_database.empty() ? backQuoteIfNeed(as_database) + "." : "") << backQuoteIfNeed(as_table);
         }

+        if (columns_list)
+        {
+            settings.ostr << (settings.one_line ? " (" : "\n(");
+            FormatStateStacked frame_nested = frame;
+            ++frame_nested.indent;
+            columns_list->formatImpl(settings, state, frame_nested);
+            settings.ostr << (settings.one_line ? ")" : "\n)");
+        }
+
         if (columns)
         {
             settings.ostr << (settings.one_line ?
" (" : "\n("); diff --git a/dbms/src/Parsers/ParserCreateQuery.cpp b/dbms/src/Parsers/ParserCreateQuery.cpp index 891bccaa45a..cdbba778471 100644 --- a/dbms/src/Parsers/ParserCreateQuery.cpp +++ b/dbms/src/Parsers/ParserCreateQuery.cpp @@ -139,6 +139,35 @@ bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expe return true; } + +bool ParserColumnAndIndexDeclaraion::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKeyword s_index("INDEX"); + + ParserIndexDeclaration index_p; + ParserColumnDeclaration column_p; + + ASTPtr column = nullptr; + ASTPtr index = nullptr; + + if (s_index.ignore(pos, expected)) + { + if (!index_p.parse(pos, index, expected)) + return false; + } + else + { + if (!column_p.parse(pos, column, expected)) + return false; + } + + if (column) + node = column; + else + node = index; + return true; +} + bool ParserIndexDeclarationList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { return ParserList(std::make_unique(), std::make_unique(TokenType::Comma), false) @@ -146,6 +175,39 @@ bool ParserIndexDeclarationList::parseImpl(Pos & pos, ASTPtr & node, Expected & } +bool ParserColumnsOrIndicesDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ASTPtr list; + if (!ParserList(std::make_unique(), std::make_unique(TokenType::Comma), false) + .parse(pos, list, expected)) + return false; + + ASTPtr columns = std::make_shared(); + ASTPtr indices = std::make_shared(); + + for (const auto & elem : list->children) + { + if (dynamic_cast(elem.get())) + columns->children.push_back(elem); + else if (dynamic_cast(elem.get())) + indices->children.push_back(elem); + else + return false; + } + + auto res = std::make_shared(); + + if (!columns->children.empty()) + res->set(res->columns, columns); + if (!indices->children.empty()) + res->set(res->indices, indices); + + node = res; + + return true; +} + + bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword s_engine("ENGINE"); diff --git a/dbms/src/Parsers/ParserCreateQuery.h b/dbms/src/Parsers/ParserCreateQuery.h index 90b5c99c851..e77b80a3b98 100644 --- a/dbms/src/Parsers/ParserCreateQuery.h +++ b/dbms/src/Parsers/ParserCreateQuery.h @@ -231,6 +231,15 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; + +class ParserColumnAndIndexDeclaraion : public IParserBase +{ +protected: + const char * getName() const override { return "column or index declaration"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + + class ParserIndexDeclarationList : public IParserBase { protected: @@ -239,6 +248,14 @@ protected: }; +class ParserColumnsOrIndicesDeclaration : public IParserBase +{ + protected: + const char * getName() const override { return "columns or indices declaration"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + + /** * ENGINE = name [PARTITION BY expr] [ORDER BY expr] [PRIMARY KEY expr] [SAMPLE BY expr] * [INDEXES name BY expr TYPE type(args) GRANULARITY value, ...] [SETTINGS name = value, ...] 
From 8152bc40d5c8a2df5377f08a3f2f93177d7805a4 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 20 Jan 2019 12:24:31 +0300 Subject: [PATCH 090/586] fix --- dbms/src/Parsers/ASTCreateQuery.h | 2 +- dbms/src/Parsers/ParserCreateQuery.cpp | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/dbms/src/Parsers/ASTCreateQuery.h b/dbms/src/Parsers/ASTCreateQuery.h index 4f5e3faca50..59e3c7abe35 100644 --- a/dbms/src/Parsers/ASTCreateQuery.h +++ b/dbms/src/Parsers/ASTCreateQuery.h @@ -123,7 +123,7 @@ public: list.children.push_back(index); if (!list.children.empty()) - list.formatImpl(s, state, frame; + list.formatImpl(s, state, frame); } }; diff --git a/dbms/src/Parsers/ParserCreateQuery.cpp b/dbms/src/Parsers/ParserCreateQuery.cpp index cdbba778471..8867d1714e4 100644 --- a/dbms/src/Parsers/ParserCreateQuery.cpp +++ b/dbms/src/Parsers/ParserCreateQuery.cpp @@ -94,7 +94,6 @@ bool ParserColumnDeclarationList::parseImpl(Pos & pos, ASTPtr & node, Expected & bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ParserKeyword s_by("BY"); ParserKeyword s_type("TYPE"); ParserKeyword s_granularity("GRANULARITY"); @@ -111,9 +110,6 @@ bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expe if (!name_p.parse(pos, name, expected)) return false; - if (!s_by.ignore(pos, expected)) - return false; - if (!expression_p.parse(pos, expr, expected)) return false; From bc0e4facc81240b23b379f2df74ce48fcdb685d4 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 20 Jan 2019 12:33:29 +0300 Subject: [PATCH 091/586] fix comments --- dbms/src/Parsers/ParserCreateQuery.cpp | 2 +- dbms/src/Parsers/ParserCreateQuery.h | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/dbms/src/Parsers/ParserCreateQuery.cpp b/dbms/src/Parsers/ParserCreateQuery.cpp index 8867d1714e4..271c65395f8 100644 --- a/dbms/src/Parsers/ParserCreateQuery.cpp +++ b/dbms/src/Parsers/ParserCreateQuery.cpp @@ -171,7 +171,7 @@ bool ParserIndexDeclarationList::parseImpl(Pos & pos, ASTPtr & node, Expected & } -bool ParserColumnsOrIndicesDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +bool ParserColumnsOrIndicesDeclarationList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTPtr list; if (!ParserList(std::make_unique(), std::make_unique(TokenType::Comma), false) diff --git a/dbms/src/Parsers/ParserCreateQuery.h b/dbms/src/Parsers/ParserCreateQuery.h index e77b80a3b98..a471c5bca93 100644 --- a/dbms/src/Parsers/ParserCreateQuery.h +++ b/dbms/src/Parsers/ParserCreateQuery.h @@ -248,17 +248,16 @@ protected: }; -class ParserColumnsOrIndicesDeclaration : public IParserBase +class ParserColumnsOrIndicesDeclarationList : public IParserBase { protected: - const char * getName() const override { return "columns or indices declaration"; } + const char * getName() const override { return "columns or indices declaration list"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; /** - * ENGINE = name [PARTITION BY expr] [ORDER BY expr] [PRIMARY KEY expr] [SAMPLE BY expr] - * [INDEXES name BY expr TYPE type(args) GRANULARITY value, ...] [SETTINGS name = value, ...] + * ENGINE = name [PARTITION BY expr] [ORDER BY expr] [PRIMARY KEY expr] [SAMPLE BY expr] [SETTINGS name = value, ...] */ class ParserStorage : public IParserBase { @@ -274,6 +273,8 @@ protected: * name1 type1, * name2 type2, * ... + * INDEX name1 expr TYPE type1(args) GRANULARITY value, + * ... 
* ) ENGINE = engine * * Or: From 13a4fb2756eebf1b694b6c79a53575f16f6d14fa Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 20 Jan 2019 14:03:21 +0300 Subject: [PATCH 092/586] replaced columns --- dbms/src/Databases/DatabaseOrdinary.cpp | 5 ++++- dbms/src/Databases/DatabasesCommon.cpp | 4 ++-- .../src/Interpreters/InterpreterCreateQuery.cpp | 17 ++++++++++------- .../src/Interpreters/InterpreterSystemQuery.cpp | 2 +- dbms/src/Interpreters/SystemLog.h | 5 ++++- dbms/src/Parsers/ParserCreateQuery.cpp | 8 ++++---- dbms/src/Storages/StorageMaterializedView.cpp | 6 +++++- 7 files changed, 30 insertions(+), 17 deletions(-) diff --git a/dbms/src/Databases/DatabaseOrdinary.cpp b/dbms/src/Databases/DatabaseOrdinary.cpp index 958d65b7128..ceb85626a2a 100644 --- a/dbms/src/Databases/DatabaseOrdinary.cpp +++ b/dbms/src/Databases/DatabaseOrdinary.cpp @@ -531,7 +531,10 @@ void DatabaseOrdinary::alterTable( ASTCreateQuery & ast_create_query = typeid_cast(*ast); ASTPtr new_columns = InterpreterCreateQuery::formatColumns(columns); - ast_create_query.replace(ast_create_query.columns, new_columns); + auto new_columns_list = std::make_shared(); + new_columns_list->set(new_columns_list->columns, new_columns); + + ast_create_query.replace(ast_create_query.columns_list, new_columns_list); if (storage_modifier) storage_modifier(*ast_create_query.storage); diff --git a/dbms/src/Databases/DatabasesCommon.cpp b/dbms/src/Databases/DatabasesCommon.cpp index 3189701d13c..780140969ad 100644 --- a/dbms/src/Databases/DatabasesCommon.cpp +++ b/dbms/src/Databases/DatabasesCommon.cpp @@ -68,10 +68,10 @@ std::pair createTableFromDefinition( /// We do not directly use `InterpreterCreateQuery::execute`, because /// - the database has not been created yet; /// - the code is simpler, since the query is already brought to a suitable form. - if (!ast_create_query.columns) + if (!ast_create_query.columns_list || !ast_create_query.columns_list->columns) throw Exception("Missing definition of columns.", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED); - ColumnsDescription columns = InterpreterCreateQuery::getColumnsDescription(*ast_create_query.columns, context); + ColumnsDescription columns = InterpreterCreateQuery::getColumnsDescription(*ast_create_query.columns_list->columns, context); return { diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index eeb1af65d47..64d2ffd5e3b 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -454,9 +454,9 @@ ColumnsDescription InterpreterCreateQuery::setColumns( { ColumnsDescription res; - if (create.columns) + if (create.columns_list && create.columns_list->columns) { - res = getColumnsDescription(*create.columns, context); + res = getColumnsDescription(*create.columns_list->columns, context); } else if (!create.as_table.empty()) { @@ -472,10 +472,13 @@ ColumnsDescription InterpreterCreateQuery::setColumns( /// Even if query has list of columns, canonicalize it (unfold Nested columns). 
ASTPtr new_columns = formatColumns(res); - if (create.columns) - create.replace(create.columns, new_columns); + auto new_columns_list = std::make_shared(); + new_columns_list->set(new_columns_list->columns, new_columns); + + if (create.columns_list) + create.replace(create.columns_list, new_columns_list); else - create.set(create.columns, new_columns); + create.set(create.columns_list, new_columns_list); /// Check for duplicates std::set all_columns; @@ -555,7 +558,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) String table_name_escaped = escapeForFileName(table_name); // If this is a stub ATTACH query, read the query definition from the database - if (create.attach && !create.storage && !create.columns) + if (create.attach && !create.storage && !create.columns_list) { // Table SQL definition is available even if the table is detached auto query = context.getCreateTableQuery(database_name, table_name); @@ -574,7 +577,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) } Block as_select_sample; - if (create.select && (!create.attach || !create.columns)) + if (create.select && (!create.attach || !create.columns_list)) as_select_sample = InterpreterSelectWithUnionQuery::getSampleBlock(create.select->clone(), context); String as_database_name = create.as_database.empty() ? current_database : create.as_database; diff --git a/dbms/src/Interpreters/InterpreterSystemQuery.cpp b/dbms/src/Interpreters/InterpreterSystemQuery.cpp index fc472ad8a9e..06994f8e0ed 100644 --- a/dbms/src/Interpreters/InterpreterSystemQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSystemQuery.cpp @@ -252,7 +252,7 @@ StoragePtr InterpreterSystemQuery::tryRestartReplica(const String & database_nam create.attach = true; std::string data_path = database->getDataPath(); - auto columns = InterpreterCreateQuery::getColumnsDescription(*create.columns, system_context); + auto columns = InterpreterCreateQuery::getColumnsDescription(*create.columns_list->columns, system_context); StoragePtr table = StorageFactory::instance().get(create, data_path, diff --git a/dbms/src/Interpreters/SystemLog.h b/dbms/src/Interpreters/SystemLog.h index 1a4283fae8e..dd2569021c6 100644 --- a/dbms/src/Interpreters/SystemLog.h +++ b/dbms/src/Interpreters/SystemLog.h @@ -357,7 +357,10 @@ void SystemLog::prepareTable() create->table = table_name; Block sample = LogElement::createBlock(); - create->set(create->columns, InterpreterCreateQuery::formatColumns(sample.getNamesAndTypesList())); + + auto new_columns_list = std::make_shared(); + new_columns_list->set(new_columns_list->columns, InterpreterCreateQuery::formatColumns(sample.getNamesAndTypesList())); + create->set(create->columns_list, new_columns_list); ParserStorage storage_parser; ASTPtr storage_ast = parseQuery( diff --git a/dbms/src/Parsers/ParserCreateQuery.cpp b/dbms/src/Parsers/ParserCreateQuery.cpp index 271c65395f8..78c7a7e9823 100644 --- a/dbms/src/Parsers/ParserCreateQuery.cpp +++ b/dbms/src/Parsers/ParserCreateQuery.cpp @@ -318,7 +318,7 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserToken s_rparen(TokenType::ClosingRoundBracket); ParserStorage storage_p; ParserIdentifier name_p; - ParserColumnDeclarationList columns_p; + ParserColumnsOrIndicesDeclarationList columns_or_indices_p; ParserSelectWithUnionQuery select_p; ASTPtr database; @@ -391,7 +391,7 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) /// List of columns. 
if (s_lparen.ignore(pos, expected)) { - if (!columns_p.parse(pos, columns, expected)) + if (!columns_or_indices_p.parse(pos, columns, expected)) return false; if (!s_rparen.ignore(pos, expected)) @@ -493,7 +493,7 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) /// Optional - a list of columns can be specified. It must fully comply with SELECT. if (s_lparen.ignore(pos, expected)) { - if (!columns_p.parse(pos, columns, expected)) + if (!columns_or_indices_p.parse(pos, columns, expected)) return false; if (!s_rparen.ignore(pos, expected)) @@ -535,7 +535,7 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) getIdentifierName(to_database, query->to_database); getIdentifierName(to_table, query->to_table); - query->set(query->columns, columns); + query->set(query->columns_list, columns); query->set(query->storage, storage); getIdentifierName(as_database, query->as_database); diff --git a/dbms/src/Storages/StorageMaterializedView.cpp b/dbms/src/Storages/StorageMaterializedView.cpp index e2444862ca8..19d083896d3 100644 --- a/dbms/src/Storages/StorageMaterializedView.cpp +++ b/dbms/src/Storages/StorageMaterializedView.cpp @@ -139,7 +139,11 @@ StorageMaterializedView::StorageMaterializedView( auto manual_create_query = std::make_shared(); manual_create_query->database = target_database_name; manual_create_query->table = target_table_name; - manual_create_query->set(manual_create_query->columns, query.columns->ptr()); + + auto new_columns_list = std::make_shared(); + new_columns_list->set(new_columns_list->columns, query.columns->ptr()); + + manual_create_query->set(manual_create_query->columns_list, new_columns_list); manual_create_query->set(manual_create_query->storage, query.storage->ptr()); /// Execute the query. 
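The mechanical change repeated across these call sites is the same: the bare ASTExpressionList of column declarations is no longer attached to the CREATE query directly, but wrapped in an ASTColumns node first. A minimal sketch of the pattern (the helper name is invented):

    #include <Parsers/ASTCreateQuery.h>

    /// Wrap a parsed column list and attach it through the new columns_list member.
    DB::ASTPtr wrapColumns(const DB::ASTPtr & column_declarations)
    {
        auto create = std::make_shared<DB::ASTCreateQuery>();
        auto columns_list = std::make_shared<DB::ASTColumns>();
        columns_list->set(columns_list->columns, column_declarations->clone());
        create->set(create->columns_list, columns_list);
        return create;
    }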
From 5e626be07f57144b7ffcdd681457598b0799ae49 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 20 Jan 2019 14:34:13 +0300 Subject: [PATCH 093/586] fix --- dbms/programs/copier/ClusterCopier.cpp | 11 +++++++---- dbms/src/Parsers/ASTCreateQuery.h | 12 ------------ dbms/src/Parsers/ASTIndexDeclaration.h | 9 +++++++-- dbms/src/Parsers/ParserCreateQuery.cpp | 8 ++++++++ dbms/src/Storages/StorageMaterializedView.cpp | 2 +- 5 files changed, 23 insertions(+), 19 deletions(-) diff --git a/dbms/programs/copier/ClusterCopier.cpp b/dbms/programs/copier/ClusterCopier.cpp index 4f285c83f17..e63f98fac7f 100644 --- a/dbms/programs/copier/ClusterCopier.cpp +++ b/dbms/programs/copier/ClusterCopier.cpp @@ -1179,7 +1179,7 @@ protected: /// Removes MATERIALIZED and ALIAS columns from create table query static ASTPtr removeAliasColumnsFromCreateQuery(const ASTPtr & query_ast) { - const ASTs & column_asts = typeid_cast(*query_ast).columns->children; + const ASTs & column_asts = typeid_cast(*query_ast).columns_list->columns->children; auto new_columns = std::make_shared(); for (const ASTPtr & column_ast : column_asts) @@ -1198,8 +1198,11 @@ protected: ASTPtr new_query_ast = query_ast->clone(); ASTCreateQuery & new_query = typeid_cast(*new_query_ast); - new_query.columns = new_columns.get(); - new_query.children.at(0) = std::move(new_columns); + + auto new_columns_list = std::make_shared(); + new_columns_list->set(new_columns_list->columns, new_columns); + + new_query.replace(new_query.columns_list, new_columns_list); return new_query_ast; } @@ -1217,7 +1220,7 @@ protected: res->table = new_table.second; res->children.clear(); - res->set(res->columns, create.columns->clone()); + res->set(res->columns_list, create.columns_list->clone()); res->set(res->storage, new_storage_ast->clone()); return res; diff --git a/dbms/src/Parsers/ASTCreateQuery.h b/dbms/src/Parsers/ASTCreateQuery.h index 59e3c7abe35..9ff877730a8 100644 --- a/dbms/src/Parsers/ASTCreateQuery.h +++ b/dbms/src/Parsers/ASTCreateQuery.h @@ -137,7 +137,6 @@ public: bool is_view{false}; bool is_materialized_view{false}; bool is_populate{false}; - ASTExpressionList * columns = nullptr; ASTColumns * columns_list = nullptr; String to_database; /// For CREATE MATERIALIZED VIEW mv TO table. String to_table; @@ -156,8 +155,6 @@ public: if (columns_list) res->set(res->columns_list, columns_list->clone()); - if (columns) - res->set(res->columns, columns->clone()); if (storage) res->set(res->storage, storage->clone()); if (select) @@ -234,15 +231,6 @@ protected: settings.ostr << (settings.one_line ? ")" : "\n)"); } - if (columns) - { - settings.ostr << (settings.one_line ? " (" : "\n("); - FormatStateStacked frame_nested = frame; - ++frame_nested.indent; - columns->formatImpl(settings, state, frame_nested); - settings.ostr << (settings.one_line ? ")" : "\n)"); - } - if (storage) storage->formatImpl(settings, state, frame); diff --git a/dbms/src/Parsers/ASTIndexDeclaration.h b/dbms/src/Parsers/ASTIndexDeclaration.h index 968fb3f3d01..c9af1e7ff84 100644 --- a/dbms/src/Parsers/ASTIndexDeclaration.h +++ b/dbms/src/Parsers/ASTIndexDeclaration.h @@ -41,9 +41,14 @@ public: void formatImpl(const FormatSettings & s, FormatState &state, FormatStateStacked frame) const override { - s.ostr << name; - s.ostr << (s.hilite ? hilite_keyword : "") << " BY " << (s.hilite ? hilite_none : ""); + frame.need_parens = false; + std::string indent_str = s.one_line ? 
"" : std::string(4 * frame.indent, ' '); + + s.ostr << s.nl_or_ws << indent_str << backQuoteIfNeed(name); + + s.ostr << " ("; expr->formatImpl(s, state, frame); + s.ostr << ")"; s.ostr << (s.hilite ? hilite_keyword : "") << " TYPE " << (s.hilite ? hilite_none : ""); type->formatImpl(s, state, frame); s.ostr << (s.hilite ? hilite_keyword : "") << " GRANULARITY " << (s.hilite ? hilite_none : ""); diff --git a/dbms/src/Parsers/ParserCreateQuery.cpp b/dbms/src/Parsers/ParserCreateQuery.cpp index 78c7a7e9823..4030c1aa9b2 100644 --- a/dbms/src/Parsers/ParserCreateQuery.cpp +++ b/dbms/src/Parsers/ParserCreateQuery.cpp @@ -96,6 +96,8 @@ bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expe { ParserKeyword s_type("TYPE"); ParserKeyword s_granularity("GRANULARITY"); + ParserToken s_lparen(TokenType::OpeningRoundBracket); + ParserToken s_rparen(TokenType::ClosingRoundBracket); ParserIdentifier name_p; ParserIdentifierWithOptionalParameters ident_with_optional_params_p; @@ -110,9 +112,15 @@ bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expe if (!name_p.parse(pos, name, expected)) return false; + if (!s_lparen.ignore(pos, expected)) + return false; + if (!expression_p.parse(pos, expr, expected)) return false; + if (!s_rparen.ignore(pos, expected)) + return false; + if (!s_type.ignore(pos, expected)) return false; diff --git a/dbms/src/Storages/StorageMaterializedView.cpp b/dbms/src/Storages/StorageMaterializedView.cpp index 19d083896d3..ae33cac8427 100644 --- a/dbms/src/Storages/StorageMaterializedView.cpp +++ b/dbms/src/Storages/StorageMaterializedView.cpp @@ -141,7 +141,7 @@ StorageMaterializedView::StorageMaterializedView( manual_create_query->table = target_table_name; auto new_columns_list = std::make_shared(); - new_columns_list->set(new_columns_list->columns, query.columns->ptr()); + new_columns_list->set(new_columns_list->columns, query.columns_list->columns->ptr()); manual_create_query->set(manual_create_query->columns_list, new_columns_list); manual_create_query->set(manual_create_query->storage, query.storage->ptr()); From cf377b98284726f5ae2a1e6bc768c9ea1d3a6030 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 20 Jan 2019 14:40:58 +0300 Subject: [PATCH 094/586] fixed parsing --- dbms/src/Parsers/ParserCreateQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Parsers/ParserCreateQuery.cpp b/dbms/src/Parsers/ParserCreateQuery.cpp index 4030c1aa9b2..565817c3d08 100644 --- a/dbms/src/Parsers/ParserCreateQuery.cpp +++ b/dbms/src/Parsers/ParserCreateQuery.cpp @@ -101,7 +101,7 @@ bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expe ParserIdentifier name_p; ParserIdentifierWithOptionalParameters ident_with_optional_params_p; - ParserExpression expression_p; + ParserExpressionList expression_p(false); ParserUnsignedInteger granularity_p; ASTPtr name; From 9120bebe56970e0a5ecc5bf6fa4e3e000e45f585 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 20 Jan 2019 18:02:19 +0300 Subject: [PATCH 095/586] fixed printing --- dbms/src/Databases/DatabaseOrdinary.cpp | 5 +- .../Interpreters/InterpreterCreateQuery.cpp | 18 +++-- dbms/src/Parsers/ASTCreateQuery.h | 68 +++++++++++++++---- dbms/src/Parsers/ASTIndexDeclaration.h | 7 +- dbms/src/Parsers/ParserCreateQuery.cpp | 33 ++------- .../MergeTree/registerStorageMergeTree.cpp | 7 +- dbms/src/Storages/StorageFactory.cpp | 4 +- dbms/src/Storages/StorageMergeTree.cpp | 5 +- .../Storages/StorageReplicatedMergeTree.cpp | 5 +- 9 files 
changed, 82 insertions(+), 70 deletions(-) diff --git a/dbms/src/Databases/DatabaseOrdinary.cpp b/dbms/src/Databases/DatabaseOrdinary.cpp index ceb85626a2a..05a93510423 100644 --- a/dbms/src/Databases/DatabaseOrdinary.cpp +++ b/dbms/src/Databases/DatabaseOrdinary.cpp @@ -531,10 +531,7 @@ void DatabaseOrdinary::alterTable( ASTCreateQuery & ast_create_query = typeid_cast(*ast); ASTPtr new_columns = InterpreterCreateQuery::formatColumns(columns); - auto new_columns_list = std::make_shared(); - new_columns_list->set(new_columns_list->columns, new_columns); - - ast_create_query.replace(ast_create_query.columns_list, new_columns_list); + ast_create_query.columns_list->replace(ast_create_query.columns_list->columns, new_columns); if (storage_modifier) storage_modifier(*ast_create_query.storage); diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index 64d2ffd5e3b..fe373455f66 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -106,7 +106,8 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) const ASTFunction & engine = *storage.engine; /// Currently, there are no database engines, that support any arguments. if (engine.arguments || engine.parameters || storage.partition_by || storage.primary_key - || storage.order_by || storage.sample_by || (storage.indices && !storage.indices->children.empty()) || storage.settings) + || storage.order_by || storage.sample_by || storage.settings || + (create.columns_list && create.columns_list->indices && !create.columns_list->indices->children.empty())) { std::stringstream ostr; formatAST(storage, ostr, false, false); @@ -472,13 +473,16 @@ ColumnsDescription InterpreterCreateQuery::setColumns( /// Even if query has list of columns, canonicalize it (unfold Nested columns). ASTPtr new_columns = formatColumns(res); - auto new_columns_list = std::make_shared(); - new_columns_list->set(new_columns_list->columns, new_columns); - - if (create.columns_list) - create.replace(create.columns_list, new_columns_list); - else + if (!create.columns_list) + { + auto new_columns_list = std::make_shared(); create.set(create.columns_list, new_columns_list); + } + + if (create.columns_list->columns) + create.columns_list->replace(create.columns_list->columns, new_columns); + else + create.columns_list->set(create.columns_list->columns, new_columns); /// Check for duplicates std::set all_columns; diff --git a/dbms/src/Parsers/ASTCreateQuery.h b/dbms/src/Parsers/ASTCreateQuery.h index 9ff877730a8..9315cd17761 100644 --- a/dbms/src/Parsers/ASTCreateQuery.h +++ b/dbms/src/Parsers/ASTCreateQuery.h @@ -19,7 +19,6 @@ public: IAST * primary_key = nullptr; IAST * order_by = nullptr; IAST * sample_by = nullptr; - ASTExpressionList * indices = nullptr; ASTSetQuery * settings = nullptr; String getID(char) const override { return "Storage definition"; } @@ -39,8 +38,6 @@ public: res->set(res->order_by, order_by->clone()); if (sample_by) res->set(res->sample_by, sample_by->clone()); - if (indices) - res->set(res->indices, indices->clone()); if (settings) res->set(res->settings, settings->clone()); @@ -75,11 +72,6 @@ public: s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << "SAMPLE BY " << (s.hilite ? hilite_none : ""); sample_by->formatImpl(s, state, frame); } - if (indices) - { - s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << "INDICES " << (s.hilite ? 
hilite_none : ""); - indices->formatImpl(s, state, frame); - } if (settings) { s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << "SETTINGS " << (s.hilite ? hilite_none : ""); @@ -92,6 +84,47 @@ public: class ASTColumns : public IAST { +private: + class ASTColumnsElement : public IAST + { + public: + String prefix; + IAST * elem; + + String getID(char c) const override { return "ASTColumnsElement for " + elem->getID(c); }; + + ASTPtr clone() const override + { + auto res = std::make_shared(); + res->prefix = prefix; + if (elem) + res->set(res->elem, elem->clone()); + return res; + } + + void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override + { + if (!elem) + return; + + if (prefix.empty()) + { + elem->formatImpl(s, state, frame); + return; + } + + frame.need_parens = false; + std::string indent_str = s.one_line ? "" : std::string(4 * frame.indent, ' '); + + s.ostr << s.nl_or_ws << indent_str; + s.ostr << (s.hilite ? hilite_keyword : "") << prefix << (s.hilite ? hilite_none : ""); + + FormatStateStacked frame_nested = frame; + ++frame_nested.indent; + + elem->formatImpl(s, state, frame_nested); + } + }; public: ASTExpressionList * columns = nullptr; ASTExpressionList * indices = nullptr; @@ -115,12 +148,21 @@ public: ASTExpressionList list; if (columns) - for (const auto & column : columns->children) - list.children.push_back(column); - + for (const auto &column : columns->children) + { + auto elem = std::make_shared(); + elem->prefix = ""; + elem->set(elem->elem, column->clone()); + list.children.push_back(elem); + } if (indices) - for (const auto & index : indices->children) - list.children.push_back(index); + for (const auto &index : indices->children) + { + auto elem = std::make_shared(); + elem->prefix = "INDEX"; + elem->set(elem->elem, index->clone()); + list.children.push_back(elem); + } if (!list.children.empty()) list.formatImpl(s, state, frame); diff --git a/dbms/src/Parsers/ASTIndexDeclaration.h b/dbms/src/Parsers/ASTIndexDeclaration.h index c9af1e7ff84..dca70a72137 100644 --- a/dbms/src/Parsers/ASTIndexDeclaration.h +++ b/dbms/src/Parsers/ASTIndexDeclaration.h @@ -44,11 +44,10 @@ public: frame.need_parens = false; std::string indent_str = s.one_line ? "" : std::string(4 * frame.indent, ' '); - s.ostr << s.nl_or_ws << indent_str << backQuoteIfNeed(name); - - s.ostr << " ("; + s.ostr << s.nl_or_ws << indent_str; + s.ostr << backQuoteIfNeed(name); + s.ostr << " "; expr->formatImpl(s, state, frame); - s.ostr << ")"; s.ostr << (s.hilite ? hilite_keyword : "") << " TYPE " << (s.hilite ? hilite_none : ""); type->formatImpl(s, state, frame); s.ostr << (s.hilite ? hilite_keyword : "") << " GRANULARITY " << (s.hilite ? 
hilite_none : ""); diff --git a/dbms/src/Parsers/ParserCreateQuery.cpp b/dbms/src/Parsers/ParserCreateQuery.cpp index 565817c3d08..2a7244071d6 100644 --- a/dbms/src/Parsers/ParserCreateQuery.cpp +++ b/dbms/src/Parsers/ParserCreateQuery.cpp @@ -96,12 +96,10 @@ bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expe { ParserKeyword s_type("TYPE"); ParserKeyword s_granularity("GRANULARITY"); - ParserToken s_lparen(TokenType::OpeningRoundBracket); - ParserToken s_rparen(TokenType::ClosingRoundBracket); ParserIdentifier name_p; ParserIdentifierWithOptionalParameters ident_with_optional_params_p; - ParserExpressionList expression_p(false); + ParserExpression expression_p; ParserUnsignedInteger granularity_p; ASTPtr name; @@ -112,15 +110,9 @@ bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expe if (!name_p.parse(pos, name, expected)) return false; - if (!s_lparen.ignore(pos, expected)) - return false; - if (!expression_p.parse(pos, expr, expected)) return false; - if (!s_rparen.ignore(pos, expected)) - return false; - if (!s_type.ignore(pos, expected)) return false; @@ -151,24 +143,20 @@ bool ParserColumnAndIndexDeclaraion::parseImpl(Pos & pos, ASTPtr & node, Expecte ParserIndexDeclaration index_p; ParserColumnDeclaration column_p; - ASTPtr column = nullptr; - ASTPtr index = nullptr; + ASTPtr new_node = nullptr; if (s_index.ignore(pos, expected)) { - if (!index_p.parse(pos, index, expected)) + if (!index_p.parse(pos, new_node, expected)) return false; } else { - if (!column_p.parse(pos, column, expected)) + if (!column_p.parse(pos, new_node, expected)) return false; } - if (column) - node = column; - else - node = index; + node = new_node; return true; } @@ -220,20 +208,17 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserKeyword s_primary_key("PRIMARY KEY"); ParserKeyword s_order_by("ORDER BY"); ParserKeyword s_sample_by("SAMPLE BY"); - ParserKeyword s_indexes("INDICES"); ParserKeyword s_settings("SETTINGS"); ParserIdentifierWithOptionalParameters ident_with_optional_params_p; ParserExpression expression_p; ParserSetQuery settings_p(/* parse_only_internals_ = */ true); - ParserIndexDeclarationList indexes_p; ASTPtr engine; ASTPtr partition_by; ASTPtr primary_key; ASTPtr order_by; ASTPtr sample_by; - ASTPtr indices; ASTPtr settings; if (!s_engine.ignore(pos, expected)) @@ -278,13 +263,6 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; } - if (s_indexes.ignore(pos, expected)) { - if (indexes_p.parse(pos, indices, expected)) - continue; - else - return false; - } - if (s_settings.ignore(pos, expected)) { if (!settings_p.parse(pos, settings, expected)) @@ -300,7 +278,6 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) storage->set(storage->primary_key, primary_key); storage->set(storage->order_by, order_by); storage->set(storage->sample_by, sample_by); - storage->set(storage->indices, indices); storage->set(storage->settings, settings); diff --git a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp index 0572df4ae14..1827b031078 100644 --- a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -334,13 +334,12 @@ static StoragePtr create(const StorageFactory::Arguments & args) * - Sorting key in the ORDER BY clause; * - Primary key (if it is different from the sorting key) in the PRIMARY KEY clause; * - 
Sampling expression in the SAMPLE BY clause; - * - Secondary indices it the INDICES clause; * - Additional MergeTreeSettings in the SETTINGS clause; */ bool is_extended_storage_def = args.storage_def->partition_by || args.storage_def->primary_key || args.storage_def->order_by - || args.storage_def->sample_by || (args.storage_def->indices && !args.storage_def->indices->children.empty()) || args.storage_def->settings; + || args.storage_def->sample_by || (args.query.columns_list->indices && !args.query.columns_list->indices->children.empty()) || args.storage_def->settings; String name_part = args.engine_name.substr(0, args.engine_name.size() - strlen("MergeTree")); @@ -576,8 +575,8 @@ static StoragePtr create(const StorageFactory::Arguments & args) if (args.storage_def->sample_by) sample_by_ast = args.storage_def->sample_by->ptr(); - if (args.storage_def->indices) { - indices_ast = args.storage_def->indices->ptr(); + if (args.query.columns_list && args.query.columns_list->indices) { + indices_ast = args.query.columns_list->indices->ptr(); } storage_settings.loadFromQuery(*args.storage_def); diff --git a/dbms/src/Storages/StorageFactory.cpp b/dbms/src/Storages/StorageFactory.cpp index 93f5ed16abc..6150bfb1017 100644 --- a/dbms/src/Storages/StorageFactory.cpp +++ b/dbms/src/Storages/StorageFactory.cpp @@ -94,8 +94,8 @@ StoragePtr StorageFactory::get( ErrorCodes::BAD_ARGUMENTS); } - if ((storage_def->partition_by || storage_def->primary_key || storage_def->order_by - || storage_def->sample_by || (storage_def->indices && !storage_def->indices->children.empty())) + if ((storage_def->partition_by || storage_def->primary_key || storage_def->order_by || storage_def->sample_by || + (query.columns_list && query.columns_list->indices && !query.columns_list->indices->children.empty())) && !endsWith(name, "MergeTree")) { throw Exception( diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 0ec7eaee753..03946221365 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -247,10 +247,7 @@ void StorageMergeTree::alter( if (new_indices_ast.get() != data.skip_indices_ast.get()) { - if (new_indices_ast == nullptr) - storage_ast.indices = nullptr; - else - storage_ast.set(storage_ast.indices, new_indices_ast); + // TODO: alter indices } }; diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 9bc158f7754..a2fc5fac083 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -468,10 +468,7 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column storage_ast.set(storage_ast.order_by, new_order_by_ast); - if (new_indices_ast) - storage_ast.set(storage_ast.indices, new_indices_ast); - else - storage_ast.indices = nullptr; + // TODO: set indices }; } From d6545177fb0601d29d0938bf78aad42c64815d30 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 20 Jan 2019 18:50:11 +0300 Subject: [PATCH 096/586] fix err --- dbms/src/Storages/StorageFactory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/StorageFactory.cpp b/dbms/src/Storages/StorageFactory.cpp index 6150bfb1017..319258e3adf 100644 --- a/dbms/src/Storages/StorageFactory.cpp +++ b/dbms/src/Storages/StorageFactory.cpp @@ -99,7 +99,7 @@ StoragePtr StorageFactory::get( && !endsWith(name, "MergeTree")) { throw Exception( - "Engine " + name + " doesn't support PARTITION BY, PRIMARY KEY, 
ORDER BY or SAMPLE BY clauses. "
+            "Engine " + name + " doesn't support PARTITION BY, PRIMARY KEY, ORDER BY or SAMPLE BY clauses and skipping indices. "
             "Currently only the MergeTree family of engines supports them", ErrorCodes::BAD_ARGUMENTS);
     }

From 093b94ae38727b66c252394190efdc7ab8ae3826 Mon Sep 17 00:00:00 2001
From: Nikita Vasilev
Date: Sun, 20 Jan 2019 20:49:53 +0300
Subject: [PATCH 097/586] basic IndicesDescription

---
 dbms/src/Storages/IndicesDescription.cpp | 39 ++++++++++++++++++++
 dbms/src/Storages/IndicesDescription.h   | 22 +++++++++++
 2 files changed, 61 insertions(+)
 create mode 100644 dbms/src/Storages/IndicesDescription.cpp
 create mode 100644 dbms/src/Storages/IndicesDescription.h

diff --git a/dbms/src/Storages/IndicesDescription.cpp b/dbms/src/Storages/IndicesDescription.cpp
new file mode 100644
index 00000000000..7c626902931
--- /dev/null
+++ b/dbms/src/Storages/IndicesDescription.cpp
@@ -0,0 +1,39 @@
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+
+namespace DB
+{
+
+String IndicesDescription::toString() const
+{
+    ASTExpressionList list;
+    for (const auto & index : indices)
+        list.children.push_back(index);
+
+    return serializeAST(list, true);
+}
+
+IndicesDescription IndicesDescription::parse(const String & str)
+{
+    IndicesDescription res;
+    ParserIndexDeclarationList parser;
+    ASTPtr list = parseQuery(parser, str, 0);
+
+    for (const auto & index : list->children)
+        res.indices.push_back(std::dynamic_pointer_cast<ASTIndexDeclaration>(index));
+
+    return res;
+}
+
+}
\ No newline at end of file
diff --git a/dbms/src/Storages/IndicesDescription.h b/dbms/src/Storages/IndicesDescription.h
new file mode 100644
index 00000000000..64065f8bff5
--- /dev/null
+++ b/dbms/src/Storages/IndicesDescription.h
@@ -0,0 +1,22 @@
+#pragma once
+
+#include
+
+
+namespace DB
+{
+
+using IndicesAsts = std::vector<std::shared_ptr<ASTIndexDeclaration>>;
+
+struct IndicesDescription
+{
+    IndicesAsts indices;
+
+    IndicesDescription() = default;
+
+    String toString() const;
+
+    static IndicesDescription parse(const String & str);
+};
+
+}
\ No newline at end of file
From 3f5447f471ee1a328ea676350d06199fe064b02c Mon Sep 17 00:00:00 2001
From: Nikita Vasilev
Date: Sun, 20 Jan 2019 21:43:49 +0300
Subject: [PATCH 098/586] go to IndicesDescr

---
 dbms/src/Storages/MergeTree/MergeTreeData.cpp  | 17 ++++++++---------
 dbms/src/Storages/MergeTree/MergeTreeData.h    |  6 ++++--
 .../MergeTree/registerStorageMergeTree.cpp     | 18 +++++++++---------
 dbms/src/Storages/StorageMergeTree.cpp         |  9 ++++-----
 dbms/src/Storages/StorageMergeTree.h           |  2 +-
 .../Storages/StorageReplicatedMergeTree.cpp    |  9 ++++-----
 dbms/src/Storages/StorageReplicatedMergeTree.h |  4 ++--
 7 files changed, 32 insertions(+), 33 deletions(-)

diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
index 012edf311eb..4e4b65d92f4 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
@@ -89,13 +89,13 @@ namespace ErrorCodes
 MergeTreeData::MergeTreeData(
     const String & database_, const String & table_,
     const String & full_path_, const ColumnsDescription & columns_,
+    const IndicesDescription & indices_,
     Context & context_,
     const String & date_column_name,
     const ASTPtr & partition_by_ast_,
     const ASTPtr & order_by_ast_,
     const ASTPtr & primary_key_ast_,
     const ASTPtr & sample_by_ast_,
-    const ASTPtr & indices_ast_,
     const MergingParams & merging_params_,
     const MergeTreeSettings & settings_,
     bool require_part_metadata_,
@@ -116,7 +116,7 @@
MergeTreeData::MergeTreeData(
     data_parts_by_state_and_info(data_parts_indexes.get())
 {
     setPrimaryKeyAndColumns(order_by_ast_, primary_key_ast_, columns_);
-    setSkipIndices(indices_ast_);
+    setSkipIndices(indices_);

     /// NOTE: using the same columns list as is read when performing actual merges.
     merging_params.check(getColumns().getAllPhysical());
@@ -349,13 +349,13 @@ void MergeTreeData::setPrimaryKeyAndColumns(
 }


-void MergeTreeData::setSkipIndices(const ASTPtr &indices_asts, bool only_check)
+void MergeTreeData::setSkipIndices(const IndicesDescription & indices, bool only_check)
 {
-    if (!indices_asts)
+    if (indices.indices.empty())
     {
         if (!only_check)
         {
-            skip_indices_ast = nullptr;
+            skip_indices_description = indices;
             skip_indices_expr = nullptr;
             skip_indices.clear();
         }
@@ -364,10 +364,9 @@ void MergeTreeData::setSkipIndices(const ASTPtr &indices_asts, bool only_check)

     MergeTreeIndices new_indices;
     std::set<String> names;
-    auto index_list = std::dynamic_pointer_cast<ASTExpressionList>(indices_asts);
     ASTPtr indices_expr_list = std::make_shared<ASTExpressionList>();

-    for (const auto & index_ast : index_list->children)
+    for (const auto & index_ast : indices.indices)
     {
         const auto & index_decl = std::dynamic_pointer_cast<ASTIndexDeclaration>(index_ast);
@@ -396,7 +395,7 @@ void MergeTreeData::setSkipIndices(const ASTPtr &indices_asts, bool only_check)

     if (!only_check)
     {
-        skip_indices_ast = indices_asts;
+        skip_indices_description = indices;
         skip_indices_expr = new_skip_indices_expr;
         skip_indices = std::move(new_indices);
     }
@@ -1141,7 +1140,7 @@ void MergeTreeData::checkAlter(const AlterCommands & commands)
     }

     setPrimaryKeyAndColumns(new_order_by_ast, new_primary_key_ast, new_columns, /* only_check = */ true);
-    setSkipIndices(new_indexes_ast, /* only_check = */ true);
+    //setSkipIndices(new_indexes_ast, /* only_check = */ true);

     /// Check that type conversions are possible.
     ExpressionActionsPtr unused_expression;
diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h
index e7db445689b..611bc4500bc 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeData.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeData.h
@@ -14,6 +14,7 @@
 #include
 #include
 #include
+#include
 #include
 #include

@@ -304,13 +305,13 @@ public:
     MergeTreeData(const String & database_, const String & table_,
                   const String & full_path_, const ColumnsDescription & columns_,
+                  const IndicesDescription & indices_,
                   Context & context_,
                   const String & date_column_name,
                   const ASTPtr & partition_by_ast_,
                   const ASTPtr & order_by_ast_,
                   const ASTPtr & primary_key_ast_,
                   const ASTPtr & sample_by_ast_, /// nullptr, if sampling is not supported.
-                  const ASTPtr & indices_ast_,
                   const MergingParams & merging_params_,
                   const MergeTreeSettings & settings_,
                   bool require_part_metadata_,
@@ -587,6 +588,7 @@ public:
     /// Secondary (data skipping) indices for MergeTree
     MergeTreeIndices skip_indices;
     ASTPtr skip_indices_ast;
+    IndicesDescription skip_indices_description;
     ExpressionActionsPtr skip_indices_expr;

     /// Names of columns for primary key + secondary sorting columns.
@@ -731,7 +733,7 @@ private: void setPrimaryKeyAndColumns(const ASTPtr & new_order_by_ast, ASTPtr new_primary_key_ast, const ColumnsDescription & new_columns, bool only_check = false); - void setSkipIndices(const ASTPtr &indices_asts, bool only_check = false); + void setSkipIndices(const IndicesDescription & indices, bool only_check = false); void initPartitionKey(); diff --git a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp index 1827b031078..98bccc8d30f 100644 --- a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -554,7 +554,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) ASTPtr order_by_ast; ASTPtr primary_key_ast; ASTPtr sample_by_ast; - ASTPtr indices_ast; + IndicesDescription indices_description; MergeTreeSettings storage_settings = args.context.getMergeTreeSettings(); if (is_extended_storage_def) @@ -576,7 +576,9 @@ static StoragePtr create(const StorageFactory::Arguments & args) sample_by_ast = args.storage_def->sample_by->ptr(); if (args.query.columns_list && args.query.columns_list->indices) { - indices_ast = args.query.columns_list->indices->ptr(); + for (const auto & index : args.query.columns_list->indices->children) + indices_description.indices.push_back( + std::dynamic_pointer_cast(index->ptr())); } storage_settings.loadFromQuery(*args.storage_def); @@ -611,16 +613,14 @@ static StoragePtr create(const StorageFactory::Arguments & args) if (replicated) return StorageReplicatedMergeTree::create( zookeeper_path, replica_name, args.attach, args.data_path, args.database_name, args.table_name, - args.columns, + args.columns, indices_description, args.context, date_column_name, partition_by_ast, order_by_ast, primary_key_ast, - sample_by_ast, indices_ast, merging_params, storage_settings, - args.has_force_restore_data_flag); + sample_by_ast, merging_params, storage_settings, args.has_force_restore_data_flag); else return StorageMergeTree::create( - args.data_path, args.database_name, args.table_name, args.columns, args.attach, - args.context, date_column_name, partition_by_ast, order_by_ast, primary_key_ast, - sample_by_ast, indices_ast, merging_params, storage_settings, - args.has_force_restore_data_flag); + args.data_path, args.database_name, args.table_name, args.columns, indices_description, + args.attach, args.context, date_column_name, partition_by_ast, order_by_ast, + primary_key_ast, sample_by_ast, merging_params, storage_settings, args.has_force_restore_data_flag); } diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 03946221365..e30dc75be30 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -51,6 +51,7 @@ StorageMergeTree::StorageMergeTree( const String & database_name_, const String & table_name_, const ColumnsDescription & columns_, + const IndicesDescription & indices_, bool attach, Context & context_, const String & date_column_name, @@ -58,17 +59,15 @@ StorageMergeTree::StorageMergeTree( const ASTPtr & order_by_ast_, const ASTPtr & primary_key_ast_, const ASTPtr & sample_by_ast_, /// nullptr, if sampling is not supported. 
- const ASTPtr & indexes_ast_, const MergeTreeData::MergingParams & merging_params_, const MergeTreeSettings & settings_, bool has_force_restore_data_flag) : path(path_), database_name(database_name_), table_name(table_name_), full_path(path + escapeForFileName(table_name) + '/'), global_context(context_), background_pool(context_.getBackgroundPool()), data(database_name, table_name, - full_path, columns_, + full_path, columns_, indices_, context_, date_column_name, partition_by_ast_, order_by_ast_, primary_key_ast_, - sample_by_ast_, indexes_ast_, merging_params_, - settings_, false, attach), + sample_by_ast_, merging_params_, settings_, false, attach), reader(data), writer(data), merger_mutator(data, global_context.getBackgroundPool()), log(&Logger::get(database_name_ + "." + table_name + " (StorageMergeTree)")) { @@ -255,7 +254,7 @@ void StorageMergeTree::alter( /// Reinitialize primary key because primary key column types might have changed. data.setPrimaryKeyAndColumns(new_order_by_ast, new_primary_key_ast, new_columns); - data.setSkipIndices(new_indices_ast); + //data.setSkipIndices(new_indices_ast); for (auto & transaction : transactions) transaction->commit(); diff --git a/dbms/src/Storages/StorageMergeTree.h b/dbms/src/Storages/StorageMergeTree.h index 1a824be54bc..b207c13825e 100644 --- a/dbms/src/Storages/StorageMergeTree.h +++ b/dbms/src/Storages/StorageMergeTree.h @@ -167,6 +167,7 @@ protected: const String & database_name_, const String & table_name_, const ColumnsDescription & columns_, + const IndicesDescription & indices_, bool attach, Context & context_, const String & date_column_name, @@ -174,7 +175,6 @@ protected: const ASTPtr & order_by_ast_, const ASTPtr & primary_key_ast_, const ASTPtr & sample_by_ast_, /// nullptr, if sampling is not supported. 
- const ASTPtr & indexes_ast_, const MergeTreeData::MergingParams & merging_params_, const MergeTreeSettings & settings_, bool has_force_restore_data_flag); diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index a2fc5fac083..403b9e1dd85 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -201,13 +201,13 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( bool attach, const String & path_, const String & database_name_, const String & name_, const ColumnsDescription & columns_, + const IndicesDescription & indices_, Context & context_, const String & date_column_name, const ASTPtr & partition_by_ast_, const ASTPtr & order_by_ast_, const ASTPtr & primary_key_ast_, const ASTPtr & sample_by_ast_, - const ASTPtr & indexes_ast_, const MergeTreeData::MergingParams & merging_params_, const MergeTreeSettings & settings_, bool has_force_restore_data_flag) @@ -217,10 +217,9 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( zookeeper_path(global_context.getMacros()->expand(zookeeper_path_, database_name, table_name)), replica_name(global_context.getMacros()->expand(replica_name_, database_name, table_name)), data(database_name, table_name, - full_path, columns_, + full_path, columns_, indices_, context_, date_column_name, partition_by_ast_, order_by_ast_, primary_key_ast_, - sample_by_ast_, indexes_ast_, merging_params_, - settings_, true, attach, + sample_by_ast_, merging_params_, settings_, true, attach, [this] (const std::string & name) { enqueuePartForCheck(name); }), reader(data), writer(data), merger_mutator(data, global_context.getBackgroundPool()), queue(*this), fetcher(data), @@ -477,7 +476,7 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column /// Even if the primary/sorting keys didn't change we must reinitialize it /// because primary key column types might have changed. 
data.setPrimaryKeyAndColumns(new_order_by_ast, new_primary_key_ast, new_columns); - data.setSkipIndices(new_indices_ast); + //data.setSkipIndices(new_indices_ast); } diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h index 472fc76cab7..50963c4c4b2 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.h +++ b/dbms/src/Storages/StorageReplicatedMergeTree.h @@ -540,7 +540,7 @@ private: void clearColumnInPartition(const ASTPtr & partition, const Field & column_name, const Context & query_context); void dropPartition(const ASTPtr & query, const ASTPtr & partition, bool detach, const Context & query_context); void attachPartition(const ASTPtr & partition, bool part, const Context & query_context); - void replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, const Context & query_context); + void replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, const Context & query_context); void fetchPartition(const ASTPtr & partition, const String & from, const Context & query_context); protected: @@ -552,13 +552,13 @@ protected: bool attach, const String & path_, const String & database_name_, const String & name_, const ColumnsDescription & columns_, + const IndicesDescription & indices_, Context & context_, const String & date_column_name, const ASTPtr & partition_by_ast_, const ASTPtr & order_by_ast_, const ASTPtr & primary_key_ast_, const ASTPtr & sample_by_ast_, - const ASTPtr & indexes_ast_, const MergeTreeData::MergingParams & merging_params_, const MergeTreeSettings & settings_, bool has_force_restore_data_flag); From 977b1e20d05319afdce06c26ad0701dd293f4c90 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 20 Jan 2019 23:03:17 +0300 Subject: [PATCH 099/586] moved indices --- dbms/src/Storages/ITableDeclaration.cpp | 5 +++++ dbms/src/Storages/ITableDeclaration.h | 5 +++++ dbms/src/Storages/MergeTree/MergeTreeData.cpp | 4 ++-- dbms/src/Storages/MergeTree/MergeTreeData.h | 1 - 4 files changed, 12 insertions(+), 3 deletions(-) diff --git a/dbms/src/Storages/ITableDeclaration.cpp b/dbms/src/Storages/ITableDeclaration.cpp index d68693a82bc..2315e5a5538 100644 --- a/dbms/src/Storages/ITableDeclaration.cpp +++ b/dbms/src/Storages/ITableDeclaration.cpp @@ -31,6 +31,11 @@ void ITableDeclaration::setColumns(ColumnsDescription columns_) columns = std::move(columns_); } +void ITableDeclaration::setIndicesDescription(IndicesDescription indices_) +{ + indices = std::move(indices_); +} + bool ITableDeclaration::hasColumn(const String & column_name) const { diff --git a/dbms/src/Storages/ITableDeclaration.h b/dbms/src/Storages/ITableDeclaration.h index 5f15ad626f7..e2ac3b1d6c4 100644 --- a/dbms/src/Storages/ITableDeclaration.h +++ b/dbms/src/Storages/ITableDeclaration.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace DB @@ -15,6 +16,9 @@ public: virtual const ColumnsDescription & getColumns() const { return columns; } virtual void setColumns(ColumnsDescription columns_); + virtual const IndicesDescription & getIndicesDescription() const { return indices; } + virtual void setIndicesDescription(IndicesDescription indices_); + /// NOTE: These methods should include virtual columns, but should NOT include ALIAS columns /// (they are treated separately). 
virtual NameAndTypePair getColumn(const String & column_name) const; @@ -52,6 +56,7 @@ public: private: ColumnsDescription columns; + IndicesDescription indices; }; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 4e4b65d92f4..54b025d0fcf 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -355,7 +355,7 @@ void MergeTreeData::setSkipIndices(const IndicesDescription & indices, bool only { if (!only_check) { - skip_indices_description = indices; + setIndicesDescription(indices); skip_indices_expr = nullptr; skip_indices.clear(); } @@ -395,7 +395,7 @@ void MergeTreeData::setSkipIndices(const IndicesDescription & indices, bool only if (!only_check) { - skip_indices_description = indices; + setIndicesDescription(indices); skip_indices_expr = new_skip_indices_expr; skip_indices = std::move(new_indices); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 3d7d93fdab7..d21da91b90c 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -588,7 +588,6 @@ public: /// Secondary (data skipping) indices for MergeTree MergeTreeIndices skip_indices; ASTPtr skip_indices_ast; - IndicesDescription skip_indices_description; ExpressionActionsPtr skip_indices_expr; /// Names of columns for primary key + secondary sorting columns. From c32d1983a05573273b892f39778aa5b23f51a08c Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 21 Jan 2019 12:47:35 +0300 Subject: [PATCH 100/586] go to indicesDescr --- dbms/src/Databases/DatabaseDictionary.cpp | 1 + dbms/src/Databases/DatabaseDictionary.h | 1 + dbms/src/Databases/DatabaseMemory.cpp | 1 + dbms/src/Databases/DatabaseMemory.h | 1 + dbms/src/Databases/DatabaseOrdinary.cpp | 3 + dbms/src/Databases/DatabaseOrdinary.h | 1 + dbms/src/Databases/IDatabase.h | 2 + .../Interpreters/InterpreterCreateQuery.cpp | 13 +++++ .../src/Interpreters/InterpreterCreateQuery.h | 3 + dbms/src/Storages/AlterCommands.cpp | 58 +++++++------------ dbms/src/Storages/AlterCommands.h | 9 +-- dbms/src/Storages/IStorage.cpp | 3 +- dbms/src/Storages/IndicesDescription.cpp | 6 ++ dbms/src/Storages/MergeTree/MergeTreeData.cpp | 36 ++++++------ dbms/src/Storages/MergeTree/MergeTreeData.h | 4 +- .../ReplicatedMergeTreeAlterThread.cpp | 4 +- dbms/src/Storages/StorageBuffer.cpp | 5 +- dbms/src/Storages/StorageDistributed.cpp | 5 +- dbms/src/Storages/StorageMerge.cpp | 5 +- dbms/src/Storages/StorageMergeTree.cpp | 29 ++++------ dbms/src/Storages/StorageMergeTree.h | 3 + dbms/src/Storages/StorageNull.cpp | 3 +- .../Storages/StorageReplicatedMergeTree.cpp | 41 +++++-------- 23 files changed, 119 insertions(+), 118 deletions(-) diff --git a/dbms/src/Databases/DatabaseDictionary.cpp b/dbms/src/Databases/DatabaseDictionary.cpp index 04fbd3b24a6..f423c4dc13f 100644 --- a/dbms/src/Databases/DatabaseDictionary.cpp +++ b/dbms/src/Databases/DatabaseDictionary.cpp @@ -147,6 +147,7 @@ void DatabaseDictionary::alterTable( const Context &, const String &, const ColumnsDescription &, + const IndicesDescription &, const ASTModifier &) { throw Exception("DatabaseDictionary: alterTable() is not supported", ErrorCodes::NOT_IMPLEMENTED); diff --git a/dbms/src/Databases/DatabaseDictionary.h b/dbms/src/Databases/DatabaseDictionary.h index 9ecc34f1f3e..7df42a4c8f5 100644 --- a/dbms/src/Databases/DatabaseDictionary.h +++ b/dbms/src/Databases/DatabaseDictionary.h @@ -72,6 +72,7 @@ public: 
 const Context & context,
 const String & name,
 const ColumnsDescription & columns,
+ const IndicesDescription & indices,
 const ASTModifier & engine_modifier) override;
 time_t getTableMetadataModificationTime(
diff --git a/dbms/src/Databases/DatabaseMemory.cpp b/dbms/src/Databases/DatabaseMemory.cpp
index 7b736220308..3eea0bc666a 100644
--- a/dbms/src/Databases/DatabaseMemory.cpp
+++ b/dbms/src/Databases/DatabaseMemory.cpp
@@ -53,6 +53,7 @@ void DatabaseMemory::alterTable(
 const Context &,
 const String &,
 const ColumnsDescription &,
+ const IndicesDescription &,
 const ASTModifier &)
 {
 throw Exception("DatabaseMemory: alterTable() is not supported", ErrorCodes::NOT_IMPLEMENTED);
diff --git a/dbms/src/Databases/DatabaseMemory.h b/dbms/src/Databases/DatabaseMemory.h
index 6471dd14dd7..fe7cc783ba3 100644
--- a/dbms/src/Databases/DatabaseMemory.h
+++ b/dbms/src/Databases/DatabaseMemory.h
@@ -48,6 +48,7 @@ public:
 const Context & context,
 const String & name,
 const ColumnsDescription & columns,
+ const IndicesDescription & indices,
 const ASTModifier & engine_modifier) override;
 time_t getTableMetadataModificationTime(
diff --git a/dbms/src/Databases/DatabaseOrdinary.cpp b/dbms/src/Databases/DatabaseOrdinary.cpp
index 05a93510423..2634c6edae6 100644
--- a/dbms/src/Databases/DatabaseOrdinary.cpp
+++ b/dbms/src/Databases/DatabaseOrdinary.cpp
@@ -510,6 +510,7 @@ void DatabaseOrdinary::alterTable(
 const Context & context,
 const String & table_name,
 const ColumnsDescription & columns,
+ const IndicesDescription & indices,
 const ASTModifier & storage_modifier)
 {
 /// Read the definition of the table and replace the necessary parts with new ones.
@@ -531,7 +532,9 @@ void DatabaseOrdinary::alterTable(
 ASTCreateQuery & ast_create_query = typeid_cast<ASTCreateQuery &>(*ast);
 ASTPtr new_columns = InterpreterCreateQuery::formatColumns(columns);
+ ASTPtr new_indices = InterpreterCreateQuery::formatIndices(indices);
 ast_create_query.columns_list->replace(ast_create_query.columns_list->columns, new_columns);
+ ast_create_query.columns_list->replace(ast_create_query.columns_list->indices, new_indices);
 if (storage_modifier)
 storage_modifier(*ast_create_query.storage);
diff --git a/dbms/src/Databases/DatabaseOrdinary.h b/dbms/src/Databases/DatabaseOrdinary.h
index bf6339eaaa7..887bf101d62 100644
--- a/dbms/src/Databases/DatabaseOrdinary.h
+++ b/dbms/src/Databases/DatabaseOrdinary.h
@@ -42,6 +42,7 @@ public:
 const Context & context,
 const String & name,
 const ColumnsDescription & columns,
+ const IndicesDescription & indices,
 const ASTModifier & engine_modifier) override;
 time_t getTableMetadataModificationTime(
diff --git a/dbms/src/Databases/IDatabase.h b/dbms/src/Databases/IDatabase.h
index 7b7b877b0e1..8ded7b98d7b 100644
--- a/dbms/src/Databases/IDatabase.h
+++ b/dbms/src/Databases/IDatabase.h
@@ -3,6 +3,7 @@
 #include
 #include
 #include
+#include <Storages/IndicesDescription.h>
 #include
 #include
 #include
@@ -117,6 +118,7 @@ public:
 const Context & context,
 const String & name,
 const ColumnsDescription & columns,
+ const IndicesDescription & indices,
 const ASTModifier & engine_modifier) = 0;
 /// Returns time of table's metadata change, 0 if there is no corresponding metadata file.
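Every `IDatabase::alterTable` override now receives the indices next to the columns, and the storage layer follows one calling convention everywhere. A condensed sketch, assembled from the `StorageBuffer`, `StorageDistributed` and `StorageNull` hunks further down (`StorageExample` is a placeholder name, not a real class):

    // Condensed from the alter() hunks later in this patch; illustrative only.
    void StorageExample::alter(const AlterCommands & params, const String & database_name,
        const String & table_name, const Context & context)
    {
        auto lock = lockStructureForAlter();

        auto new_columns = getColumns();
        auto new_indices = getIndicesDescription();  /// indices now travel alongside columns
        params.apply(new_columns);

        /// Both descriptions are persisted when the on-disk metadata is rewritten.
        context.getDatabase(database_name)->alterTable(context, table_name, new_columns, new_indices, {});
        setColumns(std::move(new_columns));
    }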
diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp
index fe373455f66..944e2b0df7c 100644
--- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp
@@ -403,6 +403,19 @@ ASTPtr InterpreterCreateQuery::formatColumns(const ColumnsDescription & columns)
 return columns_list;
 }
+ASTPtr InterpreterCreateQuery::formatIndices(const IndicesDescription & indices)
+{
+ if (indices.indices.empty())
+ return nullptr;
+
+ auto res = std::make_shared<ASTExpressionList>();
+
+ for (const auto & index : indices.indices)
+ res->children.push_back(index);
+
+ return res;
+}
+
 ColumnsDescription InterpreterCreateQuery::getColumnsDescription(const ASTExpressionList & columns, const Context & context)
 {
 ColumnsDescription res;
diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.h b/dbms/src/Interpreters/InterpreterCreateQuery.h
index e450ae0728e..f8c76b14804 100644
--- a/dbms/src/Interpreters/InterpreterCreateQuery.h
+++ b/dbms/src/Interpreters/InterpreterCreateQuery.h
@@ -2,6 +2,7 @@
 #include
 #include
+#include <Storages/IndicesDescription.h>
 class ThreadPool;
@@ -29,6 +30,8 @@ public:
 static ASTPtr formatColumns(const NamesAndTypesList & columns);
 static ASTPtr formatColumns(const ColumnsDescription & columns);
+ static ASTPtr formatIndices(const IndicesDescription & indices);
+
 void setDatabaseLoadingThreadpool(ThreadPool & thread_pool_)
 {
 thread_pool = &thread_pool_;
diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp
index 4e2bfa017da..d04c31b8062 100644
--- a/dbms/src/Storages/AlterCommands.cpp
+++ b/dbms/src/Storages/AlterCommands.cpp
@@ -162,8 +162,8 @@ static bool namesEqual(const String & name_without_dot, const DB::NameAndTypePai
 return (name_with_dot == name_type.name.substr(0, name_without_dot.length() + 1) || name_without_dot == name_type.name);
 }
-void AlterCommand::apply(ColumnsDescription & columns_description, ASTPtr & order_by_ast,
- ASTPtr & primary_key_ast, ASTPtr & indexes_decl_ast) const
+void AlterCommand::apply(ColumnsDescription & columns_description, IndicesDescription & indices_description
+ , ASTPtr & order_by_ast, ASTPtr & primary_key_ast) const
 {
 if (type == ADD_COLUMN)
 {
@@ -330,15 +330,9 @@ void AlterCommand::apply(ColumnsDescription & columns_description, ASTPtr & orde
 }
 else if (type == ADD_INDEX)
 {
- ASTPtr new_indexes_decl_ast;
- if (indexes_decl_ast)
- new_indexes_decl_ast = indexes_decl_ast->clone();
- else
- new_indexes_decl_ast = std::make_shared<ASTExpressionList>();
-
 if (std::any_of(
- new_indexes_decl_ast->children.cbegin(),
- new_indexes_decl_ast->children.cend(),
+ indices_description.indices.cbegin(),
+ indices_description.indices.cend(),
 [this](const ASTPtr & index_ast)
 {
 return typeid_cast<const ASTIndexDeclaration &>(*index_ast).name == index_name;
 }))
@@ -350,48 +344,40 @@ void AlterCommand::apply(ColumnsDescription & columns_description, ASTPtr & orde
 ErrorCodes::ILLEGAL_COLUMN};
 }
- auto insert_it = new_indexes_decl_ast->children.end();
+ auto insert_it = indices_description.indices.end();
 if (!after_index_name.empty())
 {
 insert_it = std::find_if(
- new_indexes_decl_ast->children.begin(),
- new_indexes_decl_ast->children.end(),
+ indices_description.indices.begin(),
+ indices_description.indices.end(),
 [this](const ASTPtr & index_ast)
 {
 return typeid_cast<const ASTIndexDeclaration &>(*index_ast).name == after_index_name;
 });
- if (insert_it == new_indexes_decl_ast->children.end()) {
+ if (insert_it == indices_description.indices.end()) {
 throw Exception("Wrong index name. Cannot find index `" + after_index_name + "` to insert after.", ErrorCodes::LOGICAL_ERROR);
 }
 ++insert_it;
 }
- new_indexes_decl_ast->children.emplace(insert_it, index_decl);
- indexes_decl_ast = new_indexes_decl_ast;
+ indices_description.indices.emplace(insert_it, std::dynamic_pointer_cast<ASTIndexDeclaration>(index_decl));
 }
 else if (type == DROP_INDEX)
 {
- ASTPtr new_indexes_decl_ast;
- if (indexes_decl_ast)
- new_indexes_decl_ast = indexes_decl_ast->clone();
- else
- new_indexes_decl_ast = std::make_shared<ASTExpressionList>();
-
 auto erase_it = std::find_if(
- new_indexes_decl_ast->children.begin(),
- new_indexes_decl_ast->children.end(),
+ indices_description.indices.begin(),
+ indices_description.indices.end(),
 [this](const ASTPtr & index_ast)
 {
 return typeid_cast<const ASTIndexDeclaration &>(*index_ast).name == index_name;
 });
- if (erase_it == new_indexes_decl_ast->children.end())
+ if (erase_it == indices_description.indices.end())
 {
 throw Exception("Wrong index name. Cannot find index `" + index_name + "` to drop.", ErrorCodes::LOGICAL_ERROR);
 }
- new_indexes_decl_ast->children.erase(erase_it);
+ indices_description.indices.erase(erase_it);
 }
 else
 throw Exception("Wrong parameter type in ALTER query", ErrorCodes::LOGICAL_ERROR);
@@ -407,21 +393,21 @@ bool AlterCommand::is_mutable() const
 return true;
 }
-void AlterCommands::apply(ColumnsDescription & columns_description, ASTPtr & order_by_ast,
- ASTPtr & primary_key_ast, ASTPtr & indexes_decl_ast) const
+void AlterCommands::apply(ColumnsDescription & columns_description, IndicesDescription & indices_description,
+ ASTPtr & order_by_ast, ASTPtr & primary_key_ast) const
 {
 auto new_columns_description = columns_description;
+ auto new_indices_description = indices_description;
 auto new_order_by_ast = order_by_ast;
 auto new_primary_key_ast = primary_key_ast;
- auto new_indexes_decl_ast = indexes_decl_ast;
 for (const AlterCommand & command : *this)
 if (!command.ignore)
- command.apply(new_columns_description, new_order_by_ast, new_primary_key_ast, new_indexes_decl_ast);
+ command.apply(new_columns_description, new_indices_description, new_order_by_ast, new_primary_key_ast);
 columns_description = std::move(new_columns_description);
+ indices_description = std::move(new_indices_description);
 order_by_ast = std::move(new_order_by_ast);
 primary_key_ast = std::move(new_primary_key_ast);
- indexes_decl_ast = std::move(new_indexes_decl_ast);
 }
 void AlterCommands::validate(const IStorage & table, const Context & context)
@@ -636,17 +622,17 @@ void AlterCommands::validate(const IStorage & table, const Context & context)
 void AlterCommands::apply(ColumnsDescription & columns_description) const
 {
 auto out_columns_description = columns_description;
+ IndicesDescription indices_description;
 ASTPtr out_order_by;
 ASTPtr out_primary_key;
- ASTPtr out_indexes_decl;
- apply(out_columns_description, out_order_by, out_primary_key, out_indexes_decl);
+ apply(out_columns_description, indices_description, out_order_by, out_primary_key);
 if (out_order_by)
 throw Exception("Storage doesn't support modifying ORDER BY expression", ErrorCodes::NOT_IMPLEMENTED);
 if (out_primary_key)
 throw Exception("Storage doesn't support modifying PRIMARY KEY expression", ErrorCodes::NOT_IMPLEMENTED);
- if (out_indexes_decl)
- throw Exception("Storage doesn't support modifying INDEXES", ErrorCodes::NOT_IMPLEMENTED);
+ if (!indices_description.indices.empty())
+ throw Exception("Storage doesn't support modifying indices", ErrorCodes::NOT_IMPLEMENTED);
 columns_description =
std::move(out_columns_description); } diff --git a/dbms/src/Storages/AlterCommands.h b/dbms/src/Storages/AlterCommands.h index 90f932338a2..3833881b186 100644 --- a/dbms/src/Storages/AlterCommands.h +++ b/dbms/src/Storages/AlterCommands.h @@ -3,6 +3,7 @@ #include #include #include +#include #include @@ -79,8 +80,8 @@ struct AlterCommand static std::optional parse(const ASTAlterCommand * command); - void apply(ColumnsDescription & columns_description, ASTPtr & order_by_ast, - ASTPtr & primary_key_ast, ASTPtr & indexes_decl_ast) const; + void apply(ColumnsDescription & columns_description, IndicesDescription & indices_description + , ASTPtr & order_by_ast, ASTPtr & primary_key_ast) const; /// Checks that not only metadata touched by that command bool is_mutable() const; }; @@ -91,8 +92,8 @@ class Context; class AlterCommands : public std::vector { public: - void apply(ColumnsDescription & columns_description, ASTPtr & order_by_ast, - ASTPtr & primary_key_ast, ASTPtr & index_decl_ast) const; + void apply(ColumnsDescription & columns_description, IndicesDescription & indices_description, ASTPtr & order_by_ast, + ASTPtr & primary_key_ast) const; /// For storages that don't support MODIFY_ORDER_BY. void apply(ColumnsDescription & columns_description) const; diff --git a/dbms/src/Storages/IStorage.cpp b/dbms/src/Storages/IStorage.cpp index 07442ecbd15..0c2a4a4d0c9 100644 --- a/dbms/src/Storages/IStorage.cpp +++ b/dbms/src/Storages/IStorage.cpp @@ -24,8 +24,9 @@ void IStorage::alter(const AlterCommands & params, const String & database_name, auto lock = lockStructureForAlter(); auto new_columns = getColumns(); + auto new_indices = getIndicesDescription(); params.apply(new_columns); - context.getDatabase(database_name)->alterTable(context, table_name, new_columns, {}); + context.getDatabase(database_name)->alterTable(context, table_name, new_columns, new_indices, {}); setColumns(std::move(new_columns)); } diff --git a/dbms/src/Storages/IndicesDescription.cpp b/dbms/src/Storages/IndicesDescription.cpp index 7c626902931..c84f8b92998 100644 --- a/dbms/src/Storages/IndicesDescription.cpp +++ b/dbms/src/Storages/IndicesDescription.cpp @@ -17,6 +17,9 @@ namespace DB String IndicesDescription::toString() const { + if (indices.empty()) + return {}; + ASTExpressionList list; for (const auto & index : indices) list.children.push_back(index); @@ -26,6 +29,9 @@ String IndicesDescription::toString() const IndicesDescription IndicesDescription::parse(const String & str) { + if (str.empty()) + return {}; + IndicesDescription res; ParserIndexDeclarationList parser; ASTPtr list = parseQuery(parser, str, 0); diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 54b025d0fcf..093df2309d1 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -1057,10 +1057,10 @@ void MergeTreeData::checkAlter(const AlterCommands & commands) { /// Check that needed transformations can be applied to the list of columns without considering type conversions. auto new_columns = getColumns(); + auto new_indices = getIndicesDescription(); ASTPtr new_order_by_ast = order_by_ast; ASTPtr new_primary_key_ast = primary_key_ast; - ASTPtr new_indexes_ast = skip_indices_ast; - commands.apply(new_columns, new_order_by_ast, new_primary_key_ast, new_indexes_ast); + commands.apply(new_columns, new_indices, new_order_by_ast, new_primary_key_ast); /// Set of columns that shouldn't be altered. 
 NameSet columns_alter_forbidden;
@@ -1140,7 +1140,7 @@ void MergeTreeData::checkAlter(const AlterCommands & commands)
 }
 setPrimaryKeyAndColumns(new_order_by_ast, new_primary_key_ast, new_columns, /* only_check = */ true);
- //setSkipIndices(new_indexes_ast, /* only_check = */ true);
+ setSkipIndices(new_indices, /* only_check = */ true);
 /// Check that type conversions are possible.
 ExpressionActionsPtr unused_expression;
@@ -1148,11 +1148,11 @@ void MergeTreeData::checkAlter(const AlterCommands & commands)
 bool unused_bool;
 createConvertExpression(nullptr, getColumns().getAllPhysical(), new_columns.getAllPhysical(),
- skip_indices_ast, new_indexes_ast, unused_expression, unused_map, unused_bool);
+ getIndicesDescription().indices, new_indices.indices, unused_expression, unused_map, unused_bool);
 }
 void MergeTreeData::createConvertExpression(const DataPartPtr & part, const NamesAndTypesList & old_columns, const NamesAndTypesList & new_columns,
- const ASTPtr & old_indices_ast, const ASTPtr & new_indices_ast, ExpressionActionsPtr & out_expression,
+ const IndicesAsts & old_indices, const IndicesAsts & new_indices, ExpressionActionsPtr & out_expression,
 NameToNameMap & out_rename_map, bool & out_force_update_metadata) const
 {
 out_expression = nullptr;
@@ -1169,20 +1169,18 @@ void MergeTreeData::createConvertExpression(const DataPartPtr & part, const Name
 /// Remove old indices
- std::set<String> new_indices;
- if (new_indices_ast)
- for (const auto & index_decl : new_indices_ast->children)
- new_indices.emplace(dynamic_cast<const ASTIndexDeclaration &>(*index_decl.get()).name);
- if (old_indices_ast)
- for (const auto & index_decl : old_indices_ast->children)
+ std::set<String> new_indices_set;
+ for (const auto & index_decl : new_indices)
+ new_indices_set.emplace(dynamic_cast<const ASTIndexDeclaration &>(*index_decl.get()).name);
+ for (const auto & index_decl : old_indices)
+ {
+ const auto & index = dynamic_cast<const ASTIndexDeclaration &>(*index_decl.get());
+ if (!new_indices_set.count(index.name))
 {
- const auto & index = dynamic_cast<const ASTIndexDeclaration &>(*index_decl.get());
- if (!new_indices.count(index.name))
- {
- out_rename_map["skp_idx_" + index.name + ".idx"] = "";
- out_rename_map["skp_idx_" + index.name + ".mrk"] = "";
- }
+ out_rename_map["skp_idx_" + index.name + ".idx"] = "";
+ out_rename_map["skp_idx_" + index.name + ".mrk"] = "";
 }
+ }
 /// Collect counts for shared streams of different columns. As an example, Nested columns have shared stream with array sizes.
 std::map<String, size_t> stream_counts;
@@ -1314,14 +1312,14 @@ void MergeTreeData::createConvertExpression(const DataPartPtr & part, const Name
 MergeTreeData::AlterDataPartTransactionPtr MergeTreeData::alterDataPart(
 const DataPartPtr & part,
 const NamesAndTypesList & new_columns,
- const ASTPtr & new_indices_ast,
+ const IndicesAsts & new_indices,
 bool skip_sanity_checks)
 {
 ExpressionActionsPtr expression;
 AlterDataPartTransactionPtr transaction(new AlterDataPartTransaction(part)); /// Blocks changes to the part.
bool force_update_metadata; createConvertExpression(part, part->columns, new_columns, - skip_indices_ast, new_indices_ast, + getIndicesDescription().indices, new_indices, expression, transaction->rename_map, force_update_metadata); size_t num_files_to_modify = transaction->rename_map.size(); diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index d21da91b90c..d45a09b3b4d 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -490,7 +490,7 @@ public: AlterDataPartTransactionPtr alterDataPart( const DataPartPtr & part, const NamesAndTypesList & new_columns, - const ASTPtr & new_indices_ast, + const IndicesAsts & new_indices, bool skip_sanity_checks); /// Freezes all parts. @@ -744,7 +744,7 @@ private: /// Files to be deleted are mapped to an empty string in out_rename_map. /// If part == nullptr, just checks that all type conversions are possible. void createConvertExpression(const DataPartPtr & part, const NamesAndTypesList & old_columns, const NamesAndTypesList & new_columns, - const ASTPtr & old_indices_ast, const ASTPtr & new_indices_ast, + const IndicesAsts & old_indices, const IndicesAsts & new_indices, ExpressionActionsPtr & out_expression, NameToNameMap & out_rename_map, bool & out_force_update_metadata) const; /// Calculates column sizes in compressed form for the current state of data_parts. Call with data_parts mutex locked. diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp index 99bc62983ba..d6295fb130a 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp @@ -145,14 +145,14 @@ void ReplicatedMergeTreeAlterThread::run() parts = storage.data.getDataParts(); const auto columns_for_parts = storage.getColumns().getAllPhysical(); + const auto indices_for_parts = storage.getIndicesDescription(); for (const MergeTreeData::DataPartPtr & part : parts) { /// Update the part and write result to temporary files. /// TODO: You can skip checking for too large changes if ZooKeeper has, for example, /// node /flags/force_alter. - auto transaction = storage.data.alterDataPart(part, columns_for_parts, storage.data.skip_indices_ast, false); - + auto transaction = storage.data.alterDataPart(part, columns_for_parts, indices_for_parts.indices, false); if (!transaction) continue; diff --git a/dbms/src/Storages/StorageBuffer.cpp b/dbms/src/Storages/StorageBuffer.cpp index 5d76279c95f..27d8a318293 100644 --- a/dbms/src/Storages/StorageBuffer.cpp +++ b/dbms/src/Storages/StorageBuffer.cpp @@ -684,9 +684,10 @@ void StorageBuffer::alter(const AlterCommands & params, const String & database_ /// So that no blocks of the old structure remain. 
optimize({} /*query*/, {} /*partition_id*/, false /*final*/, false /*deduplicate*/, context); - ColumnsDescription new_columns = getColumns(); + auto new_columns = getColumns(); + auto new_indices = getIndicesDescription(); params.apply(new_columns); - context.getDatabase(database_name)->alterTable(context, table_name, new_columns, {}); + context.getDatabase(database_name)->alterTable(context, table_name, new_columns, new_indices, {}); setColumns(std::move(new_columns)); } diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp index 8f4f31d458c..8ea077b671f 100644 --- a/dbms/src/Storages/StorageDistributed.cpp +++ b/dbms/src/Storages/StorageDistributed.cpp @@ -339,9 +339,10 @@ void StorageDistributed::alter(const AlterCommands & params, const String & data { auto lock = lockStructureForAlter(); - ColumnsDescription new_columns = getColumns(); + auto new_columns = getColumns(); + auto new_indices = getIndicesDescription(); params.apply(new_columns); - context.getDatabase(database_name)->alterTable(context, current_table_name, new_columns, {}); + context.getDatabase(database_name)->alterTable(context, current_table_name, new_columns, new_indices, {}); setColumns(std::move(new_columns)); } diff --git a/dbms/src/Storages/StorageMerge.cpp b/dbms/src/Storages/StorageMerge.cpp index 754d5e4fdfe..406404c2293 100644 --- a/dbms/src/Storages/StorageMerge.cpp +++ b/dbms/src/Storages/StorageMerge.cpp @@ -398,9 +398,10 @@ void StorageMerge::alter(const AlterCommands & params, const String & database_n { auto lock = lockStructureForAlter(); - ColumnsDescription new_columns = getColumns(); + auto new_columns = getColumns(); + auto new_indices = getIndicesDescription(); params.apply(new_columns); - context.getDatabase(database_name)->alterTable(context, table_name, new_columns, {}); + context.getDatabase(database_name)->alterTable(context, table_name, new_columns, new_indices, {}); setColumns(new_columns); } diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index e30dc75be30..cfbce206f87 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -199,8 +199,9 @@ void StorageMergeTree::alter( { auto table_soft_lock = lockStructureForAlter(); auto new_columns = getColumns(); + auto new_indices = getIndicesDescription(); params.apply(new_columns); - context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, {}); + context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, new_indices, {}); setColumns(std::move(new_columns)); return; } @@ -213,22 +214,17 @@ void StorageMergeTree::alter( data.checkAlter(params); auto new_columns = data.getColumns(); + auto new_indices = data.getIndicesDescription(); ASTPtr new_order_by_ast = data.order_by_ast; ASTPtr new_primary_key_ast = data.primary_key_ast; - ASTPtr new_indices_ast = data.skip_indices_ast; - params.apply(new_columns, new_order_by_ast, new_primary_key_ast, new_indices_ast); - - if (new_indices_ast && new_indices_ast->children.empty()) - { - new_indices_ast.reset(); - } + params.apply(new_columns, new_indices, new_order_by_ast, new_primary_key_ast); auto parts = data.getDataParts({MergeTreeDataPartState::PreCommitted, MergeTreeDataPartState::Committed, MergeTreeDataPartState::Outdated}); auto columns_for_parts = new_columns.getAllPhysical(); std::vector transactions; for (const MergeTreeData::DataPartPtr & part : parts) { - if (auto transaction = 
data.alterDataPart(part, columns_for_parts, new_indices_ast, false)) + if (auto transaction = data.alterDataPart(part, columns_for_parts, new_indices.indices, false)) transactions.push_back(std::move(transaction)); } @@ -243,18 +239,13 @@ void StorageMergeTree::alter( if (new_primary_key_ast.get() != data.primary_key_ast.get()) storage_ast.set(storage_ast.primary_key, new_primary_key_ast); - - if (new_indices_ast.get() != data.skip_indices_ast.get()) - { - // TODO: alter indices - } }; - context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, storage_modifier); + context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, new_indices, storage_modifier); /// Reinitialize primary key because primary key column types might have changed. data.setPrimaryKeyAndColumns(new_order_by_ast, new_primary_key_ast, new_columns); - //data.setSkipIndices(new_indices_ast); + data.setSkipIndices(new_indices); for (auto & transaction : transactions) transaction->commit(); @@ -710,10 +701,10 @@ void StorageMergeTree::clearColumnInPartition(const ASTPtr & partition, const Fi alter_command.column_name = get(column_name); auto new_columns = getColumns(); + auto new_indices = getIndicesDescription(); ASTPtr ignored_order_by_ast; ASTPtr ignored_primary_key_ast; - ASTPtr ignored_indexes_ast; - alter_command.apply(new_columns, ignored_order_by_ast, ignored_primary_key_ast, ignored_indexes_ast); + alter_command.apply(new_columns, new_indices, ignored_order_by_ast, ignored_primary_key_ast); auto columns_for_parts = new_columns.getAllPhysical(); for (const auto & part : parts) @@ -721,7 +712,7 @@ void StorageMergeTree::clearColumnInPartition(const ASTPtr & partition, const Fi if (part->info.partition_id != partition_id) throw Exception("Unexpected partition ID " + part->info.partition_id + ". 
This is a bug.", ErrorCodes::LOGICAL_ERROR); - if (auto transaction = data.alterDataPart(part, columns_for_parts, ignored_indexes_ast, false)) + if (auto transaction = data.alterDataPart(part, columns_for_parts, new_indices.indices, false)) transactions.push_back(std::move(transaction)); LOG_DEBUG(log, "Removing column " << get(column_name) << " from part " << part->name); diff --git a/dbms/src/Storages/StorageMergeTree.h b/dbms/src/Storages/StorageMergeTree.h index b207c13825e..0d6b662abc3 100644 --- a/dbms/src/Storages/StorageMergeTree.h +++ b/dbms/src/Storages/StorageMergeTree.h @@ -43,6 +43,9 @@ public: const ColumnsDescription & getColumns() const override { return data.getColumns(); } void setColumns(ColumnsDescription columns_) override { return data.setColumns(std::move(columns_)); } + virtual const IndicesDescription & getIndicesDescription() const override { return data.getIndicesDescription(); } + virtual void setIndicesDescription(IndicesDescription indices_) override { data.setIndicesDescription(std::move(indices_)); } + NameAndTypePair getColumn(const String & column_name) const override { return data.getColumn(column_name); } bool hasColumn(const String & column_name) const override { return data.hasColumn(column_name); } diff --git a/dbms/src/Storages/StorageNull.cpp b/dbms/src/Storages/StorageNull.cpp index d23680b1c1a..59c46fcafd2 100644 --- a/dbms/src/Storages/StorageNull.cpp +++ b/dbms/src/Storages/StorageNull.cpp @@ -35,8 +35,9 @@ void StorageNull::alter(const AlterCommands & params, const String & current_dat auto lock = lockStructureForAlter(); ColumnsDescription new_columns = getColumns(); + IndicesDescription new_indices = getIndicesDescription(); params.apply(new_columns); - context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, {}); + context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, new_indices, {}); setColumns(std::move(new_columns)); } diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 403b9e1dd85..c01b9706121 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -418,7 +418,7 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column { ASTPtr new_primary_key_ast = data.primary_key_ast; ASTPtr new_order_by_ast = data.order_by_ast; - ASTPtr new_indices_ast = data.skip_indices_ast; + auto new_indices = data.getIndicesDescription(); IDatabase::ASTModifier storage_modifier; if (!metadata_diff.empty()) { @@ -445,13 +445,7 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column } if (metadata_diff.skip_indices_changed) - { - ParserIndexDeclarationList parser; - if (metadata_diff.new_skip_indices.empty()) - new_indices_ast.reset(); - else - new_indices_ast = parseQuery(parser, metadata_diff.new_skip_indices, 0); - } + new_indices = IndicesDescription::parse(metadata_diff.new_skip_indices); storage_modifier = [&](IAST & ast) { @@ -466,17 +460,15 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column storage_ast.set(storage_ast.primary_key, new_primary_key_ast); storage_ast.set(storage_ast.order_by, new_order_by_ast); - - // TODO: set indices }; } - global_context.getDatabase(database_name)->alterTable(global_context, table_name, new_columns, storage_modifier); + global_context.getDatabase(database_name)->alterTable(global_context, table_name, new_columns, new_indices, 
storage_modifier); /// Even if the primary/sorting keys didn't change we must reinitialize it /// because primary key column types might have changed. data.setPrimaryKeyAndColumns(new_order_by_ast, new_primary_key_ast, new_columns); - //data.setSkipIndices(new_indices_ast); + data.setSkipIndices(new_indices); } @@ -1544,10 +1536,10 @@ void StorageReplicatedMergeTree::executeClearColumnInPartition(const LogEntry & alter_command.column_name = entry.column_name; auto new_columns = getColumns(); + auto new_indices = getIndicesDescription(); ASTPtr ignored_order_by_ast; ASTPtr ignored_primary_key_ast; - ASTPtr ignored_indexes_ast; - alter_command.apply(new_columns, ignored_order_by_ast, ignored_primary_key_ast, ignored_indexes_ast); + alter_command.apply(new_columns, new_indices, ignored_order_by_ast, ignored_primary_key_ast); size_t modified_parts = 0; auto parts = data.getDataParts(); @@ -1567,7 +1559,7 @@ void StorageReplicatedMergeTree::executeClearColumnInPartition(const LogEntry & LOG_DEBUG(log, "Clearing column " << entry.column_name << " in part " << part->name); - auto transaction = data.alterDataPart(part, columns_for_parts, ignored_indexes_ast, false); + auto transaction = data.alterDataPart(part, columns_for_parts, new_indices.indices, false); if (!transaction) continue; @@ -3135,14 +3127,10 @@ void StorageReplicatedMergeTree::alter(const AlterCommands & params, data.checkAlter(params); ColumnsDescription new_columns = data.getColumns(); + IndicesDescription new_indices = data.getIndicesDescription(); ASTPtr new_order_by_ast = data.order_by_ast; ASTPtr new_primary_key_ast = data.primary_key_ast; - ASTPtr new_indexes_ast = data.skip_indices_ast; - params.apply(new_columns, new_order_by_ast, new_primary_key_ast, new_indexes_ast); - if (new_indexes_ast && new_indexes_ast->children.empty()) - { - new_indexes_ast.reset(); - } + params.apply(new_columns, new_indices, new_order_by_ast, new_primary_key_ast); String new_columns_str = new_columns.toString(); if (new_columns_str != data.getColumns().toString()) @@ -3151,13 +3139,10 @@ void StorageReplicatedMergeTree::alter(const AlterCommands & params, ReplicatedMergeTreeTableMetadata new_metadata(data); if (new_order_by_ast.get() != data.order_by_ast.get()) new_metadata.sorting_key = serializeAST(*MergeTreeData::extractKeyExpressionList(new_order_by_ast)); - if (new_indexes_ast.get() != data.skip_indices_ast.get()) - { - if (new_indexes_ast) - new_metadata.skip_indices = serializeAST(*new_indexes_ast.get()); - else - new_metadata.skip_indices = {}; - } + + String new_indices_str = new_indices.toString(); + if (new_indices_str != data.getIndicesDescription().toString()) + new_metadata.skip_indices = new_indices_str; String new_metadata_str = new_metadata.toString(); if (new_metadata_str != ReplicatedMergeTreeTableMetadata(data).toString()) From 9f1a093e0c46bfd2500d8b373d385934e4c394ea Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 21 Jan 2019 12:53:25 +0300 Subject: [PATCH 101/586] fix test minmax_index* --- .../queries/0_stateless/00823_minmax_index.sql | 11 +++++++---- .../00823_minmax_index_replicated_zookeeper.sql | 16 ++++++++++------ 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00823_minmax_index.sql b/dbms/tests/queries/0_stateless/00823_minmax_index.sql index 3278eac275d..fa233a15819 100644 --- a/dbms/tests/queries/0_stateless/00823_minmax_index.sql +++ b/dbms/tests/queries/0_stateless/00823_minmax_index.sql @@ -8,12 +8,15 @@ CREATE TABLE test.minmax_idx d Decimal(10, 2), s 
String, e Enum8('a' = 1, 'b' = 2, 'c' = 3), - dt Date + dt Date, + INDEX + idx_all (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 4, + INDEX + idx_all2 (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 2, + INDEX + idx_2 (u64 + toYear(dt), substring(s, 2, 4)) TYPE minmax GRANULARITY 3 ) ENGINE = MergeTree() ORDER BY u64 -INDICES idx_all BY (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 4, - idx_all2 BY (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 2, - idx_2 BY (u64 + toYear(dt), substring(s, 2, 4)) TYPE minmax GRANULARITY 3 SETTINGS index_granularity = 2; diff --git a/dbms/tests/queries/0_stateless/00823_minmax_index_replicated_zookeeper.sql b/dbms/tests/queries/0_stateless/00823_minmax_index_replicated_zookeeper.sql index d264d769874..f46cceb4096 100644 --- a/dbms/tests/queries/0_stateless/00823_minmax_index_replicated_zookeeper.sql +++ b/dbms/tests/queries/0_stateless/00823_minmax_index_replicated_zookeeper.sql @@ -9,11 +9,13 @@ CREATE TABLE test.minmax_idx1 d Decimal(10, 2), s String, e Enum8('a' = 1, 'b' = 2, 'c' = 3), - dt Date + dt Date, + INDEX + idx_all (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 2, + INDEX + idx_2 (u64 + toYear(dt), substring(s, 2, 4)) TYPE minmax GRANULARITY 3 ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/minmax', 'r1') ORDER BY u64 -INDICES idx_all BY (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 2, - idx_2 BY (u64 + toYear(dt), substring(s, 2, 4)) TYPE minmax GRANULARITY 3 SETTINGS index_granularity = 2; CREATE TABLE test.minmax_idx2 @@ -24,11 +26,13 @@ CREATE TABLE test.minmax_idx2 d Decimal(10, 2), s String, e Enum8('a' = 1, 'b' = 2, 'c' = 3), - dt Date + dt Date, + INDEX + idx_all (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 2, + INDEX + idx_2 (u64 + toYear(dt), substring(s, 2, 4)) TYPE minmax GRANULARITY 3 ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/minmax', 'r2') ORDER BY u64 -INDICES idx_all BY (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 2, - idx_2 BY (u64 + toYear(dt), substring(s, 2, 4)) TYPE minmax GRANULARITY 3 SETTINGS index_granularity = 2; From d2074985734522380305e5a3e4933fe9663f845a Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 21 Jan 2019 13:39:24 +0300 Subject: [PATCH 102/586] Added ColumnsHashing --- dbms/src/Common/ColumnsHashing.h | 881 +++++++++++++++++++++++++++ dbms/src/Interpreters/Aggregator.cpp | 173 ++---- dbms/src/Interpreters/Aggregator.h | 638 +------------------ 3 files changed, 937 insertions(+), 755 deletions(-) create mode 100644 dbms/src/Common/ColumnsHashing.h diff --git a/dbms/src/Common/ColumnsHashing.h b/dbms/src/Common/ColumnsHashing.h new file mode 100644 index 00000000000..0a6d5464341 --- /dev/null +++ b/dbms/src/Common/ColumnsHashing.h @@ -0,0 +1,881 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ColumnsHashing +{ + +/// Generic context for HashMethod. Context is shared between multiple threads, all methods must be thread-safe. +/// Is used for caching. 
+class HashMethodContext +{ +public: + virtual ~HashMethodContext() = default; + + struct Settings + { + size_t max_threads; + }; +}; + +using HashMethodContextPtr = std::shared_ptr; + + +template +struct MappedTraits +{ + using Type = void *; + static Type getMapped(T &) { return nullptr; } + static T & getKey(T & key) { return key; } +}; + +template +struct MappedTraits> +{ + using Type = Second *; + static Type getMapped(PairNoInit & value) { return &value.second; } + static First & getKey(PairNoInit & value) { return value.first; } +}; + +template +struct HashTableTraits +{ + using Value = typename Data::value_type; + using Mapped = typename MappedTraits::Type; + + static Mapped getMapped(Value & value) { return MappedTraits::getMapped(value); } + static auto & getKey(Value & value) { return MappedTraits::getKey(value); } +}; + +template +struct LastElementCache +{ + static constexpr bool consecutive_keys_optimization = consecutive_keys_optimization_; + using Value = typename HashTableTraits::Value; + Value value; + bool empty = true; + bool found = false; + + auto getMapped() { return HashTableTraits::getMapped(value); } + auto & getKey() { return HashTableTraits::getKey(value); } +}; + +template +struct LastElementCache +{ + static constexpr bool consecutive_keys_optimization = false; +}; + +template +inline ALWAYS_INLINE typename HashTableTraits::Value & emplaceKeyImpl( + Key key, Data & data, bool & inserted, Cache & cache [[maybe_unused]]) +{ + if constexpr (Cache::consecutive_keys_optimization) + { + if (!cache.empty && cache.found && cache.getKey() == key) + { + inserted = false; + return cache.value; + } + } + + typename Data::iterator it; + data.emplace(key, it, inserted); + auto & value = *it; + + if constexpr (Cache::consecutive_keys_optimization) + { + cache.value = value; + cache.empty = false; + cache.found = true; + } + + return value; +} + +template +inline ALWAYS_INLINE typename HashTableTraits::Mapped findKeyImpl( + Key key, Data & data, bool & found, Cache & cache [[maybe_unused]]) +{ + if constexpr (Cache::consecutive_keys_optimization) + { + if (!cache.empty && cache.getKey() == key) + { + found = cache.found; + return found ? cache.getMapped() : nullptr; + } + } + + auto it = data.find(key); + + found = it != data.end(); + auto mapped = found ? HashTableTraits::getMapped(*it) + : nullptr; + + if constexpr (Cache::consecutive_keys_optimization) + { + if (found) + cache.value = *it; + else + cache.getKey() = key; + + cache.empty = false; + cache.found = found; + } + + return mapped; +} + + +/// For the case where there is one numeric key. +template /// UInt8/16/32/64 for any type with corresponding bit width. +struct HashMethodOneNumber +{ + const char * vec; + LastElementCache last_elem_cache; + + /// If the keys of a fixed length then key_sizes contains their lengths, empty otherwise. + HashMethodOneNumber(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &) + { + vec = key_columns[0]->getRawData().data; + } + + /// Creates context. Method is called once and result context is used in all threads. + static HashMethodContextPtr createContext(const HashMethodContext::Settings &) { return nullptr; } + + FieldType getKey(size_t row) const { return unalignedLoad(vec + row * sizeof(FieldType)); } + + /// Emplace key into HashTable or HashMap. If Data is HashMap, returns ptr to value, otherwise nullptr. 
+ template + ALWAYS_INLINE typename HashTableTraits::Mapped emplaceKey( + Data & data, /// HashTable + size_t row, /// From which row of the block insert the key + bool & inserted, + Arena & /*pool*/) /// For Serialized method, key may be placed in pool. + { + return HashTableTraits::getMapped(emplaceKeyImpl(getKey(row), data, inserted, last_elem_cache)); + } + + /// Find key into HashTable or HashMap. If Data is HashMap and key was found, returns ptr to value, otherwise nullptr. + template + ALWAYS_INLINE typename HashTableTraits::Mapped findKey(Data & data, size_t row, bool & found, Arena & /*pool*/) + { + return findKeyImpl(getKey(row), data, found, last_elem_cache); + } + + /// Insert the key from the hash table into columns. + template + static void insertKeyIntoColumns(const Value & value, MutableColumns & key_columns, const Sizes & /*key_sizes*/) + { + static_cast(key_columns[0].get())->insertRawData(reinterpret_cast(&value.first)); + } + + /// Get hash value of row. + template + ALWAYS_INLINE size_t getHash(const Data & data, size_t row, Arena & /*pool*/) + { + return data.hash(getKey(row)); + } + + /// Get StringRef from value which can be inserted into column. + template + static StringRef getValueRef(const Value & value) + { + return StringRef(reinterpret_cast(&value.first), sizeof(value.first)); + } + + /// Cache last result if key was inserted. + template + ALWAYS_INLINE void cacheData(size_t /*row*/, Mapped mapped) + { + *last_elem_cache.getMapped() = mapped; + } + +protected: + template + static ALWAYS_INLINE void onNewKey(Value & /*value*/, Arena & /*pool*/) {} +}; + + +/// For the case where there is one string key. +template +struct HashMethodString +{ + const IColumn::Offset * offsets; + const UInt8 * chars; + + LastElementCache last_elem_cache; + + HashMethodString(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &) + { + const IColumn & column = *key_columns[0]; + const ColumnString & column_string = static_cast(column); + offsets = column_string.getOffsets().data(); + chars = column_string.getChars().data(); + } + + static HashMethodContextPtr createContext(const HashMethodContext::Settings &) { return nullptr; } + + StringRef getKey(size_t row) const { return StringRef(chars + offsets[row - 1], offsets[row] - offsets[row - 1] - 1); } + + template + ALWAYS_INLINE typename HashTableTraits::Mapped emplaceKey(Data & data, size_t row, bool & inserted, Arena & pool) + { + auto & value = emplaceKeyImpl(getKey(row), data, inserted, last_elem_cache); + if (inserted) + { + auto & key = HashTableTraits::getKey(value); + if (key.size) + key.data = pool.insert(key.data, key.size); + } + return HashTableTraits::getMapped(value); + } + + template + ALWAYS_INLINE typename HashTableTraits::Mapped findKey(Data & data, size_t row, bool & found, Arena & /*pool*/) + { + return findKeyImpl(getKey(row), data, found, last_elem_cache); + } + + template + static void insertKeyIntoColumns(const Value & value, MutableColumns & key_columns, const Sizes & /*key_sizes*/) + { + key_columns[0]->insertData(value.first.data, value.first.size); + } + + template + ALWAYS_INLINE size_t getHash(const Data & data, size_t row, Arena & /*pool*/) + { + return data.hash(getKey(row)); + } + + template + static StringRef getValueRef(const Value & value) + { + return StringRef(value.first.data, value.first.size); + } + + template + ALWAYS_INLINE void cacheData(size_t /*row*/, Mapped mapped) + { + *last_elem_cache.getMapped() = mapped; + } + +protected: + template + static 
ALWAYS_INLINE void onNewKey(Value & value, Arena & pool) + { + if (value.first.size) + value.first.data = pool.insert(value.first.data, value.first.size); + } +}; + + +/// For the case where there is one fixed-length string key. +template +struct HashMethodFixedString +{ + size_t n; + const ColumnFixedString::Chars * chars; + + LastElementCache last_elem_cache; + + HashMethodFixedString(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &) + { + const IColumn & column = *key_columns[0]; + const ColumnFixedString & column_string = static_cast(column); + n = column_string.getN(); + chars = &column_string.getChars(); + } + + static HashMethodContextPtr createContext(const HashMethodContext::Settings &) { return nullptr; } + + StringRef getKey(size_t row) const { return StringRef(&(*chars)[row * n], n); } + + template + ALWAYS_INLINE typename HashTableTraits::Mapped emplaceKey(Data & data, size_t row, bool & inserted, Arena & pool) + { + auto & value = emplaceKeyImpl(getKey(row), data, inserted, last_elem_cache); + if (inserted) + { + auto & key = HashTableTraits::getKey(value); + key.data = pool.insert(key.data, key.size); + } + return HashTableTraits::getMapped(value); + } + + template + ALWAYS_INLINE typename HashTableTraits::Mapped findKey(Data & data, size_t row, bool & found, Arena & /*pool*/) + { + return findKeyImpl(getKey(row), data, found, last_elem_cache); + } + + template + static void insertKeyIntoColumns(const Value & value, MutableColumns & key_columns, const Sizes & /*key_sizes*/) + { + key_columns[0]->insertData(value.first.data, value.first.size); + } + + template + ALWAYS_INLINE size_t getHash(const Data & data, size_t row, Arena & /*pool*/) + { + return data.hash(getKey(row)); + } + + template + static StringRef getValueRef(const Value & value) + { + return StringRef(value.first.data, value.first.size); + } + + template + ALWAYS_INLINE void cacheData(size_t /*row*/, Mapped mapped) + { + *last_elem_cache.getMapped() = mapped; + } + +protected: + template + static ALWAYS_INLINE void onNewKey(Value & value, Arena & pool) + { + value.first.data = pool.insert(value.first.data, value.first.size); + } +}; + + +/// Cache stores dictionaries and saved_hash per dictionary key. +class LowCardinalityDictionaryCache : public HashMethodContext +{ +public: + /// Will assume that dictionaries with same hash has the same keys. + /// Just in case, check that they have also the same size. + struct DictionaryKey + { + UInt128 hash; + UInt64 size; + + bool operator== (const DictionaryKey & other) const { return hash == other.hash && size == other.size; } + }; + + struct DictionaryKeyHash + { + size_t operator()(const DictionaryKey & key) const + { + SipHash hash; + hash.update(key.hash.low); + hash.update(key.hash.high); + hash.update(key.size); + return hash.get64(); + } + }; + + struct CachedValues + { + /// Store ptr to dictionary to be sure it won't be deleted. + ColumnPtr dictionary_holder; + /// Hashes for dictionary keys. + const UInt64 * saved_hash = nullptr; + }; + + using CachedValuesPtr = std::shared_ptr; + + explicit LowCardinalityDictionaryCache(const HashMethodContext::Settings & settings) : cache(settings.max_threads) {} + + CachedValuesPtr get(const DictionaryKey & key) { return cache.get(key); } + void set(const DictionaryKey & key, const CachedValuesPtr & mapped) { cache.set(key, mapped); } + +private: + using Cache = LRUCache; + Cache cache; +}; + +/// Single low cardinality column. 
+template +struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod +{ + using Base = SingleColumnMethod; + + static HashMethodContextPtr createContext(const HashMethodContext::Settings & settings) + { + return std::make_shared(settings); + } + + ColumnRawPtrs key_columns; + const IColumn * positions = nullptr; + size_t size_of_index_type = 0; + + /// saved hash is from current column or from cache. + const UInt64 * saved_hash = nullptr; + /// Hold dictionary in case saved_hash is from cache to be sure it won't be deleted. + ColumnPtr dictionary_holder; + + /// Cache AggregateDataPtr for current column in order to decrease the number of hash table usages. + PaddedPODArray aggregate_data_cache; + + /// If initialized column is nullable. + bool is_nullable = false; + + static const ColumnLowCardinality & getLowCardinalityColumn(const IColumn * low_cardinality_column) + { + auto column = typeid_cast(low_cardinality_column); + if (!column) + throw Exception("Invalid aggregation key type for HashMethodSingleLowCardinalityColumn method. " + "Excepted LowCardinality, got " + column->getName(), ErrorCodes::LOGICAL_ERROR); + return *column; + } + + HashMethodSingleLowCardinalityColumn( + const ColumnRawPtrs & key_columns_low_cardinality, const Sizes & key_sizes, const HashMethodContextPtr & context) + : Base({getLowCardinalityColumn(key_columns_low_cardinality[0]).getDictionary().getNestedNotNullableColumn().get()}, key_sizes, context) + { + auto column = &getLowCardinalityColumn(key_columns_low_cardinality[0]); + + if (!context) + throw Exception("Cache wasn't created for HashMethodSingleLowCardinalityColumn", + ErrorCodes::LOGICAL_ERROR); + + LowCardinalityDictionaryCache * cache; + if constexpr (use_cache) + { + cache = typeid_cast(context.get()); + if (!cache) + { + const auto & cached_val = *context; + throw Exception("Invalid type for HashMethodSingleLowCardinalityColumn cache: " + + demangle(typeid(cached_val).name()), ErrorCodes::LOGICAL_ERROR); + } + } + + auto * dict = column->getDictionary().getNestedNotNullableColumn().get(); + is_nullable = column->getDictionary().nestedColumnIsNullable(); + key_columns = {dict}; + bool is_shared_dict = column->isSharedDictionary(); + + typename LowCardinalityDictionaryCache::DictionaryKey dictionary_key; + typename LowCardinalityDictionaryCache::CachedValuesPtr cached_values; + + if (is_shared_dict) + { + dictionary_key = {column->getDictionary().getHash(), dict->size()}; + if constexpr (use_cache) + cached_values = cache->get(dictionary_key); + } + + if (cached_values) + { + saved_hash = cached_values->saved_hash; + dictionary_holder = cached_values->dictionary_holder; + } + else + { + saved_hash = column->getDictionary().tryGetSavedHash(); + dictionary_holder = column->getDictionaryPtr(); + + if constexpr (use_cache) + { + if (is_shared_dict) + { + cached_values = std::make_shared(); + cached_values->saved_hash = saved_hash; + cached_values->dictionary_holder = dictionary_holder; + + cache->set(dictionary_key, cached_values); + } + } + } + + AggregateDataPtr default_data = nullptr; + aggregate_data_cache.assign(key_columns[0]->size(), default_data); + + size_of_index_type = column->getSizeOfIndexType(); + positions = column->getIndexesPtr().get(); + } + + ALWAYS_INLINE size_t getIndexAt(size_t row) const + { + switch (size_of_index_type) + { + case sizeof(UInt8): return static_cast(positions)->getElement(row); + case sizeof(UInt16): return static_cast(positions)->getElement(row); + case sizeof(UInt32): return 
static_cast(positions)->getElement(row); + case sizeof(UInt64): return static_cast(positions)->getElement(row); + default: throw Exception("Unexpected size of index type for low cardinality column.", ErrorCodes::LOGICAL_ERROR); + } + } + + /// Get the key from the key columns for insertion into the hash table. + ALWAYS_INLINE auto getKey(size_t row) const + { + return Base::getKey(getIndexAt(row)); + } + + template + ALWAYS_INLINE typename HashTableTraits::Mapped emplaceKey(Data & data, size_t row_, bool & inserted, Arena & pool) + { + size_t row = getIndexAt(row_); + + if (is_nullable && row == 0) + { + inserted = !data.hasNullKeyData(); + data.hasNullKeyData() = true; + return &data.getNullKeyData(); + } + + if constexpr (use_cache) + { + if (aggregate_data_cache[row]) + { + inserted = false; + return &aggregate_data_cache[row]; + } + } + + Sizes key_sizes; + auto key = getKey(row_); + + typename Data::iterator it; + if (saved_hash) + data.emplace(key, it, inserted, saved_hash[row]); + else + data.emplace(key, it, inserted); + + if (inserted) + Base::onNewKey(*it, pool); + else if constexpr (use_cache) + aggregate_data_cache[row] = it->second; + + return HashTableTraits::getMapped(*it); + } + + ALWAYS_INLINE bool isNullAt(size_t i) + { + if (!is_nullable) + return false; + + return getIndexAt(i) == 0; + } + + template + ALWAYS_INLINE void cacheData(size_t i, Mapped mapped) + { + size_t row = getIndexAt(i); + aggregate_data_cache[row] = mapped; + } + + template + ALWAYS_INLINE typename HashTableTraits::Mapped findFromRow(Data & data, size_t row_, bool & found, Arena &) + { + size_t row = getIndexAt(row_); + + if (is_nullable && row == 0) + return data.hasNullKeyData() ? &data.getNullKeyData() : nullptr; + + if constexpr (use_cache) + { + if (aggregate_data_cache[row]) + return &aggregate_data_cache[row]; + } + + auto key = getKey(row_); + + typename Data::iterator it; + if (saved_hash) + it = data.find(key, saved_hash[row]); + else + it = data.find(key); + + found = it != data.end(); + if constexpr (use_cache) + { + if (found) + aggregate_data_cache[row] = it->second; + } + + return typename HashTableTraits::getMapped(*it); + } + + template + ALWAYS_INLINE size_t getHash(const Data & data, size_t row, Arena & pool) + { + row = getIndexAt(row); + if (saved_hash) + return saved_hash[row]; + + return Base::getHash(data, row, pool); + } + + template + static void insertKeyIntoColumns(const Value & value, MutableColumns & key_columns_low_cardinality, const Sizes & /*key_sizes*/) + { + auto ref = Base::getValueRef(value); + static_cast(key_columns_low_cardinality[0].get())->insertData(ref.data, ref.size); + } +}; + + +namespace columns_hashing_impl +{ + +/// This class is designed to provide the functionality that is required for +/// supporting nullable keys in HashMethodKeysFixed. If there are +/// no nullable keys, this class is merely implemented as an empty shell. +template +class BaseStateKeysFixed; + +/// Case where nullable keys are supported. 
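The bitmap these classes produce is easiest to see in isolation. A freestanding sketch of the same bit packing used by createBitmap() below, with a worked result:

    #include <array>
    #include <cstdint>

    // Standalone sketch of the KeysNullMap packing used by createBitmap():
    // one bit per key column, key k lives in byte k / 8, bit k % 8.
    template <size_t NumKeys>
    std::array<uint8_t, (NumKeys + 7) / 8> packNullBits(const std::array<bool, NumKeys> & is_null)
    {
        std::array<uint8_t, (NumKeys + 7) / 8> bitmap{};
        for (size_t k = 0; k < NumKeys; ++k)
            if (is_null[k])
                bitmap[k / 8] |= uint8_t(1) << (k % 8);
        return bitmap;
    }

    // With ten keys where keys 1 and 9 are NULL, the two-byte bitmap
    // comes out as {0b00000010, 0b00000010}.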
+template +class BaseStateKeysFixed +{ +protected: + void init(const ColumnRawPtrs & key_columns) + { + null_maps.reserve(key_columns.size()); + actual_columns.reserve(key_columns.size()); + + for (const auto & col : key_columns) + { + if (col->isColumnNullable()) + { + const auto & nullable_col = static_cast(*col); + actual_columns.push_back(&nullable_col.getNestedColumn()); + null_maps.push_back(&nullable_col.getNullMapColumn()); + } + else + { + actual_columns.push_back(col); + null_maps.push_back(nullptr); + } + } + } + + /// Return the columns which actually contain the values of the keys. + /// For a given key column, if it is nullable, we return its nested + /// column. Otherwise we return the key column itself. + inline const ColumnRawPtrs & getActualColumns() const + { + return actual_columns; + } + + /// Create a bitmap that indicates whether, for a particular row, + /// a key column bears a null value or not. + KeysNullMap createBitmap(size_t row) const + { + KeysNullMap bitmap{}; + + for (size_t k = 0; k < null_maps.size(); ++k) + { + if (null_maps[k] != nullptr) + { + const auto & null_map = static_cast(*null_maps[k]).getData(); + if (null_map[row] == 1) + { + size_t bucket = k / 8; + size_t offset = k % 8; + bitmap[bucket] |= UInt8(1) << offset; + } + } + } + + return bitmap; + } + +private: + ColumnRawPtrs actual_columns; + ColumnRawPtrs null_maps; +}; + +/// Case where nullable keys are not supported. +template +class BaseStateKeysFixed +{ +protected: + void init(const ColumnRawPtrs & columns) { actual_columns = columns; } + + const ColumnRawPtrs & getActualColumns() const { return actual_columns; } + + KeysNullMap createBitmap(size_t) const + { + throw Exception{"Internal error: calling createBitmap() for non-nullable keys" + " is forbidden", ErrorCodes::LOGICAL_ERROR}; + } + +private: + ColumnRawPtrs actual_columns; +}; + +} + +// Optional mask for low cardinality columns. +template +struct LowCardinalityKeys +{ + ColumnRawPtrs nested_columns; + ColumnRawPtrs positions; + Sizes position_sizes; +}; + +template <> +struct LowCardinalityKeys {}; + +/// For the case where all keys are of fixed length, and they fit in N (for example, 128) bits. 
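packFixed(), which the next method relies on, concatenates the raw bytes of all fixed-size keys into one wide integer. A schematic stand-in for the two-UInt32 case follows; the real function is generic over key count, key sizes, and an optional null bitmap:

    #include <cstdint>
    #include <cstring>

    // Schematic of packFixed() for two UInt32 keys; the unused high bytes
    // stay zero, so equal key tuples yield bitwise-identical 128-bit keys.
    struct Key128
    {
        uint64_t low = 0, high = 0;
        bool operator==(const Key128 & o) const { return low == o.low && high == o.high; }
    };

    Key128 packTwoKeys(uint32_t a, uint32_t b)
    {
        Key128 key;
        char * pos = reinterpret_cast<char *>(&key);
        std::memcpy(pos, &a, sizeof(a));
        std::memcpy(pos + sizeof(a), &b, sizeof(b));
        return key;
    }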
+template +struct HashMethodKeysFixed : private columns_hashing_impl::BaseStateKeysFixed +{ + using Key = typename TData::key_type; + + static constexpr bool has_nullable_keys = has_nullable_keys_; + static constexpr bool has_low_cardinality = has_low_cardinality_; + + LowCardinalityKeys low_cardinality_keys; + Sizes key_sizes; + size_t keys_size; + + LastElementCache last_elem_cache; + + using Base = columns_hashing_impl::BaseStateKeysFixed; + + HashMethodKeysFixed(const ColumnRawPtrs & key_columns, const Sizes & key_sizes, const HashMethodContextPtr &) + : key_sizes(std::move(key_sizes)), keys_size(key_columns.size()) + { + if constexpr (has_low_cardinality) + { + low_cardinality_keys.nested_columns.resize(key_columns.size()); + low_cardinality_keys.positions.assign(key_columns.size(), nullptr); + low_cardinality_keys.position_sizes.resize(key_columns.size()); + for (size_t i = 0; i < key_columns.size(); ++i) + { + if (auto * low_cardinality_col = typeid_cast(key_columns[i])) + { + low_cardinality_keys.nested_columns[i] = low_cardinality_col->getDictionary().getNestedColumn().get(); + low_cardinality_keys.positions[i] = &low_cardinality_col->getIndexes(); + low_cardinality_keys.position_sizes[i] = low_cardinality_col->getSizeOfIndexType(); + } + else + low_cardinality_keys.nested_columns[i] = key_columns[i]; + } + } + + Base::init(key_columns); + } + + static HashMethodContextPtr createContext(const HashMethodContext::Settings &) { return nullptr; } + + ALWAYS_INLINE Key getKey(size_t row) const + { + if (has_nullable_keys) + { + auto bitmap = Base::createBitmap(row); + return packFixed(row, keys_size, Base::getActualColumns(), key_sizes, bitmap); + } + else + { + if constexpr (has_low_cardinality) + return packFixed(row, keys_size, low_cardinality_keys.nested_columns, key_sizes, + &low_cardinality_keys.positions, &low_cardinality_keys.position_sizes); + + return packFixed(row, keys_size, Base::getActualColumns(), key_sizes); + } + } + + template + ALWAYS_INLINE typename HashTableTraits::Mapped emplaceKey(Data & data, size_t row, bool & inserted, Arena & /*pool*/) + { + return HashTableTraits::getMapped(emplaceKeyImpl(getKey(row), data, inserted, last_elem_cache)); + } + + template + ALWAYS_INLINE typename HashTableTraits::Mapped findKey(Data & data, size_t row, bool & found, Arena & /*pool*/) + { + return findKeyImpl(getKey(row), data, found, last_elem_cache); + } + + template + static StringRef getValueRef(const Value & value) + { + return StringRef(value.first.data, value.first.size); + } + + template + ALWAYS_INLINE size_t getHash(const Data & data, size_t row, Arena & /*pool*/) + { + return data.hash(getKey(row)); + } + + template + ALWAYS_INLINE void cacheData(size_t /*row*/, Mapped mapped) + { + *last_elem_cache.getMapped() = mapped; + } +}; + +/** Hash by concatenating serialized key values. + * The serialized value differs in that it uniquely allows to deserialize it, having only the position with which it starts. + * That is, for example, for strings, it contains first the serialized length of the string, and then the bytes. + * Therefore, when aggregating by several strings, there is no ambiguity. 
+ */ +template +struct HashMethodSerialized +{ + ColumnRawPtrs key_columns; + size_t keys_size; + LastElementCache last_elem_cache; + + HashMethodSerialized(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &) + : key_columns(key_columns), keys_size(key_columns.size()) {} + + static HashMethodContextPtr createContext(const HashMethodContext::Settings &) { return nullptr; } + + template + ALWAYS_INLINE typename HashTableTraits::Mapped emplaceKey(Data & data, size_t row, bool & inserted, Arena & pool) + { + auto key = getKey(row, pool); + auto & value = emplaceKeyImpl(key, data, inserted, last_elem_cache); + if (!inserted) + pool.rollback(key.size); + + return HashTableTraits::getMapped(value); + } + + template + ALWAYS_INLINE typename HashTableTraits::Mapped findKey(Data & data, size_t row, bool & found, Arena & pool) + { + auto key = getKey(row, pool); + auto mapped = findKeyImpl(key, data, found, last_elem_cache); + pool.rollback(key.size); + + return mapped; + } + + template + ALWAYS_INLINE size_t getHash(const Data & data, size_t row, Arena & pool) + { + auto key = getKey(row, pool); + auto hash = data.hash(key); + pool.rollback(key.size); + + return hash; + } + + template + ALWAYS_INLINE void cacheData(size_t /*row*/, Mapped /*mapped*/) {} + +protected: + ALWAYS_INLINE StringRef getKey(size_t row, Arena & pool) const + { + return serializeKeysToPoolContiguous(row, keys_size, key_columns, pool); + } +}; + +} +} diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 145ce98dbbc..785345f9400 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -6,9 +6,11 @@ #include #include #include +#include #include #include #include +#include #include #include #include @@ -22,11 +24,9 @@ #include #include #include + #if __has_include() #include -#include -#include - #endif @@ -188,7 +188,7 @@ Aggregator::Aggregator(const Params & params_) } method_chosen = chooseAggregationMethod(); - AggregationStateCache::Settings cache_settings; + HashMethodContext::Settings cache_settings; cache_settings.max_threads = params.max_threads; aggregation_state_cache = AggregatedDataVariants::createCache(method_chosen, cache_settings); } @@ -586,11 +586,7 @@ void NO_INLINE Aggregator::executeImpl( bool no_more_keys, AggregateDataPtr overflow_row) const { - typename Method::State state; - if constexpr (Method::low_cardinality_optimization) - state.init(key_columns, aggregation_state_cache); - else - state.init(key_columns); + typename Method::State state(key_columns, key_sizes, aggregation_state_cache); if (!no_more_keys) executeImplCase(method, state, aggregates_pool, rows, key_columns, aggregate_instructions, keys, overflow_row); @@ -605,76 +601,35 @@ void NO_INLINE Aggregator::executeImplCase( typename Method::State & state, Arena * aggregates_pool, size_t rows, - ColumnRawPtrs & key_columns, + ColumnRawPtrs & /*key_columns*/, AggregateFunctionInstruction * aggregate_instructions, - StringRefs & keys, + StringRefs & /*keys*/, AggregateDataPtr overflow_row) const { /// NOTE When editing this code, also pay attention to SpecializedAggregator.h. /// For all rows. - typename Method::Key prev_key{}; AggregateDataPtr value = nullptr; for (size_t i = 0; i < rows; ++i) { bool inserted = false; /// Inserted a new key, or was this key already? - /// Get the key to insert into the hash table. 
- typename Method::Key key; - if constexpr (!Method::low_cardinality_optimization) - key = state.getKey(key_columns, params.keys_size, i, key_sizes, keys, *aggregates_pool); - AggregateDataPtr * aggregate_data = nullptr; - typename Method::iterator it; /// Is not used if Method::low_cardinality_optimization - if (!no_more_keys) /// Insert. - { - /// Optimization for consecutive identical keys. - if (!Method::no_consecutive_keys_optimization) - { - if (i != 0 && key == prev_key) - { - /// Add values to the aggregate functions. - for (AggregateFunctionInstruction * inst = aggregate_instructions; inst->that; ++inst) - (*inst->func)(inst->that, value + inst->state_offset, inst->arguments, i, aggregates_pool); - - method.onExistingKey(key, keys, *aggregates_pool); - continue; - } - else - prev_key = key; - } - - if constexpr (Method::low_cardinality_optimization) - aggregate_data = state.emplaceKeyFromRow(method.data, i, inserted, params.keys_size, keys, *aggregates_pool); - else - { - method.data.emplace(key, it, inserted); - aggregate_data = &Method::getAggregateData(it->second); - } - } + if constexpr (!no_more_keys) /// Insert. + aggregate_data = state.emplaceKey(method.data, i, inserted, *aggregates_pool); else { /// Add only if the key already exists. - - if constexpr (Method::low_cardinality_optimization) - aggregate_data = state.findFromRow(method.data, i); - else - { - it = method.data.find(key); - if (method.data.end() != it) - aggregate_data = &Method::getAggregateData(it->second); - } + bool found = false; + aggregate_data = state.findKey(method.data, i, found, *aggregates_pool); } /// aggregate_date == nullptr means that the new key did not fit in the hash table because of no_more_keys. /// If the key does not fit, and the data does not need to be aggregated in a separate row, then there's nothing to do. if (!aggregate_data && !overflow_row) - { - method.onExistingKey(key, keys, *aggregates_pool); continue; - } /// If a new key is inserted, initialize the states of the aggregate functions, and possibly something related to the key. if (inserted) @@ -682,18 +637,12 @@ void NO_INLINE Aggregator::executeImplCase( /// exception-safety - if you can not allocate memory or create states, then destructors will not be called. *aggregate_data = nullptr; - if constexpr (!Method::low_cardinality_optimization) - method.onNewKey(*it, params.keys_size, keys, *aggregates_pool); - AggregateDataPtr place = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); createAggregateStates(place); *aggregate_data = place; - if constexpr (Method::low_cardinality_optimization) - state.cacheAggregateData(i, place); + state.cacheData(i, place); } - else - method.onExistingKey(key, keys, *aggregates_pool); value = aggregate_data ? 
*aggregate_data : overflow_row; @@ -1172,7 +1121,7 @@ void NO_INLINE Aggregator::convertToBlockImplFinal( for (size_t i = 0; i < params.aggregates_size; ++i) aggregate_functions[i]->insertResultInto( - Method::getAggregateData(value.second) + offsets_of_aggregate_states[i], + value.second + offsets_of_aggregate_states[i], *final_aggregate_columns[i]); } @@ -1203,9 +1152,9 @@ void NO_INLINE Aggregator::convertToBlockImplNotFinal( /// reserved, so push_back does not throw exceptions for (size_t i = 0; i < params.aggregates_size; ++i) - aggregate_columns[i]->push_back(Method::getAggregateData(value.second) + offsets_of_aggregate_states[i]); + aggregate_columns[i]->push_back(value.second + offsets_of_aggregate_states[i]); - Method::getAggregateData(value.second) = nullptr; + value.second = nullptr; } } @@ -1549,20 +1498,20 @@ void NO_INLINE Aggregator::mergeDataImpl( { for (size_t i = 0; i < params.aggregates_size; ++i) aggregate_functions[i]->merge( - Method::getAggregateData(res_it->second) + offsets_of_aggregate_states[i], - Method::getAggregateData(it->second) + offsets_of_aggregate_states[i], + res_it->second + offsets_of_aggregate_states[i], + it->second + offsets_of_aggregate_states[i], arena); for (size_t i = 0; i < params.aggregates_size; ++i) aggregate_functions[i]->destroy( - Method::getAggregateData(it->second) + offsets_of_aggregate_states[i]); + it->second + offsets_of_aggregate_states[i]); } else { res_it->second = it->second; } - Method::getAggregateData(it->second) = nullptr; + it->second = nullptr; } table_src.clearAndShrink(); @@ -1586,19 +1535,18 @@ void NO_INLINE Aggregator::mergeDataNoMoreKeysImpl( AggregateDataPtr res_data = table_dst.end() == res_it ? overflows - : Method::getAggregateData(res_it->second); + : res_it->second; for (size_t i = 0; i < params.aggregates_size; ++i) aggregate_functions[i]->merge( res_data + offsets_of_aggregate_states[i], - Method::getAggregateData(it->second) + offsets_of_aggregate_states[i], + it->second + offsets_of_aggregate_states[i], arena); for (size_t i = 0; i < params.aggregates_size; ++i) - aggregate_functions[i]->destroy( - Method::getAggregateData(it->second) + offsets_of_aggregate_states[i]); + aggregate_functions[i]->destroy(it->second + offsets_of_aggregate_states[i]); - Method::getAggregateData(it->second) = nullptr; + it->second = nullptr; } table_src.clearAndShrink(); @@ -1621,19 +1569,18 @@ void NO_INLINE Aggregator::mergeDataOnlyExistingKeysImpl( if (table_dst.end() == res_it) continue; - AggregateDataPtr res_data = Method::getAggregateData(res_it->second); + AggregateDataPtr res_data = res_it->second; for (size_t i = 0; i < params.aggregates_size; ++i) aggregate_functions[i]->merge( res_data + offsets_of_aggregate_states[i], - Method::getAggregateData(it->second) + offsets_of_aggregate_states[i], + it->second + offsets_of_aggregate_states[i], arena); for (size_t i = 0; i < params.aggregates_size; ++i) - aggregate_functions[i]->destroy( - Method::getAggregateData(it->second) + offsets_of_aggregate_states[i]); + aggregate_functions[i]->destroy(it->second + offsets_of_aggregate_states[i]); - Method::getAggregateData(it->second) = nullptr; + it->second = nullptr; } table_src.clearAndShrink(); @@ -1984,7 +1931,7 @@ template void NO_INLINE Aggregator::mergeStreamsImplCase( Block & block, Arena * aggregates_pool, - Method & method, + Method & method [[maybe_unused]], Table & data, AggregateDataPtr overflow_row) const { @@ -1998,14 +1945,9 @@ void NO_INLINE Aggregator::mergeStreamsImplCase( for (size_t i = 0; i < 
params.aggregates_size; ++i) aggregate_columns[i] = &typeid_cast(*block.safeGetByPosition(params.keys_size + i).column).getData(); - typename Method::State state; - if constexpr (Method::low_cardinality_optimization) - state.init(key_columns, aggregation_state_cache); - else - state.init(key_columns); + typename Method::State state(key_columns, key_sizes, aggregation_state_cache); /// For all rows. - StringRefs keys(params.keys_size); size_t rows = block.rows(); for (size_t i = 0; i < rows; ++i) { @@ -2014,59 +1956,31 @@ void NO_INLINE Aggregator::mergeStreamsImplCase( bool inserted = false; /// Inserted a new key, or was this key already? - /// Get the key to insert into the hash table. - typename Method::Key key; - if constexpr (!Method::low_cardinality_optimization) - key = state.getKey(key_columns, params.keys_size, i, key_sizes, keys, *aggregates_pool); - if (!no_more_keys) - { - if constexpr (Method::low_cardinality_optimization) - aggregate_data = state.emplaceKeyFromRow(data, i, inserted, params.keys_size, keys, *aggregates_pool); - else - { - data.emplace(key, it, inserted); - aggregate_data = &Method::getAggregateData(it->second); - } - } + aggregate_data = state.emplaceKey(data, i, inserted, *aggregates_pool); else { - if constexpr (Method::low_cardinality_optimization) - aggregate_data = state.findFromRow(data, i); - else - { - it = data.find(key); - if (data.end() != it) - aggregate_data = &Method::getAggregateData(it->second); - } + bool found; + aggregate_data = state.findKey(data, i, found, *aggregates_pool); } /// aggregate_date == nullptr means that the new key did not fit in the hash table because of no_more_keys. /// If the key does not fit, and the data does not need to be aggregated into a separate row, then there's nothing to do. if (!aggregate_data && !overflow_row) - { - method.onExistingKey(key, keys, *aggregates_pool); continue; - } /// If a new key is inserted, initialize the states of the aggregate functions, and possibly something related to the key. if (inserted) { *aggregate_data = nullptr; - if constexpr (!Method::low_cardinality_optimization) - method.onNewKey(*it, params.keys_size, keys, *aggregates_pool); - AggregateDataPtr place = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); createAggregateStates(place); *aggregate_data = place; - if constexpr (Method::low_cardinality_optimization) - state.cacheAggregateData(i, place); + state.cacheData(i, place); } - else - method.onExistingKey(key, keys, *aggregates_pool); AggregateDataPtr value = aggregate_data ? *aggregate_data : overflow_row; @@ -2163,7 +2077,7 @@ void Aggregator::mergeStream(const BlockInputStreamPtr & stream, AggregatedDataV * If there is at least one block with a bucket number greater or equal than zero, then there was a two-level aggregation. 
*/ auto max_bucket = bucket_to_blocks.rbegin()->first; - size_t has_two_level = max_bucket >= 0; + bool has_two_level = max_bucket >= 0; if (has_two_level) { @@ -2393,15 +2307,11 @@ void NO_INLINE Aggregator::convertBlockToTwoLevelImpl( Method & method, Arena * pool, ColumnRawPtrs & key_columns, - StringRefs & keys, + StringRefs & keys [[maybe_unused]], const Block & source, std::vector & destinations) const { - typename Method::State state; - if constexpr (Method::low_cardinality_optimization) - state.init(key_columns, aggregation_state_cache); - else - state.init(key_columns); + typename Method::State state(key_columns, key_sizes, aggregation_state_cache); size_t rows = source.rows(); size_t columns = source.columns(); @@ -2421,16 +2331,11 @@ void NO_INLINE Aggregator::convertBlockToTwoLevelImpl( } } - /// Obtain a key. Calculate bucket number from it. - typename Method::Key key = state.getKey(key_columns, params.keys_size, i, key_sizes, keys, *pool); - - auto hash = method.data.hash(key); + /// Calculate bucket number from row hash. + auto hash = state.getHash(method.data, i, *pool); auto bucket = method.data.getBucketFromHash(hash); selector[i] = bucket; - - /// We don't need to store this key in pool. - method.onExistingKey(key, keys, *pool); } size_t num_buckets = destinations.size(); @@ -2521,7 +2426,7 @@ void NO_INLINE Aggregator::destroyImpl(Table & table) const { for (auto elem : table) { - AggregateDataPtr & data = Method::getAggregateData(elem.second); + AggregateDataPtr & data = elem.second; /** If an exception (usually a lack of memory, the MemoryTracker throws) arose * after inserting the key into a hash table, but before creating all states of aggregate functions, diff --git a/dbms/src/Interpreters/Aggregator.h b/dbms/src/Interpreters/Aggregator.h index f51f620064f..0b40f4e6a25 100644 --- a/dbms/src/Interpreters/Aggregator.h +++ b/dbms/src/Interpreters/Aggregator.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -138,18 +139,6 @@ using AggregatedDataWithNullableStringKeyTwoLevel = AggregationDataWithNullKeyTw TwoLevelHashMapWithSavedHash, TwoLevelHashTableGrower<>, HashTableAllocator, HashTableWithNullKey>>; -/// Cache which can be used by aggregations method's states. Object is shared in all threads. -struct AggregationStateCache -{ - virtual ~AggregationStateCache() = default; - - struct Settings - { - size_t max_threads; - }; -}; - -using AggregationStateCachePtr = std::shared_ptr; /// For the case where there is one numeric key. template /// UInt8/16/32/64 for any type with corresponding bit width. @@ -169,65 +158,16 @@ struct AggregationMethodOneNumber AggregationMethodOneNumber(const Other & other) : data(other.data) {} /// To use one `Method` in different threads, use different `State`. - struct State - { - const char * vec; + using State = ColumnsHashing::HashMethodOneNumber; - /** Called at the start of each block processing. - * Sets the variables needed for the other methods called in inner loops. - */ - void init(ColumnRawPtrs & key_columns) - { - vec = key_columns[0]->getRawData().data; - } - - /// Get the key from the key columns for insertion into the hash table. - ALWAYS_INLINE Key getKey( - const ColumnRawPtrs & /*key_columns*/, - size_t /*keys_size*/, /// Number of key columns. - size_t i, /// From which row of the block, get the key. - const Sizes & /*key_sizes*/, /// If the keys of a fixed length - their lengths. It is not used in aggregation methods for variable length keys. 
- StringRefs & /*keys*/, /// Here references to key data in columns can be written. They can be used in the future. - Arena & /*pool*/) const - { - return unalignedLoad(vec + i * sizeof(FieldType)); - } - }; - - /// From the value in the hash table, get AggregateDataPtr. - static AggregateDataPtr & getAggregateData(Mapped & value) { return value; } - static const AggregateDataPtr & getAggregateData(const Mapped & value) { return value; } - - /** Place additional data, if necessary, in case a new key was inserted into the hash table. - */ - static ALWAYS_INLINE void onNewKey(typename Data::value_type & /*value*/, size_t /*keys_size*/, StringRefs & /*keys*/, Arena & /*pool*/) - { - } - - /** The action to be taken if the key is not new. For example, roll back the memory allocation in the pool. - */ - static ALWAYS_INLINE void onExistingKey(const Key & /*key*/, StringRefs & /*keys*/, Arena & /*pool*/) {} - - /** Do not use optimization for consecutive keys. - */ - static const bool no_consecutive_keys_optimization = false; /// Use optimization for low cardinality. static const bool low_cardinality_optimization = false; - /** Insert the key from the hash table into columns. - */ + // Insert the key from the hash table into columns. static void insertKeyIntoColumns(const typename Data::value_type & value, MutableColumns & key_columns, const Sizes & /*key_sizes*/) { static_cast(key_columns[0].get())->insertRawData(reinterpret_cast(&value.first)); } - - /// Get StringRef from value which can be inserted into column. - static StringRef getValueRef(const typename Data::value_type & value) - { - return StringRef(reinterpret_cast(&value.first), sizeof(value.first)); - } - - static AggregationStateCachePtr createCache(const AggregationStateCache::Settings & /*settings*/) { return nullptr; } }; @@ -248,58 +188,14 @@ struct AggregationMethodString template AggregationMethodString(const Other & other) : data(other.data) {} - struct State - { - const IColumn::Offset * offsets; - const UInt8 * chars; + using State = ColumnsHashing::HashMethodString; - void init(ColumnRawPtrs & key_columns) - { - const IColumn & column = *key_columns[0]; - const ColumnString & column_string = static_cast(column); - offsets = column_string.getOffsets().data(); - chars = column_string.getChars().data(); - } - - ALWAYS_INLINE Key getKey( - const ColumnRawPtrs & /*key_columns*/, - size_t /*keys_size*/, - ssize_t i, - const Sizes & /*key_sizes*/, - StringRefs & /*keys*/, - Arena & /*pool*/) const - { - return StringRef( - chars + offsets[i - 1], - offsets[i] - offsets[i - 1] - 1); - } - }; - - static AggregateDataPtr & getAggregateData(Mapped & value) { return value; } - static const AggregateDataPtr & getAggregateData(const Mapped & value) { return value; } - - static ALWAYS_INLINE void onNewKey(typename Data::value_type & value, size_t /*keys_size*/, StringRefs & /*keys*/, Arena & pool) - { - if (value.first.size) - value.first.data = pool.insert(value.first.data, value.first.size); - } - - static ALWAYS_INLINE void onExistingKey(const Key & /*key*/, StringRefs & /*keys*/, Arena & /*pool*/) {} - - static const bool no_consecutive_keys_optimization = false; static const bool low_cardinality_optimization = false; - static StringRef getValueRef(const typename Data::value_type & value) - { - return StringRef(value.first.data, value.first.size); - } - static void insertKeyIntoColumns(const typename Data::value_type & value, MutableColumns & key_columns, const Sizes &) { key_columns[0]->insertData(value.first.data, 
value.first.size); } - - static AggregationStateCachePtr createCache(const AggregationStateCache::Settings & /*settings*/) { return nullptr; } }; @@ -320,101 +216,14 @@ struct AggregationMethodFixedString template AggregationMethodFixedString(const Other & other) : data(other.data) {} - struct State - { - size_t n; - const ColumnFixedString::Chars * chars; + using State = ColumnsHashing::HashMethodFixedString; - void init(ColumnRawPtrs & key_columns) - { - const IColumn & column = *key_columns[0]; - const ColumnFixedString & column_string = static_cast(column); - n = column_string.getN(); - chars = &column_string.getChars(); - } - - ALWAYS_INLINE Key getKey( - const ColumnRawPtrs &, - size_t, - size_t i, - const Sizes &, - StringRefs &, - Arena &) const - { - return StringRef(&(*chars)[i * n], n); - } - }; - - static AggregateDataPtr & getAggregateData(Mapped & value) { return value; } - static const AggregateDataPtr & getAggregateData(const Mapped & value) { return value; } - - static ALWAYS_INLINE void onNewKey(typename Data::value_type & value, size_t, StringRefs &, Arena & pool) - { - value.first.data = pool.insert(value.first.data, value.first.size); - } - - static ALWAYS_INLINE void onExistingKey(const Key &, StringRefs &, Arena &) {} - - static const bool no_consecutive_keys_optimization = false; static const bool low_cardinality_optimization = false; - static StringRef getValueRef(const typename Data::value_type & value) - { - return StringRef(value.first.data, value.first.size); - } - static void insertKeyIntoColumns(const typename Data::value_type & value, MutableColumns & key_columns, const Sizes &) { key_columns[0]->insertData(value.first.data, value.first.size); } - - static AggregationStateCachePtr createCache(const AggregationStateCache::Settings & /*settings*/) { return nullptr; } -}; - -/// Cache stores dictionaries and saved_hash per dictionary key. -class LowCardinalityDictionaryCache : public AggregationStateCache -{ -public: - /// Will assume that dictionaries with same hash has the same keys. - /// Just in case, check that they have also the same size. - struct DictionaryKey - { - UInt128 hash; - UInt64 size; - - bool operator== (const DictionaryKey & other) const { return hash == other.hash && size == other.size; } - }; - - struct DictionaryKeyHash - { - size_t operator()(const DictionaryKey & key) const - { - SipHash hash; - hash.update(key.hash.low); - hash.update(key.hash.high); - hash.update(key.size); - return hash.get64(); - } - }; - - struct CachedValues - { - /// Store ptr to dictionary to be sure it won't be deleted. - ColumnPtr dictionary_holder; - /// Hashes for dictionary keys. - const UInt64 * saved_hash = nullptr; - }; - - using CachedValuesPtr = std::shared_ptr; - - explicit LowCardinalityDictionaryCache(const AggregationStateCache::Settings & settings) : cache(settings.max_threads) {} - - CachedValuesPtr get(const DictionaryKey & key) { return cache.get(key); } - void set(const DictionaryKey & key, const CachedValuesPtr & mapped) { cache.set(key, mapped); } - -private: - using Cache = LRUCache; - Cache cache; }; /// Single low cardinality column. 
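With every aggregation method reduced to a "using State = ColumnsHashing::...;" alias, the per-block driver code becomes uniform. A schematic of the loop, condensed from Aggregator::executeImplCase() earlier in this patch; allocateAggregateStates() is a hypothetical placeholder, and the emplaceKey() signature is the one used at this stage of the series (a later refactoring wraps the same idea in result objects):

    // Schematic driver; method, key columns, cache and pool come from the caller.
    template <typename Method>
    void aggregateBlock(Method & method, ColumnRawPtrs & key_columns, const Sizes & key_sizes,
                        const HashMethodContextPtr & cache, Arena & pool, size_t rows)
    {
        typename Method::State state(key_columns, key_sizes, cache);

        for (size_t row = 0; row < rows; ++row)
        {
            bool inserted = false;
            AggregateDataPtr * aggregate_data = state.emplaceKey(method.data, row, inserted, pool);

            if (inserted)
            {
                *aggregate_data = allocateAggregateStates(pool);   /// hypothetical: alloc + createAggregateStates
                state.cacheData(row, *aggregate_data);             /// feeds the low-cardinality row cache
            }
        }
    }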
@@ -432,342 +241,23 @@ struct AggregationMethodSingleLowCardinalityColumn : public SingleColumnMethod using Base::data; - static AggregationStateCachePtr createCache(const AggregationStateCache::Settings & settings) - { - return std::make_shared(settings); - } - AggregationMethodSingleLowCardinalityColumn() = default; template explicit AggregationMethodSingleLowCardinalityColumn(const Other & other) : Base(other) {} - struct State : public BaseState - { - ColumnRawPtrs key_columns; - const IColumn * positions = nullptr; - size_t size_of_index_type = 0; + using State = ColumnsHashing::HashMethodSingleLowCardinalityColumn; - /// saved hash is from current column or from cache. - const UInt64 * saved_hash = nullptr; - /// Hold dictionary in case saved_hash is from cache to be sure it won't be deleted. - ColumnPtr dictionary_holder; - - /// Cache AggregateDataPtr for current column in order to decrease the number of hash table usages. - PaddedPODArray aggregate_data_cache; - - /// If initialized column is nullable. - bool is_nullable = false; - - void init(ColumnRawPtrs &) - { - throw Exception("Expected cache for AggregationMethodSingleLowCardinalityColumn::init", ErrorCodes::LOGICAL_ERROR); - } - - void init(ColumnRawPtrs & key_columns_low_cardinality, const AggregationStateCachePtr & cache_ptr) - { - auto column = typeid_cast(key_columns_low_cardinality[0]); - if (!column) - throw Exception("Invalid aggregation key type for AggregationMethodSingleLowCardinalityColumn method. " - "Excepted LowCardinality, got " + key_columns_low_cardinality[0]->getName(), ErrorCodes::LOGICAL_ERROR); - - if (!cache_ptr) - throw Exception("Cache wasn't created for AggregationMethodSingleLowCardinalityColumn", ErrorCodes::LOGICAL_ERROR); - - auto cache = typeid_cast(cache_ptr.get()); - if (!cache) - { - const auto & cached_val = *cache_ptr; - throw Exception("Invalid type for AggregationMethodSingleLowCardinalityColumn cache: " - + demangle(typeid(cached_val).name()), ErrorCodes::LOGICAL_ERROR); - } - - auto * dict = column->getDictionary().getNestedNotNullableColumn().get(); - is_nullable = column->getDictionary().nestedColumnIsNullable(); - key_columns = {dict}; - bool is_shared_dict = column->isSharedDictionary(); - - typename LowCardinalityDictionaryCache::DictionaryKey dictionary_key; - typename LowCardinalityDictionaryCache::CachedValuesPtr cached_values; - - if (is_shared_dict) - { - dictionary_key = {column->getDictionary().getHash(), dict->size()}; - cached_values = cache->get(dictionary_key); - } - - if (cached_values) - { - saved_hash = cached_values->saved_hash; - dictionary_holder = cached_values->dictionary_holder; - } - else - { - saved_hash = column->getDictionary().tryGetSavedHash(); - dictionary_holder = column->getDictionaryPtr(); - - if (is_shared_dict) - { - cached_values = std::make_shared(); - cached_values->saved_hash = saved_hash; - cached_values->dictionary_holder = dictionary_holder; - - cache->set(dictionary_key, cached_values); - } - } - - AggregateDataPtr default_data = nullptr; - aggregate_data_cache.assign(key_columns[0]->size(), default_data); - - size_of_index_type = column->getSizeOfIndexType(); - positions = column->getIndexesPtr().get(); - - BaseState::init(key_columns); - } - - ALWAYS_INLINE size_t getIndexAt(size_t row) const - { - switch (size_of_index_type) - { - case sizeof(UInt8): return static_cast(positions)->getElement(row); - case sizeof(UInt16): return static_cast(positions)->getElement(row); - case sizeof(UInt32): return static_cast(positions)->getElement(row); - 
case sizeof(UInt64): return static_cast(positions)->getElement(row); - default: throw Exception("Unexpected size of index type for low cardinality column.", ErrorCodes::LOGICAL_ERROR); - } - } - - /// Get the key from the key columns for insertion into the hash table. - ALWAYS_INLINE Key getKey( - const ColumnRawPtrs & /*key_columns*/, - size_t /*keys_size*/, - size_t i, - const Sizes & key_sizes, - StringRefs & keys, - Arena & pool) const - { - size_t row = getIndexAt(i); - return BaseState::getKey(key_columns, 1, row, key_sizes, keys, pool); - } - - template - ALWAYS_INLINE AggregateDataPtr * emplaceKeyFromRow( - D & data, - size_t i, - bool & inserted, - size_t keys_size, - StringRefs & keys, - Arena & pool) - { - size_t row = getIndexAt(i); - - if (is_nullable && row == 0) - { - inserted = !data.hasNullKeyData(); - data.hasNullKeyData() = true; - return &data.getNullKeyData(); - } - - if (aggregate_data_cache[row]) - { - inserted = false; - return &aggregate_data_cache[row]; - } - else - { - Sizes key_sizes; - auto key = getKey({}, 0, i, key_sizes, keys, pool); - - typename D::iterator it; - if (saved_hash) - data.emplace(key, it, inserted, saved_hash[row]); - else - data.emplace(key, it, inserted); - - if (inserted) - Base::onNewKey(*it, keys_size, keys, pool); - else - aggregate_data_cache[row] = Base::getAggregateData(it->second); - - return &Base::getAggregateData(it->second); - } - } - - ALWAYS_INLINE bool isNullAt(size_t i) - { - if (!is_nullable) - return false; - - return getIndexAt(i) == 0; - } - - ALWAYS_INLINE void cacheAggregateData(size_t i, AggregateDataPtr data) - { - size_t row = getIndexAt(i); - aggregate_data_cache[row] = data; - } - - template - ALWAYS_INLINE AggregateDataPtr * findFromRow(D & data, size_t i) - { - size_t row = getIndexAt(i); - - if (is_nullable && row == 0) - return data.hasNullKeyData() ? &data.getNullKeyData() : nullptr; - - if (!aggregate_data_cache[row]) - { - Sizes key_sizes; - StringRefs keys; - Arena pool; - auto key = getKey({}, 0, i, key_sizes, keys, pool); - - typename D::iterator it; - if (saved_hash) - it = data.find(key, saved_hash[row]); - else - it = data.find(key); - - if (it != data.end()) - aggregate_data_cache[row] = Base::getAggregateData(it->second); - } - return &aggregate_data_cache[row]; - } - }; - - static AggregateDataPtr & getAggregateData(Mapped & value) { return Base::getAggregateData(value); } - static const AggregateDataPtr & getAggregateData(const Mapped & value) { return Base::getAggregateData(value); } - - static void onNewKey(typename Data::value_type & value, size_t keys_size, StringRefs & keys, Arena & pool) - { - return Base::onNewKey(value, keys_size, keys, pool); - } - - static void onExistingKey(const Key & key, StringRefs & keys, Arena & pool) - { - return Base::onExistingKey(key, keys, pool); - } - - static const bool no_consecutive_keys_optimization = true; static const bool low_cardinality_optimization = true; static void insertKeyIntoColumns(const typename Data::value_type & value, MutableColumns & key_columns_low_cardinality, const Sizes & /*key_sizes*/) { - auto ref = Base::getValueRef(value); + auto ref = BaseState::getValueRef(value); static_cast(key_columns_low_cardinality[0].get())->insertData(ref.data, ref.size); } }; -namespace aggregator_impl -{ - -/// This class is designed to provide the functionality that is required for -/// supporting nullable keys in AggregationMethodKeysFixed. If there are -/// no nullable keys, this class is merely implemented as an empty shell. 
-template -class BaseStateKeysFixed; - -/// Case where nullable keys are supported. -template -class BaseStateKeysFixed -{ -protected: - void init(const ColumnRawPtrs & key_columns) - { - null_maps.reserve(key_columns.size()); - actual_columns.reserve(key_columns.size()); - - for (const auto & col : key_columns) - { - if (col->isColumnNullable()) - { - const auto & nullable_col = static_cast(*col); - actual_columns.push_back(&nullable_col.getNestedColumn()); - null_maps.push_back(&nullable_col.getNullMapColumn()); - } - else - { - actual_columns.push_back(col); - null_maps.push_back(nullptr); - } - } - } - - /// Return the columns which actually contain the values of the keys. - /// For a given key column, if it is nullable, we return its nested - /// column. Otherwise we return the key column itself. - inline const ColumnRawPtrs & getActualColumns() const - { - return actual_columns; - } - - /// Create a bitmap that indicates whether, for a particular row, - /// a key column bears a null value or not. - KeysNullMap createBitmap(size_t row) const - { - KeysNullMap bitmap{}; - - for (size_t k = 0; k < null_maps.size(); ++k) - { - if (null_maps[k] != nullptr) - { - const auto & null_map = static_cast(*null_maps[k]).getData(); - if (null_map[row] == 1) - { - size_t bucket = k / 8; - size_t offset = k % 8; - bitmap[bucket] |= UInt8(1) << offset; - } - } - } - - return bitmap; - } - -private: - ColumnRawPtrs actual_columns; - ColumnRawPtrs null_maps; -}; - -/// Case where nullable keys are not supported. -template -class BaseStateKeysFixed -{ -protected: - void init(const ColumnRawPtrs &) - { - throw Exception{"Internal error: calling init() for non-nullable" - " keys is forbidden", ErrorCodes::LOGICAL_ERROR}; - } - - const ColumnRawPtrs & getActualColumns() const - { - throw Exception{"Internal error: calling getActualColumns() for non-nullable" - " keys is forbidden", ErrorCodes::LOGICAL_ERROR}; - } - - KeysNullMap createBitmap(size_t) const - { - throw Exception{"Internal error: calling createBitmap() for non-nullable keys" - " is forbidden", ErrorCodes::LOGICAL_ERROR}; - } -}; - -} - -// Oprional mask for low cardinality columns. -template -struct LowCardinalityKeys -{ - ColumnRawPtrs nested_columns; - ColumnRawPtrs positions; - Sizes position_sizes; -}; - -template <> -struct LowCardinalityKeys {}; - /// For the case where all keys are of fixed length, and they fit in N (for example, 128) bits. 
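One detail worth spelling out for the fixed-keys case below, now that the nullable-keys machinery has moved out of Aggregator.h: the null bitmap has to be folded into the packed key itself, because rows such as (NULL, 0) and (0, NULL) have identical value bytes and would otherwise collide. A hedged sketch; the real packFixed() byte layout may differ:

    #include <array>
    #include <cstdint>
    #include <cstring>
    #include <optional>

    // Pack two nullable UInt32 keys as [bitmap byte][a bytes][b bytes].
    // (NULL, 0) and (0, NULL) now differ in byte 0 even though the value
    // bytes match (NULL values are zero-filled).
    std::array<uint8_t, 16> packNullableKeys(std::optional<uint32_t> a, std::optional<uint32_t> b)
    {
        std::array<uint8_t, 16> key{};
        key[0] = static_cast<uint8_t>((a ? 0 : 1) | (b ? 0 : 2));
        uint32_t va = a.value_or(0);
        uint32_t vb = b.value_or(0);
        std::memcpy(key.data() + 1, &va, sizeof(va));
        std::memcpy(key.data() + 1 + sizeof(va), &vb, sizeof(vb));
        return key;
    }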
template struct AggregationMethodKeysFixed @@ -787,71 +277,8 @@ struct AggregationMethodKeysFixed template AggregationMethodKeysFixed(const Other & other) : data(other.data) {} - class State final : private aggregator_impl::BaseStateKeysFixed - { - LowCardinalityKeys low_cardinality_keys; + using State = ColumnsHashing::HashMethodKeysFixed; - public: - using Base = aggregator_impl::BaseStateKeysFixed; - - void init(ColumnRawPtrs & key_columns) - { - if constexpr (has_low_cardinality) - { - low_cardinality_keys.nested_columns.resize(key_columns.size()); - low_cardinality_keys.positions.assign(key_columns.size(), nullptr); - low_cardinality_keys.position_sizes.resize(key_columns.size()); - for (size_t i = 0; i < key_columns.size(); ++i) - { - if (auto * low_cardinality_col = typeid_cast(key_columns[i])) - { - low_cardinality_keys.nested_columns[i] = low_cardinality_col->getDictionary().getNestedColumn().get(); - low_cardinality_keys.positions[i] = &low_cardinality_col->getIndexes(); - low_cardinality_keys.position_sizes[i] = low_cardinality_col->getSizeOfIndexType(); - } - else - low_cardinality_keys.nested_columns[i] = key_columns[i]; - } - } - - if (has_nullable_keys) - Base::init(key_columns); - } - - ALWAYS_INLINE Key getKey( - const ColumnRawPtrs & key_columns, - size_t keys_size, - size_t i, - const Sizes & key_sizes, - StringRefs &, - Arena &) const - { - if (has_nullable_keys) - { - auto bitmap = Base::createBitmap(i); - return packFixed(i, keys_size, Base::getActualColumns(), key_sizes, bitmap); - } - else - { - if constexpr (has_low_cardinality) - return packFixed(i, keys_size, low_cardinality_keys.nested_columns, key_sizes, - &low_cardinality_keys.positions, &low_cardinality_keys.position_sizes); - - return packFixed(i, keys_size, key_columns, key_sizes); - } - } - }; - - static AggregateDataPtr & getAggregateData(Mapped & value) { return value; } - static const AggregateDataPtr & getAggregateData(const Mapped & value) { return value; } - - static ALWAYS_INLINE void onNewKey(typename Data::value_type &, size_t, StringRefs &, Arena &) - { - } - - static ALWAYS_INLINE void onExistingKey(const Key &, StringRefs &, Arena &) {} - - static const bool no_consecutive_keys_optimization = false; static const bool low_cardinality_optimization = false; static void insertKeyIntoColumns(const typename Data::value_type & value, MutableColumns & key_columns, const Sizes & key_sizes) @@ -904,8 +331,6 @@ struct AggregationMethodKeysFixed } } } - - static AggregationStateCachePtr createCache(const AggregationStateCache::Settings & /*settings*/) { return nullptr; } }; @@ -930,53 +355,24 @@ struct AggregationMethodSerialized template AggregationMethodSerialized(const Other & other) : data(other.data) {} - struct State - { - void init(ColumnRawPtrs &) - { - } + using State = ColumnsHashing::HashMethodSerialized; - ALWAYS_INLINE Key getKey( - const ColumnRawPtrs & key_columns, - size_t keys_size, - size_t i, - const Sizes &, - StringRefs &, - Arena & pool) const - { - return serializeKeysToPoolContiguous(i, keys_size, key_columns, pool); - } - }; - - static AggregateDataPtr & getAggregateData(Mapped & value) { return value; } - static const AggregateDataPtr & getAggregateData(const Mapped & value) { return value; } - - static ALWAYS_INLINE void onNewKey(typename Data::value_type &, size_t, StringRefs &, Arena &) - { - } - - static ALWAYS_INLINE void onExistingKey(const Key & key, StringRefs &, Arena & pool) - { - pool.rollback(key.size); - } - - /// If the key already was, it is removed from the pool 
(overwritten), and the next key can not be compared with it. - static const bool no_consecutive_keys_optimization = true; static const bool low_cardinality_optimization = false; static void insertKeyIntoColumns(const typename Data::value_type & value, MutableColumns & key_columns, const Sizes &) { auto pos = value.first.data; - for (size_t i = 0; i < key_columns.size(); ++i) - pos = key_columns[i]->deserializeAndInsertFromArena(pos); + for (auto & column : key_columns) + pos = column->deserializeAndInsertFromArena(pos); } - - static AggregationStateCachePtr createCache(const AggregationStateCache::Settings & /*settings*/) { return nullptr; } }; class Aggregator; +using ColumnsHashing::HashMethodContext; +using ColumnsHashing::HashMethodContextPtr; + struct AggregatedDataVariants : private boost::noncopyable { /** Working with states of aggregate functions in the pool is arranged in the following (inconvenient) way: @@ -1298,7 +694,7 @@ struct AggregatedDataVariants : private boost::noncopyable } } - static AggregationStateCachePtr createCache(Type type, const AggregationStateCache::Settings & settings) + static HashMethodContextPtr createCache(Type type, const HashMethodContext::Settings & settings) { switch (type) { @@ -1309,7 +705,7 @@ struct AggregatedDataVariants : private boost::noncopyable { \ using TPtr ## NAME = decltype(AggregatedDataVariants::NAME); \ using T ## NAME = typename TPtr ## NAME ::element_type; \ - return T ## NAME ::createCache(settings); \ + return T ## NAME ::State::createContext(settings); \ } APPLY_FOR_AGGREGATED_VARIANTS(M) @@ -1496,7 +892,7 @@ protected: AggregatedDataVariants::Type method_chosen; Sizes key_sizes; - AggregationStateCachePtr aggregation_state_cache; + HashMethodContextPtr aggregation_state_cache; AggregateFunctionsPlainPtrs aggregate_functions; From 6fce028b56eb97ccfd65f3ce9624d28dfcc603fb Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 21 Jan 2019 13:39:53 +0300 Subject: [PATCH 103/586] Refactor ColumnsHashing. 
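The central change in this refactoring: emplaceKey()/findKey() stop returning raw mapped pointers with bool out-parameters and instead return small result objects. Their definitions live in the new ColumnsHashingImpl.h, which is not reproduced here; the following is a hedged reconstruction from the call sites visible in this diff (EmplaceResult(value, cached, inserted), EmplaceResult(inserted) for the mapped-less case, result.isInserted(), FindResult(mapped, found)), so the member names are guesses:

    // Hedged reconstruction of the result-object shape; not the actual
    // ColumnsHashingImpl.h code. A separate specialization covers hash
    // sets, where there is no mapped value and only the bool survives.
    template <typename Mapped>
    struct EmplaceResultSketch
    {
        Mapped & value;          // mapped value in the hash table
        Mapped & cached_value;   // slot in the consecutive-keys / row cache
        bool inserted;

        EmplaceResultSketch(Mapped & value_, Mapped & cached_value_, bool inserted_)
            : value(value_), cached_value(cached_value_), inserted(inserted_) {}

        bool isInserted() const { return inserted; }
        Mapped & getMapped() const { return value; }
    };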
--- dbms/src/Common/ColumnsHashing.h | 484 +++++++-------------------- dbms/src/Common/ColumnsHashingImpl.h | 276 +++++++++++++++ dbms/src/Interpreters/Aggregator.cpp | 82 ++--- dbms/src/Interpreters/Aggregator.h | 12 +- 4 files changed, 447 insertions(+), 407 deletions(-) create mode 100644 dbms/src/Common/ColumnsHashingImpl.h diff --git a/dbms/src/Common/ColumnsHashing.h b/dbms/src/Common/ColumnsHashing.h index 0a6d5464341..61c3d71cbbc 100644 --- a/dbms/src/Common/ColumnsHashing.h +++ b/dbms/src/Common/ColumnsHashing.h @@ -1,14 +1,17 @@ #pragma once -#include -#include -#include -#include -#include + + +#include #include -#include #include #include -#include + +#include +#include +#include + +#include +#include namespace DB { @@ -32,118 +35,12 @@ public: using HashMethodContextPtr = std::shared_ptr; -template -struct MappedTraits -{ - using Type = void *; - static Type getMapped(T &) { return nullptr; } - static T & getKey(T & key) { return key; } -}; - -template -struct MappedTraits> -{ - using Type = Second *; - static Type getMapped(PairNoInit & value) { return &value.second; } - static First & getKey(PairNoInit & value) { return value.first; } -}; - -template -struct HashTableTraits -{ - using Value = typename Data::value_type; - using Mapped = typename MappedTraits::Type; - - static Mapped getMapped(Value & value) { return MappedTraits::getMapped(value); } - static auto & getKey(Value & value) { return MappedTraits::getKey(value); } -}; - -template -struct LastElementCache -{ - static constexpr bool consecutive_keys_optimization = consecutive_keys_optimization_; - using Value = typename HashTableTraits::Value; - Value value; - bool empty = true; - bool found = false; - - auto getMapped() { return HashTableTraits::getMapped(value); } - auto & getKey() { return HashTableTraits::getKey(value); } -}; - -template -struct LastElementCache -{ - static constexpr bool consecutive_keys_optimization = false; -}; - -template -inline ALWAYS_INLINE typename HashTableTraits::Value & emplaceKeyImpl( - Key key, Data & data, bool & inserted, Cache & cache [[maybe_unused]]) -{ - if constexpr (Cache::consecutive_keys_optimization) - { - if (!cache.empty && cache.found && cache.getKey() == key) - { - inserted = false; - return cache.value; - } - } - - typename Data::iterator it; - data.emplace(key, it, inserted); - auto & value = *it; - - if constexpr (Cache::consecutive_keys_optimization) - { - cache.value = value; - cache.empty = false; - cache.found = true; - } - - return value; -} - -template -inline ALWAYS_INLINE typename HashTableTraits::Mapped findKeyImpl( - Key key, Data & data, bool & found, Cache & cache [[maybe_unused]]) -{ - if constexpr (Cache::consecutive_keys_optimization) - { - if (!cache.empty && cache.getKey() == key) - { - found = cache.found; - return found ? cache.getMapped() : nullptr; - } - } - - auto it = data.find(key); - - found = it != data.end(); - auto mapped = found ? HashTableTraits::getMapped(*it) - : nullptr; - - if constexpr (Cache::consecutive_keys_optimization) - { - if (found) - cache.value = *it; - else - cache.getKey() = key; - - cache.empty = false; - cache.found = found; - } - - return mapped; -} - - /// For the case where there is one numeric key. -template /// UInt8/16/32/64 for any type with corresponding bit width. -struct HashMethodOneNumber +template /// UInt8/16/32/64 for any type with corresponding bit width. 
+struct HashMethodOneNumber : public columns_hashing_impl::HashMethodBase { + using Base = columns_hashing_impl::HashMethodBase; const char * vec; - LastElementCache last_elem_cache; /// If the keys of a fixed length then key_sizes contains their lengths, empty otherwise. HashMethodOneNumber(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &) @@ -158,27 +55,20 @@ struct HashMethodOneNumber /// Emplace key into HashTable or HashMap. If Data is HashMap, returns ptr to value, otherwise nullptr. template - ALWAYS_INLINE typename HashTableTraits::Mapped emplaceKey( + ALWAYS_INLINE typename Base::EmplaceResult emplaceKey( Data & data, /// HashTable size_t row, /// From which row of the block insert the key - bool & inserted, Arena & /*pool*/) /// For Serialized method, key may be placed in pool. { - return HashTableTraits::getMapped(emplaceKeyImpl(getKey(row), data, inserted, last_elem_cache)); + typename Data::iterator it; + return Base::emplaceKeyImpl(getKey(row), data, it); } /// Find key into HashTable or HashMap. If Data is HashMap and key was found, returns ptr to value, otherwise nullptr. template - ALWAYS_INLINE typename HashTableTraits::Mapped findKey(Data & data, size_t row, bool & found, Arena & /*pool*/) + ALWAYS_INLINE typename Base::FindResult findKey(Data & data, size_t row, Arena & /*pool*/) { - return findKeyImpl(getKey(row), data, found, last_elem_cache); - } - - /// Insert the key from the hash table into columns. - template - static void insertKeyIntoColumns(const Value & value, MutableColumns & key_columns, const Sizes & /*key_sizes*/) - { - static_cast(key_columns[0].get())->insertRawData(reinterpret_cast(&value.first)); + return Base::findKeyImpl(getKey(row), data); } /// Get hash value of row. @@ -189,34 +79,24 @@ struct HashMethodOneNumber } /// Get StringRef from value which can be inserted into column. - template static StringRef getValueRef(const Value & value) { return StringRef(reinterpret_cast(&value.first), sizeof(value.first)); } - /// Cache last result if key was inserted. - template - ALWAYS_INLINE void cacheData(size_t /*row*/, Mapped mapped) - { - *last_elem_cache.getMapped() = mapped; - } - protected: - template static ALWAYS_INLINE void onNewKey(Value & /*value*/, Arena & /*pool*/) {} }; /// For the case where there is one string key. 
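Before the string case: for the numeric method above, getKey() boils down to an unaligned load from the column's raw byte buffer, as the removed code spelled out with unalignedLoad(). A freestanding equivalent for a UInt64 column:

    #include <cstdint>
    #include <cstring>

    // Freestanding equivalent of unalignedLoad(vec + row * sizeof(FieldType))
    // as used by HashMethodOneNumber::getKey() for 8-byte keys.
    uint64_t loadKeyAt(const char * raw_data, size_t row)
    {
        uint64_t key;
        std::memcpy(&key, raw_data + row * sizeof(key), sizeof(key));
        return key;
    }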
-template -struct HashMethodString +template +struct HashMethodString : public columns_hashing_impl::HashMethodBase { + using Base = columns_hashing_impl::HashMethodBase; const IColumn::Offset * offsets; const UInt8 * chars; - LastElementCache last_elem_cache; - HashMethodString(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &) { const IColumn & column = *key_columns[0]; @@ -230,28 +110,23 @@ struct HashMethodString StringRef getKey(size_t row) const { return StringRef(chars + offsets[row - 1], offsets[row] - offsets[row - 1] - 1); } template - ALWAYS_INLINE typename HashTableTraits::Mapped emplaceKey(Data & data, size_t row, bool & inserted, Arena & pool) + ALWAYS_INLINE typename Base::EmplaceResult emplaceKey(Data & data, size_t row, Arena & pool) { - auto & value = emplaceKeyImpl(getKey(row), data, inserted, last_elem_cache); - if (inserted) + auto key = getKey(row); + typename Data::iterator it; + auto result = Base::emplaceKeyImpl(key, data, it); + if (result.isInserted()) { - auto & key = HashTableTraits::getKey(value); if (key.size) - key.data = pool.insert(key.data, key.size); + it->first.data = pool.insert(key.data, key.size); } - return HashTableTraits::getMapped(value); + return result; } template - ALWAYS_INLINE typename HashTableTraits::Mapped findKey(Data & data, size_t row, bool & found, Arena & /*pool*/) + ALWAYS_INLINE typename Base::FindResult findKey(Data & data, size_t row, Arena & /*pool*/) { - return findKeyImpl(getKey(row), data, found, last_elem_cache); - } - - template - static void insertKeyIntoColumns(const Value & value, MutableColumns & key_columns, const Sizes & /*key_sizes*/) - { - key_columns[0]->insertData(value.first.data, value.first.size); + return Base::findKeyImpl(getKey(row), data); } template @@ -260,20 +135,12 @@ struct HashMethodString return data.hash(getKey(row)); } - template static StringRef getValueRef(const Value & value) { return StringRef(value.first.data, value.first.size); } - template - ALWAYS_INLINE void cacheData(size_t /*row*/, Mapped mapped) - { - *last_elem_cache.getMapped() = mapped; - } - protected: - template static ALWAYS_INLINE void onNewKey(Value & value, Arena & pool) { if (value.first.size) @@ -283,14 +150,13 @@ protected: /// For the case where there is one fixed-length string key. 
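For the variable-length string method above, key extraction leans on ColumnString's offsets array, where each offset points one past the row's terminating zero byte. A self-contained model of the same arithmetic; the real column relies on PaddedPODArray guaranteeing offsets[-1] == 0, which the explicit leading zero emulates here:

    #include <cstdint>
    #include <string_view>
    #include <vector>

    // Model of HashMethodString::getKey(): row i spans
    // [offsets[i - 1], offsets[i] - 1) in the chars buffer.
    std::string_view rowAt(const std::vector<char> & chars,
                           const std::vector<uint64_t> & padded_offsets, size_t row)
    {
        uint64_t begin = padded_offsets[row];                   // == offsets[row - 1]
        uint64_t size = padded_offsets[row + 1] - begin - 1;    // minus terminating zero byte
        return {chars.data() + begin, size};
    }

    // E.g. chars = {'a','b',0,'c',0}, padded_offsets = {0, 3, 5}:
    // rowAt(..., 0) == "ab" and rowAt(..., 1) == "c".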
-template -struct HashMethodFixedString +template +struct HashMethodFixedString : public columns_hashing_impl::HashMethodBase { + using Base = columns_hashing_impl::HashMethodBase; size_t n; const ColumnFixedString::Chars * chars; - LastElementCache last_elem_cache; - HashMethodFixedString(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &) { const IColumn & column = *key_columns[0]; @@ -304,27 +170,21 @@ struct HashMethodFixedString StringRef getKey(size_t row) const { return StringRef(&(*chars)[row * n], n); } template - ALWAYS_INLINE typename HashTableTraits::Mapped emplaceKey(Data & data, size_t row, bool & inserted, Arena & pool) + ALWAYS_INLINE typename Base::EmplaceResult emplaceKey(Data & data, size_t row, Arena & pool) { - auto & value = emplaceKeyImpl(getKey(row), data, inserted, last_elem_cache); - if (inserted) - { - auto & key = HashTableTraits::getKey(value); - key.data = pool.insert(key.data, key.size); - } - return HashTableTraits::getMapped(value); + auto key = getKey(row); + typename Data::iterator it; + auto res = Base::emplaceKeyImpl(key, data, it); + if (res.isInserted()) + it->first.data = pool.insert(key.data, key.size); + + return res; } template - ALWAYS_INLINE typename HashTableTraits::Mapped findKey(Data & data, size_t row, bool & found, Arena & /*pool*/) + ALWAYS_INLINE typename Base::FindResult findKey(Data & data, size_t row, Arena & /*pool*/) { - return findKeyImpl(getKey(row), data, found, last_elem_cache); - } - - template - static void insertKeyIntoColumns(const Value & value, MutableColumns & key_columns, const Sizes & /*key_sizes*/) - { - key_columns[0]->insertData(value.first.data, value.first.size); + return Base::findKeyImpl(getKey(row), data); } template @@ -333,20 +193,12 @@ struct HashMethodFixedString return data.hash(getKey(row)); } - template static StringRef getValueRef(const Value & value) { return StringRef(value.first.data, value.first.size); } - template - ALWAYS_INLINE void cacheData(size_t /*row*/, Mapped mapped) - { - *last_elem_cache.getMapped() = mapped; - } - protected: - template static ALWAYS_INLINE void onNewKey(Value & value, Arena & pool) { value.first.data = pool.insert(value.first.data, value.first.size); @@ -400,12 +252,24 @@ private: Cache cache; }; + /// Single low cardinality column. -template +template struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod { using Base = SingleColumnMethod; + enum class VisitValue + { + Empty = 0, + Found = 1, + NotFound = 2, + }; + + static constexpr bool has_mapped = !std::is_same::value; + using EmplaceResult = columns_hashing_impl::EmplaceResultImpl; + using FindResult = columns_hashing_impl::FindResultImpl; + static HashMethodContextPtr createContext(const HashMethodContext::Settings & settings) { return std::make_shared(settings); @@ -421,7 +285,8 @@ struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod ColumnPtr dictionary_holder; /// Cache AggregateDataPtr for current column in order to decrease the number of hash table usages. - PaddedPODArray aggregate_data_cache; + columns_hashing_impl::MappedCache mapped_cache; + PaddedPODArray visit_cache; /// If initialized column is nullable. 
bool is_nullable = false; @@ -495,8 +360,11 @@ struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod } } - AggregateDataPtr default_data = nullptr; - aggregate_data_cache.assign(key_columns[0]->size(), default_data); + if constexpr (has_mapped) + mapped_cache.resize(key_columns[0]->size()); + + VisitValue empty(VisitValue::Empty); + visit_cache.assign(key_columns[0]->size(), empty); size_of_index_type = column->getSizeOfIndexType(); positions = column->getIndexesPtr().get(); @@ -521,41 +389,45 @@ struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod } template - ALWAYS_INLINE typename HashTableTraits::Mapped emplaceKey(Data & data, size_t row_, bool & inserted, Arena & pool) + ALWAYS_INLINE EmplaceResult emplaceKey(Data & data, size_t row_, Arena & pool) { size_t row = getIndexAt(row_); if (is_nullable && row == 0) { - inserted = !data.hasNullKeyData(); - data.hasNullKeyData() = true; - return &data.getNullKeyData(); + visit_cache[row] = VisitValue::Found; + if constexpr (has_mapped) + return EmplaceResult(data.getNullKeyData(), mapped_cache[0], !data.hasNullKeyData()); + else + return EmplaceResult(!data.hasNullKeyData()); } - if constexpr (use_cache) + if (visit_cache[row] == VisitValue::Found) { - if (aggregate_data_cache[row]) - { - inserted = false; - return &aggregate_data_cache[row]; - } + if constexpr (has_mapped) + return EmplaceResult(mapped_cache[row], mapped_cache[row], false); + else + return EmplaceResult(false); } - Sizes key_sizes; auto key = getKey(row_); + bool inserted = false; typename Data::iterator it; if (saved_hash) data.emplace(key, it, inserted, saved_hash[row]); else data.emplace(key, it, inserted); + visit_cache[row] = VisitValue::Found; + if (inserted) Base::onNewKey(*it, pool); - else if constexpr (use_cache) - aggregate_data_cache[row] = it->second; - return HashTableTraits::getMapped(*it); + if constexpr (has_mapped) + return EmplaceResult(it->second, mapped_cache[row], inserted); + else + return EmplaceResult(inserted); } ALWAYS_INLINE bool isNullAt(size_t i) @@ -566,25 +438,25 @@ struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod return getIndexAt(i) == 0; } - template - ALWAYS_INLINE void cacheData(size_t i, Mapped mapped) - { - size_t row = getIndexAt(i); - aggregate_data_cache[row] = mapped; - } - template - ALWAYS_INLINE typename HashTableTraits::Mapped findFromRow(Data & data, size_t row_, bool & found, Arena &) + ALWAYS_INLINE FindResult findFromRow(Data & data, size_t row_, Arena &) { size_t row = getIndexAt(row_); if (is_nullable && row == 0) - return data.hasNullKeyData() ? &data.getNullKeyData() : nullptr; - - if constexpr (use_cache) { - if (aggregate_data_cache[row]) - return &aggregate_data_cache[row]; + if constexpr (has_mapped) + return FindResult(data.hasNullKeyData() ? data.getNullKeyData() : Mapped(), data.hasNullKeyData()); + else + return FindResult(data.hasNullKeyData()); + } + + if (visit_cache[row] != VisitValue::Empty) + { + if constexpr (has_mapped) + return FindResult(mapped_cache[row], visit_cache[row] == VisitValue::Found); + else + return FindResult(visit_cache[row] == VisitValue::Found); } auto key = getKey(row_); @@ -595,14 +467,19 @@ struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod else it = data.find(key); - found = it != data.end(); - if constexpr (use_cache) + bool found = it != data.end(); + visit_cache[row] = found ? 
VisitValue::Found : VisitValue::NotFound; + + if constexpr (has_mapped) { if (found) - aggregate_data_cache[row] = it->second; + mapped_cache[row] = it->second; } - return typename HashTableTraits::getMapped(*it); + if constexpr (has_mapped) + return FindResult(mapped_cache[row], found); + else + return FindResult(found); } template @@ -614,108 +491,9 @@ struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod return Base::getHash(data, row, pool); } - - template - static void insertKeyIntoColumns(const Value & value, MutableColumns & key_columns_low_cardinality, const Sizes & /*key_sizes*/) - { - auto ref = Base::getValueRef(value); - static_cast(key_columns_low_cardinality[0].get())->insertData(ref.data, ref.size); - } }; -namespace columns_hashing_impl -{ - -/// This class is designed to provide the functionality that is required for -/// supporting nullable keys in HashMethodKeysFixed. If there are -/// no nullable keys, this class is merely implemented as an empty shell. -template -class BaseStateKeysFixed; - -/// Case where nullable keys are supported. -template -class BaseStateKeysFixed -{ -protected: - void init(const ColumnRawPtrs & key_columns) - { - null_maps.reserve(key_columns.size()); - actual_columns.reserve(key_columns.size()); - - for (const auto & col : key_columns) - { - if (col->isColumnNullable()) - { - const auto & nullable_col = static_cast(*col); - actual_columns.push_back(&nullable_col.getNestedColumn()); - null_maps.push_back(&nullable_col.getNullMapColumn()); - } - else - { - actual_columns.push_back(col); - null_maps.push_back(nullptr); - } - } - } - - /// Return the columns which actually contain the values of the keys. - /// For a given key column, if it is nullable, we return its nested - /// column. Otherwise we return the key column itself. - inline const ColumnRawPtrs & getActualColumns() const - { - return actual_columns; - } - - /// Create a bitmap that indicates whether, for a particular row, - /// a key column bears a null value or not. - KeysNullMap createBitmap(size_t row) const - { - KeysNullMap bitmap{}; - - for (size_t k = 0; k < null_maps.size(); ++k) - { - if (null_maps[k] != nullptr) - { - const auto & null_map = static_cast(*null_maps[k]).getData(); - if (null_map[row] == 1) - { - size_t bucket = k / 8; - size_t offset = k % 8; - bitmap[bucket] |= UInt8(1) << offset; - } - } - } - - return bitmap; - } - -private: - ColumnRawPtrs actual_columns; - ColumnRawPtrs null_maps; -}; - -/// Case where nullable keys are not supported. -template -class BaseStateKeysFixed -{ -protected: - void init(const ColumnRawPtrs & columns) { actual_columns = columns; } - - const ColumnRawPtrs & getActualColumns() const { return actual_columns; } - - KeysNullMap createBitmap(size_t) const - { - throw Exception{"Internal error: calling createBitmap() for non-nullable keys" - " is forbidden", ErrorCodes::LOGICAL_ERROR}; - } - -private: - ColumnRawPtrs actual_columns; -}; - -} - // Optional mask for low cardinality columns. template struct LowCardinalityKeys @@ -729,11 +507,11 @@ template <> struct LowCardinalityKeys {}; /// For the case where all keys are of fixed length, and they fit in N (for example, 128) bits. 
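[Editor's note] The low-cardinality method that ends above memoizes lookups per dictionary position: `visit_cache` records whether a position was already probed (and with what outcome), and `mapped_cache` keeps the mapped value, so each distinct key is looked up at most once per block. A toy model of that idea, under the assumption of an `unordered_map` in place of ClickHouse's hash tables:

```cpp
#include <unordered_map>
#include <vector>
#include <cstdint>
#include <iostream>

// Mirrors the VisitValue enum from the patch; everything else is illustrative.
enum class VisitValue : uint8_t { Empty, Found, NotFound };

int main()
{
    std::vector<int> dictionary = {10, 20, 30};         // distinct keys
    std::vector<size_t> positions = {0, 1, 0, 2, 1, 0}; // per-row dictionary indices
    std::unordered_map<int, int> table = {{10, 1}, {30, 3}};

    std::vector<VisitValue> visit(dictionary.size(), VisitValue::Empty);
    std::vector<int> mapped(dictionary.size());
    size_t lookups = 0;

    for (size_t pos : positions)
    {
        if (visit[pos] == VisitValue::Empty) // first time this position is seen
        {
            auto it = table.find(dictionary[pos]);
            ++lookups;
            visit[pos] = (it == table.end()) ? VisitValue::NotFound : VisitValue::Found;
            if (it != table.end())
                mapped[pos] = it->second;
        }
        // afterwards visit[pos]/mapped[pos] answer without touching the table
    }
    std::cout << "lookups: " << lookups << " for " << positions.size() << " rows\n"; // 3 for 6
}
```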
-template -struct HashMethodKeysFixed : private columns_hashing_impl::BaseStateKeysFixed +template +struct HashMethodKeysFixed + : private columns_hashing_impl::BaseStateKeysFixed + , public columns_hashing_impl::HashMethodBase { - using Key = typename TData::key_type; - static constexpr bool has_nullable_keys = has_nullable_keys_; static constexpr bool has_low_cardinality = has_low_cardinality_; @@ -741,9 +519,8 @@ struct HashMethodKeysFixed : private columns_hashing_impl::BaseStateKeysFixed last_elem_cache; - using Base = columns_hashing_impl::BaseStateKeysFixed; + using BaseHashed = columns_hashing_impl::HashMethodBase; HashMethodKeysFixed(const ColumnRawPtrs & key_columns, const Sizes & key_sizes, const HashMethodContextPtr &) : key_sizes(std::move(key_sizes)), keys_size(key_columns.size()) @@ -789,21 +566,16 @@ struct HashMethodKeysFixed : private columns_hashing_impl::BaseStateKeysFixed - ALWAYS_INLINE typename HashTableTraits::Mapped emplaceKey(Data & data, size_t row, bool & inserted, Arena & /*pool*/) + ALWAYS_INLINE typename BaseHashed::EmplaceResult emplaceKey(Data & data, size_t row, Arena & /*pool*/) { - return HashTableTraits::getMapped(emplaceKeyImpl(getKey(row), data, inserted, last_elem_cache)); + typename Data::iterator it; + return BaseHashed::emplaceKeyImpl(getKey(row), data, it); } template - ALWAYS_INLINE typename HashTableTraits::Mapped findKey(Data & data, size_t row, bool & found, Arena & /*pool*/) + ALWAYS_INLINE typename BaseHashed::FindResult findKey(Data & data, size_t row, Arena & /*pool*/) { - return findKeyImpl(getKey(row), data, found, last_elem_cache); - } - - template - static StringRef getValueRef(const Value & value) - { - return StringRef(value.first.data, value.first.size); + return BaseHashed::findKeyImpl(getKey(row), data); } template @@ -811,12 +583,6 @@ struct HashMethodKeysFixed : private columns_hashing_impl::BaseStateKeysFixed - ALWAYS_INLINE void cacheData(size_t /*row*/, Mapped mapped) - { - *last_elem_cache.getMapped() = mapped; - } }; /** Hash by concatenating serialized key values. 
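[Editor's note] The serialized-keys method that follows materializes the composite key into an arena, and rolls the bytes back when the hash table reports the key already existed (visible as `pool.rollback(key.size)` in the hunk below). A sketch of that allocate-then-rollback discipline; `Arena` here is a toy bump allocator standing in for `DB::Arena`:

```cpp
#include <cstddef>
#include <string>
#include <vector>
#include <iostream>

// Toy bump allocator: insert appends bytes, rollback undoes the last insert.
struct Arena
{
    std::vector<char> buf;
    void insert(const char * data, size_t size) { buf.insert(buf.end(), data, data + size); }
    void rollback(size_t size) { buf.resize(buf.size() - size); }
};

int main()
{
    Arena pool;
    std::string key = "user42|2019-01-22";  // pretend-serialized composite key
    bool already_present = true;            // e.g. the emplace/find result said so

    pool.insert(key.data(), key.size());    // key must live in the arena to be probed
    if (already_present)
        pool.rollback(key.size());          // don't keep duplicate key bytes around

    std::cout << pool.buf.size() << '\n';   // 0: the arena stays compact
}
```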
@@ -824,12 +590,12 @@ struct HashMethodKeysFixed : private columns_hashing_impl::BaseStateKeysFixed -struct HashMethodSerialized +template +struct HashMethodSerialized : public columns_hashing_impl::HashMethodBase { + using Base = columns_hashing_impl::HashMethodBase; ColumnRawPtrs key_columns; size_t keys_size; - LastElementCache last_elem_cache; HashMethodSerialized(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &) : key_columns(key_columns), keys_size(key_columns.size()) {} @@ -837,24 +603,25 @@ struct HashMethodSerialized static HashMethodContextPtr createContext(const HashMethodContext::Settings &) { return nullptr; } template - ALWAYS_INLINE typename HashTableTraits::Mapped emplaceKey(Data & data, size_t row, bool & inserted, Arena & pool) + ALWAYS_INLINE typename Base::EmplaceResult emplaceKey(Data & data, size_t row, Arena & pool) { auto key = getKey(row, pool); - auto & value = emplaceKeyImpl(key, data, inserted, last_elem_cache); - if (!inserted) + typename Data::iterator it; + auto res = Base::emplaceKeyImpl(key, data, it); + if (!res.isInserted()) pool.rollback(key.size); - return HashTableTraits::getMapped(value); + return res; } template - ALWAYS_INLINE typename HashTableTraits::Mapped findKey(Data & data, size_t row, bool & found, Arena & pool) + ALWAYS_INLINE typename Base::FindResult findKey(Data & data, size_t row, Arena & pool) { auto key = getKey(row, pool); - auto mapped = findKeyImpl(key, data, found, last_elem_cache); + auto res = Base::findKeyImpl(key, data); pool.rollback(key.size); - return mapped; + return res; } template @@ -867,9 +634,6 @@ struct HashMethodSerialized return hash; } - template - ALWAYS_INLINE void cacheData(size_t /*row*/, Mapped /*mapped*/) {} - protected: ALWAYS_INLINE StringRef getKey(size_t row, Arena & pool) const { diff --git a/dbms/src/Common/ColumnsHashingImpl.h b/dbms/src/Common/ColumnsHashingImpl.h new file mode 100644 index 00000000000..565940b3338 --- /dev/null +++ b/dbms/src/Common/ColumnsHashingImpl.h @@ -0,0 +1,276 @@ +#pragma once + +#include +#include + +namespace DB +{ + +namespace ColumnsHashing +{ + +namespace columns_hashing_impl +{ + +template +struct LastElementCache +{ + static constexpr bool consecutive_keys_optimization = consecutive_keys_optimization_; + Value value; + bool empty = true; + bool found = false; + + bool check(const Value & value_) { return !empty && value == value_; } + + template + bool check(const Key & key) { return !empty && value.first == key; } +}; + +template +struct LastElementCache +{ + static constexpr bool consecutive_keys_optimization = false; +}; + +template +class EmplaceResultImpl +{ + Mapped & value; + Mapped & cached_value; + bool inserted; + +public: + EmplaceResultImpl(Mapped & value, Mapped & cached_value, bool inserted) + : value(value), cached_value(cached_value), inserted(inserted) {} + + bool isInserted() const { return inserted; } + const auto & getMapped() const { return value; } + void setMapped(const Mapped & mapped) { value = cached_value = mapped; } +}; + +template <> +class EmplaceResultImpl +{ + bool inserted; + +public: + explicit EmplaceResultImpl(bool inserted) : inserted(inserted) {} + bool isInserted() const { return inserted; } +}; + +template +class FindResultImpl +{ + Mapped value; + bool found; + +public: + FindResultImpl(Mapped value, bool found) : value(value), found(found) {} + bool isFound() const { return found; } + const Mapped & getMapped() const { return value; } +}; + +template <> +class FindResultImpl +{ + bool 
found; + +public: + explicit FindResultImpl(bool found) : found(found) {} + bool isFound() const { return found; } +}; + +template +struct HashMethodBase +{ + using EmplaceResult = EmplaceResultImpl; + using FindResult = FindResultImpl; + static constexpr bool has_mapped = !std::is_same::value; + using Cache = LastElementCache; + +protected: + Cache cache; + + HashMethodBase() + { + if constexpr (has_mapped && consecutive_keys_optimization) + { + /// Init PairNoInit elements. + cache.value.second = Mapped(); + using Key = decltype(cache.value.first); + cache.value.first = Key(); + } + } + + template + ALWAYS_INLINE EmplaceResult emplaceKeyImpl(Key key, Data & data, typename Data::iterator & it) + { + if constexpr (Cache::consecutive_keys_optimization) + { + if (cache.found && cache.check(key)) + { + if constexpr (has_mapped) + return EmplaceResult(cache.value.second, cache.value.second, false); + else + return EmplaceResult(false); + } + } + + bool inserted = false; + data.emplace(key, it, inserted); + Mapped * cached = &it->second; + + if constexpr (consecutive_keys_optimization) + { + cache.value = *it; + cache.found = true; + cache.empty = false; + cached = &cache.value.second; + } + + if constexpr (has_mapped) + return EmplaceResult(it->second, *cached, inserted); + else + return EmplaceResult(inserted); + } + + template + ALWAYS_INLINE FindResult findKeyImpl(Key key, Data & data) + { + if constexpr (Cache::consecutive_keys_optimization) + { + if (cache.check(key)) + { + if constexpr (has_mapped) + return FindResult(cache.found ? cache.value.second : Mapped(), cache.found); + else + return FindResult(cache.found); + } + } + + auto it = data.find(key); + bool found = it != data.end(); + + if constexpr (consecutive_keys_optimization) + { + cache.found = found; + cache.empty = false; + + if (found) + cache.value = *it; + else + { + if constexpr (has_mapped) + cache.value.first = key; + else + cache.value = key; + } + } + + if constexpr (has_mapped) + return FindResult(found ? it->second : Mapped(), found); + else + return FindResult(found); + } +}; + + +template +struct MappedCache : public PaddedPODArray {}; + +template <> +struct MappedCache {}; + + +/// This class is designed to provide the functionality that is required for +/// supporting nullable keys in HashMethodKeysFixed. If there are +/// no nullable keys, this class is merely implemented as an empty shell. +template +class BaseStateKeysFixed; + +/// Case where nullable keys are supported. +template +class BaseStateKeysFixed +{ +protected: + void init(const ColumnRawPtrs & key_columns) + { + null_maps.reserve(key_columns.size()); + actual_columns.reserve(key_columns.size()); + + for (const auto & col : key_columns) + { + if (col->isColumnNullable()) + { + const auto & nullable_col = static_cast(*col); + actual_columns.push_back(&nullable_col.getNestedColumn()); + null_maps.push_back(&nullable_col.getNullMapColumn()); + } + else + { + actual_columns.push_back(col); + null_maps.push_back(nullptr); + } + } + } + + /// Return the columns which actually contain the values of the keys. + /// For a given key column, if it is nullable, we return its nested + /// column. Otherwise we return the key column itself. + inline const ColumnRawPtrs & getActualColumns() const + { + return actual_columns; + } + + /// Create a bitmap that indicates whether, for a particular row, + /// a key column bears a null value or not. 
+ KeysNullMap createBitmap(size_t row) const + { + KeysNullMap bitmap{}; + + for (size_t k = 0; k < null_maps.size(); ++k) + { + if (null_maps[k] != nullptr) + { + const auto & null_map = static_cast(*null_maps[k]).getData(); + if (null_map[row] == 1) + { + size_t bucket = k / 8; + size_t offset = k % 8; + bitmap[bucket] |= UInt8(1) << offset; + } + } + } + + return bitmap; + } + +private: + ColumnRawPtrs actual_columns; + ColumnRawPtrs null_maps; +}; + +/// Case where nullable keys are not supported. +template +class BaseStateKeysFixed +{ +protected: + void init(const ColumnRawPtrs & columns) { actual_columns = columns; } + + const ColumnRawPtrs & getActualColumns() const { return actual_columns; } + + KeysNullMap createBitmap(size_t) const + { + throw Exception{"Internal error: calling createBitmap() for non-nullable keys" + " is forbidden", ErrorCodes::LOGICAL_ERROR}; + } + +private: + ColumnRawPtrs actual_columns; +}; + +} + +} + +} diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 785345f9400..e26d94f53e4 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -609,20 +609,34 @@ void NO_INLINE Aggregator::executeImplCase( /// NOTE When editing this code, also pay attention to SpecializedAggregator.h. /// For all rows. - AggregateDataPtr value = nullptr; for (size_t i = 0; i < rows; ++i) { - bool inserted = false; /// Inserted a new key, or was this key already? - - AggregateDataPtr * aggregate_data = nullptr; + AggregateDataPtr aggregate_data = nullptr; if constexpr (!no_more_keys) /// Insert. - aggregate_data = state.emplaceKey(method.data, i, inserted, *aggregates_pool); + { + auto emplace_result = state.emplaceKey(method.data, i, *aggregates_pool); + + /// If a new key is inserted, initialize the states of the aggregate functions, and possibly something related to the key. + if (emplace_result.isInserted()) + { + /// exception-safety - if you can not allocate memory or create states, then destructors will not be called. + emplace_result.setMapped(nullptr); + + aggregate_data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); + createAggregateStates(aggregate_data); + + emplace_result.setMapped(aggregate_data); + } + else + aggregate_data = emplace_result.getMapped(); + } else { /// Add only if the key already exists. - bool found = false; - aggregate_data = state.findKey(method.data, i, found, *aggregates_pool); + auto find_result = state.findKey(method.data, i, *aggregates_pool); + if (find_result.isFound()) + aggregate_data = find_result.getMapped(); } /// aggregate_date == nullptr means that the new key did not fit in the hash table because of no_more_keys. @@ -631,20 +645,7 @@ void NO_INLINE Aggregator::executeImplCase( if (!aggregate_data && !overflow_row) continue; - /// If a new key is inserted, initialize the states of the aggregate functions, and possibly something related to the key. - if (inserted) - { - /// exception-safety - if you can not allocate memory or create states, then destructors will not be called. - *aggregate_data = nullptr; - - AggregateDataPtr place = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); - createAggregateStates(place); - *aggregate_data = place; - - state.cacheData(i, place); - } - - value = aggregate_data ? *aggregate_data : overflow_row; + AggregateDataPtr value = aggregate_data ? aggregate_data : overflow_row; /// Add values to the aggregate functions. 
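[Editor's note] The `executeImplCase` rewrite above performs a two-phase publish for exception safety: the slot is first set to `nullptr`, then the aggregate states are allocated and created, and only then is the real pointer stored. If allocation or state creation throws, cleanup sees "no state yet" instead of a garbage pointer. A self-contained sketch of the same dance; `createStates` stands in for `createAggregateStates` and the map for the real hash table:

```cpp
#include <unordered_map>
#include <cstdint>
#include <iostream>

using AggregateDataPtr = char *;

AggregateDataPtr createStates() { return new char[8](); } // may throw std::bad_alloc

int main()
{
    std::unordered_map<uint64_t, AggregateDataPtr> data;
    uint64_t key = 1;

    auto [it, inserted] = data.try_emplace(key, nullptr);
    if (inserted)
    {
        it->second = nullptr;               // publish a safe value before anything can throw
        AggregateDataPtr place = createStates();
        it->second = place;                 // publish the real state only on success
    }
    std::cout << static_cast<const void *>(data[key]) << '\n';
    delete[] data[key];
}
```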
for (AggregateFunctionInstruction * inst = aggregate_instructions; inst->that; ++inst) @@ -1951,17 +1952,28 @@ void NO_INLINE Aggregator::mergeStreamsImplCase( size_t rows = block.rows(); for (size_t i = 0; i < rows; ++i) { - typename Table::iterator it; - AggregateDataPtr * aggregate_data = nullptr; - - bool inserted = false; /// Inserted a new key, or was this key already? + AggregateDataPtr aggregate_data = nullptr; if (!no_more_keys) - aggregate_data = state.emplaceKey(data, i, inserted, *aggregates_pool); + { + auto emplace_result = state.emplaceKey(data, i, *aggregates_pool); + if (emplace_result.isInserted()) + { + emplace_result.setMapped(nullptr); + + aggregate_data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); + createAggregateStates(aggregate_data); + + emplace_result.setMapped(aggregate_data); + } + else + aggregate_data = emplace_result.getMapped(); + } else { - bool found; - aggregate_data = state.findKey(data, i, found, *aggregates_pool); + auto find_result = state.findKey(data, i, *aggregates_pool); + if (find_result.isFound()) + aggregate_data = find_result.getMapped(); } /// aggregate_date == nullptr means that the new key did not fit in the hash table because of no_more_keys. @@ -1970,19 +1982,7 @@ void NO_INLINE Aggregator::mergeStreamsImplCase( if (!aggregate_data && !overflow_row) continue; - /// If a new key is inserted, initialize the states of the aggregate functions, and possibly something related to the key. - if (inserted) - { - *aggregate_data = nullptr; - - AggregateDataPtr place = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); - createAggregateStates(place); - *aggregate_data = place; - - state.cacheData(i, place); - } - - AggregateDataPtr value = aggregate_data ? *aggregate_data : overflow_row; + AggregateDataPtr value = aggregate_data ? aggregate_data : overflow_row; /// Merge state of aggregate functions. for (size_t j = 0; j < params.aggregates_size; ++j) diff --git a/dbms/src/Interpreters/Aggregator.h b/dbms/src/Interpreters/Aggregator.h index 0b40f4e6a25..9112e0265cb 100644 --- a/dbms/src/Interpreters/Aggregator.h +++ b/dbms/src/Interpreters/Aggregator.h @@ -158,7 +158,7 @@ struct AggregationMethodOneNumber AggregationMethodOneNumber(const Other & other) : data(other.data) {} /// To use one `Method` in different threads, use different `State`. - using State = ColumnsHashing::HashMethodOneNumber; + using State = ColumnsHashing::HashMethodOneNumber; /// Use optimization for low cardinality. 
static const bool low_cardinality_optimization = false; @@ -188,7 +188,7 @@ struct AggregationMethodString template AggregationMethodString(const Other & other) : data(other.data) {} - using State = ColumnsHashing::HashMethodString; + using State = ColumnsHashing::HashMethodString; static const bool low_cardinality_optimization = false; @@ -216,7 +216,7 @@ struct AggregationMethodFixedString template AggregationMethodFixedString(const Other & other) : data(other.data) {} - using State = ColumnsHashing::HashMethodFixedString; + using State = ColumnsHashing::HashMethodFixedString; static const bool low_cardinality_optimization = false; @@ -246,7 +246,7 @@ struct AggregationMethodSingleLowCardinalityColumn : public SingleColumnMethod template explicit AggregationMethodSingleLowCardinalityColumn(const Other & other) : Base(other) {} - using State = ColumnsHashing::HashMethodSingleLowCardinalityColumn; + using State = ColumnsHashing::HashMethodSingleLowCardinalityColumn; static const bool low_cardinality_optimization = true; @@ -277,7 +277,7 @@ struct AggregationMethodKeysFixed template AggregationMethodKeysFixed(const Other & other) : data(other.data) {} - using State = ColumnsHashing::HashMethodKeysFixed; + using State = ColumnsHashing::HashMethodKeysFixed; static const bool low_cardinality_optimization = false; @@ -355,7 +355,7 @@ struct AggregationMethodSerialized template AggregationMethodSerialized(const Other & other) : data(other.data) {} - using State = ColumnsHashing::HashMethodSerialized; + using State = ColumnsHashing::HashMethodSerialized; static const bool low_cardinality_optimization = false; From 506f2c41c818003e1d4918fe1bf9aea7fa18a5ec Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 21 Jan 2019 16:51:42 +0300 Subject: [PATCH 104/586] fixed MT alter --- dbms/src/Databases/DatabaseOrdinary.cpp | 7 ++++++- dbms/src/Interpreters/InterpreterCreateQuery.cpp | 3 --- .../0_stateless/00824_indices_alter.reference | 6 +++--- .../queries/0_stateless/00824_indices_alter.sql | 16 ++++++++-------- 4 files changed, 17 insertions(+), 15 deletions(-) diff --git a/dbms/src/Databases/DatabaseOrdinary.cpp b/dbms/src/Databases/DatabaseOrdinary.cpp index 2634c6edae6..c6f8b0809eb 100644 --- a/dbms/src/Databases/DatabaseOrdinary.cpp +++ b/dbms/src/Databases/DatabaseOrdinary.cpp @@ -533,8 +533,13 @@ void DatabaseOrdinary::alterTable( ASTPtr new_columns = InterpreterCreateQuery::formatColumns(columns); ASTPtr new_indices = InterpreterCreateQuery::formatIndices(indices); + ast_create_query.columns_list->replace(ast_create_query.columns_list->columns, new_columns); - ast_create_query.columns_list->replace(ast_create_query.columns_list->columns, new_indices); + + if (ast_create_query.columns_list->indices) + ast_create_query.columns_list->replace(ast_create_query.columns_list->indices, new_indices); + else + ast_create_query.columns_list->set(ast_create_query.columns_list->indices, new_indices); if (storage_modifier) storage_modifier(*ast_create_query.storage); diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index 944e2b0df7c..e4f16f1ee34 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -405,9 +405,6 @@ ASTPtr InterpreterCreateQuery::formatColumns(const ColumnsDescription & columns) ASTPtr InterpreterCreateQuery::formatIndices(const IndicesDescription & indices) { - if (indices.indices.empty()) - return nullptr; - auto res = std::make_shared(); for (const auto & 
index : indices.indices) diff --git a/dbms/tests/queries/0_stateless/00824_indices_alter.reference b/dbms/tests/queries/0_stateless/00824_indices_alter.reference index 275413608bb..43447d944de 100644 --- a/dbms/tests/queries/0_stateless/00824_indices_alter.reference +++ b/dbms/tests/queries/0_stateless/00824_indices_alter.reference @@ -1,4 +1,4 @@ -CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = MergeTree() ORDER BY u64 INDICES idx1 BY u64 * i32 TYPE minmax GRANULARITY 10, idx3 BY u64 - i32 TYPE minmax GRANULARITY 10, idx2 BY u64 + i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32, INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10, INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192 1 2 1 2 1 2 @@ -6,7 +6,7 @@ CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = MergeTree() ORDE 1 2 1 2 1 2 -CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = MergeTree() ORDER BY u64 INDICES idx3 BY u64 - i32 TYPE minmax GRANULARITY 10, idx2 BY u64 + i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32, INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192 1 2 1 2 1 2 @@ -14,7 +14,7 @@ CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = MergeTree() ORDE 1 2 1 2 CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192 -CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = MergeTree() ORDER BY u64 INDICES idx1 BY u64 * i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32, INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192 1 2 1 2 1 2 diff --git a/dbms/tests/queries/0_stateless/00824_indices_alter.sql b/dbms/tests/queries/0_stateless/00824_indices_alter.sql index f345d923351..6749d69ff28 100644 --- a/dbms/tests/queries/0_stateless/00824_indices_alter.sql +++ b/dbms/tests/queries/0_stateless/00824_indices_alter.sql @@ -10,9 +10,9 @@ ORDER BY u64; INSERT INTO test.minmax_idx VALUES (1, 2); -ALTER TABLE test.minmax_idx ADD INDEX idx1 BY u64 * i32 TYPE minmax GRANULARITY 10; -ALTER TABLE test.minmax_idx ADD INDEX idx2 BY u64 + i32 TYPE minmax GRANULARITY 10; -ALTER TABLE test.minmax_idx ADD INDEX idx3 BY u64 - i32 TYPE minmax GRANULARITY 10 AFTER idx1; +ALTER TABLE test.minmax_idx ADD INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10; +ALTER TABLE test.minmax_idx ADD INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10; +ALTER TABLE test.minmax_idx ADD INDEX idx3 (u64 - i32) TYPE minmax GRANULARITY 10 AFTER idx1; SHOW CREATE TABLE test.minmax_idx; @@ -37,7 +37,7 @@ ALTER TABLE test.minmax_idx DROP INDEX idx3; SHOW CREATE TABLE test.minmax_idx; -ALTER TABLE test.minmax_idx ADD INDEX idx1 BY u64 * i32 TYPE minmax GRANULARITY 10; +ALTER TABLE test.minmax_idx ADD INDEX idx1 (u64 * i32) TYPE minmax GRANULARITY 10; SHOW CREATE TABLE test.minmax_idx; @@ -47,11 +47,11 @@ SELECT * FROM test.minmax_idx WHERE u64 * i32 = 2; CREATE TABLE test.minmax_idx2 ( u64 UInt64, - i32 Int32 + i32 Int32, + INDEX idx1 (u64 + i32) TYPE minmax GRANULARITY 10, + INDEX idx2 u64 * i32 TYPE minmax GRANULARITY 10 ) ENGINE = MergeTree() -ORDER BY u64 
-INDICES idx1 BY u64 + i32 TYPE minmax GRANULARITY 10, - idx2 BY u64 * i32 TYPE minmax GRANULARITY 10; +ORDER BY u64; INSERT INTO test.minmax_idx2 VALUES (1, 2); INSERT INTO test.minmax_idx2 VALUES (1, 2); From f0a0dbbf5ae1dc5cd36fb52d7dc2551bfe151b05 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 21 Jan 2019 17:47:47 +0300 Subject: [PATCH 105/586] fixed bug with replMT indices storing in zk --- dbms/src/Storages/MergeTree/MergeTreeData.h | 1 - .../ReplicatedMergeTreeTableMetadata.cpp | 2 +- ...dices_alter_replicated_zookeeper.reference | 16 ++++++------- ...824_indices_alter_replicated_zookeeper.sql | 24 +++++++++---------- 4 files changed, 21 insertions(+), 22 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index d45a09b3b4d..8fd1a99843f 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -587,7 +587,6 @@ public: /// Secondary (data skipping) indices for MergeTree MergeTreeIndices skip_indices; - ASTPtr skip_indices_ast; ExpressionActionsPtr skip_indices_expr; /// Names of columns for primary key + secondary sorting columns. diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index 6dc2f511eaa..88b2e6ea1c7 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -45,7 +45,7 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr if (data.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) partition_key = formattedAST(MergeTreeData::extractKeyExpressionList(data.partition_by_ast)); - skip_indices = formattedAST(data.skip_indices_ast); + skip_indices = data.getIndicesDescription().toString(); } void ReplicatedMergeTreeTableMetadata::write(WriteBuffer & out) const diff --git a/dbms/tests/queries/0_stateless/00824_indices_alter_replicated_zookeeper.reference b/dbms/tests/queries/0_stateless/00824_indices_alter_replicated_zookeeper.reference index a56117e915b..ce3192788e1 100644 --- a/dbms/tests/queries/0_stateless/00824_indices_alter_replicated_zookeeper.reference +++ b/dbms/tests/queries/0_stateless/00824_indices_alter_replicated_zookeeper.reference @@ -1,5 +1,5 @@ -CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\') ORDER BY u64 INDICES idx1 BY u64 * i32 TYPE minmax GRANULARITY 10, idx3 BY u64 - i32 TYPE minmax GRANULARITY 10, idx2 BY u64 + i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 -CREATE TABLE test.minmax_idx_r ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\') ORDER BY u64 INDICES idx1 BY u64 * i32 TYPE minmax GRANULARITY 10, idx3 BY u64 - i32 TYPE minmax GRANULARITY 10, idx2 BY u64 + i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32, INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10, INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\') ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx_r ( u64 UInt64, i32 Int32, INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10, INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 + i32 TYPE minmax 
GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\') ORDER BY u64 SETTINGS index_granularity = 8192 1 2 1 2 1 2 @@ -14,8 +14,8 @@ CREATE TABLE test.minmax_idx_r ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMerg 3 2 19 9 65 75 -CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\') ORDER BY u64 INDICES idx3 BY u64 - i32 TYPE minmax GRANULARITY 10, idx2 BY u64 + i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 -CREATE TABLE test.minmax_idx_r ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\') ORDER BY u64 INDICES idx3 BY u64 - i32 TYPE minmax GRANULARITY 10, idx2 BY u64 + i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32, INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\') ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx_r ( u64 UInt64, i32 Int32, INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\') ORDER BY u64 SETTINGS index_granularity = 8192 1 2 1 4 1 5 @@ -30,8 +30,8 @@ CREATE TABLE test.minmax_idx_r ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMerg 65 75 CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\') ORDER BY u64 SETTINGS index_granularity = 8192 CREATE TABLE test.minmax_idx_r ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\') ORDER BY u64 SETTINGS index_granularity = 8192 -CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\') ORDER BY u64 INDICES idx1 BY u64 * i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 -CREATE TABLE test.minmax_idx_r ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\') ORDER BY u64 INDICES idx1 BY u64 * i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32, INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\') ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx_r ( u64 UInt64, i32 Int32, INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\') ORDER BY u64 SETTINGS index_granularity = 8192 1 2 1 4 1 5 @@ -44,8 +44,8 @@ CREATE TABLE test.minmax_idx_r ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMerg 3 2 19 9 65 75 -CREATE TABLE test.minmax_idx2 ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r1\') ORDER BY u64 INDICES idx1 BY u64 + i32 TYPE minmax GRANULARITY 10, idx2 BY u64 * i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 -CREATE TABLE test.minmax_idx2_r ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r2\') ORDER BY u64 INDICES idx1 BY u64 + i32 TYPE minmax GRANULARITY 10, idx2 BY u64 * i32 TYPE minmax GRANULARITY 10 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx2 ( u64 
UInt64, i32 Int32, INDEX idx1 u64 + i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 * i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r1\') ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx2_r ( u64 UInt64, i32 Int32, INDEX idx1 u64 + i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 * i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r2\') ORDER BY u64 SETTINGS index_granularity = 8192 1 2 1 3 1 2 diff --git a/dbms/tests/queries/0_stateless/00824_indices_alter_replicated_zookeeper.sql b/dbms/tests/queries/0_stateless/00824_indices_alter_replicated_zookeeper.sql index 0b4a524464f..ccd71beb6ec 100644 --- a/dbms/tests/queries/0_stateless/00824_indices_alter_replicated_zookeeper.sql +++ b/dbms/tests/queries/0_stateless/00824_indices_alter_replicated_zookeeper.sql @@ -21,9 +21,9 @@ INSERT INTO test.minmax_idx VALUES (1, 2); SYSTEM SYNC REPLICA test.minmax_idx_r; -ALTER TABLE test.minmax_idx ADD INDEX idx1 BY u64 * i32 TYPE minmax GRANULARITY 10; -ALTER TABLE test.minmax_idx_r ADD INDEX idx2 BY u64 + i32 TYPE minmax GRANULARITY 10; -ALTER TABLE test.minmax_idx ADD INDEX idx3 BY u64 - i32 TYPE minmax GRANULARITY 10 AFTER idx1; +ALTER TABLE test.minmax_idx ADD INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10; +ALTER TABLE test.minmax_idx_r ADD INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10; +ALTER TABLE test.minmax_idx ADD INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10 AFTER idx1; SHOW CREATE TABLE test.minmax_idx; SHOW CREATE TABLE test.minmax_idx_r; @@ -57,7 +57,7 @@ ALTER TABLE test.minmax_idx_r DROP INDEX idx3; SHOW CREATE TABLE test.minmax_idx; SHOW CREATE TABLE test.minmax_idx_r; -ALTER TABLE test.minmax_idx ADD INDEX idx1 BY u64 * i32 TYPE minmax GRANULARITY 10; +ALTER TABLE test.minmax_idx ADD INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10; SHOW CREATE TABLE test.minmax_idx; SHOW CREATE TABLE test.minmax_idx_r; @@ -69,20 +69,20 @@ SELECT * FROM test.minmax_idx_r WHERE u64 * i32 > 1 ORDER BY (u64, i32); CREATE TABLE test.minmax_idx2 ( u64 UInt64, - i32 Int32 + i32 Int32, + INDEX idx1 u64 + i32 TYPE minmax GRANULARITY 10, + INDEX idx2 u64 * i32 TYPE minmax GRANULARITY 10 ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/indices_alter2', 'r1') -ORDER BY u64 -INDICES idx1 BY u64 + i32 TYPE minmax GRANULARITY 10, - idx2 BY u64 * i32 TYPE minmax GRANULARITY 10; +ORDER BY u64; CREATE TABLE test.minmax_idx2_r ( u64 UInt64, - i32 Int32 + i32 Int32, + INDEX idx1 u64 + i32 TYPE minmax GRANULARITY 10, + INDEX idx2 u64 * i32 TYPE minmax GRANULARITY 10 ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/indices_alter2', 'r2') -ORDER BY u64 -INDICES idx1 BY u64 + i32 TYPE minmax GRANULARITY 10, - idx2 BY u64 * i32 TYPE minmax GRANULARITY 10; +ORDER BY u64; SHOW CREATE TABLE test.minmax_idx2; From 08adef7027c4305178f256d5d331137ad1c4af7c Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 21 Jan 2019 20:17:21 +0300 Subject: [PATCH 106/586] rename --- dbms/src/Parsers/ParserCreateQuery.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/src/Parsers/ParserCreateQuery.cpp b/dbms/src/Parsers/ParserCreateQuery.cpp index 2a7244071d6..b9c37a9c43f 100644 --- a/dbms/src/Parsers/ParserCreateQuery.cpp +++ b/dbms/src/Parsers/ParserCreateQuery.cpp @@ -308,7 +308,7 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ASTPtr database; ASTPtr table; - ASTPtr columns; + ASTPtr columns_list; ASTPtr 
to_database; ASTPtr to_table; ASTPtr storage; @@ -376,7 +376,7 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) /// List of columns. if (s_lparen.ignore(pos, expected)) { - if (!columns_or_indices_p.parse(pos, columns, expected)) + if (!columns_or_indices_p.parse(pos, columns_list, expected)) return false; if (!s_rparen.ignore(pos, expected)) @@ -478,7 +478,7 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) /// Optional - a list of columns can be specified. It must fully comply with SELECT. if (s_lparen.ignore(pos, expected)) { - if (!columns_or_indices_p.parse(pos, columns, expected)) + if (!columns_or_indices_p.parse(pos, columns_list, expected)) return false; if (!s_rparen.ignore(pos, expected)) @@ -520,7 +520,7 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) getIdentifierName(to_database, query->to_database); getIdentifierName(to_table, query->to_table); - query->set(query->columns_list, columns); + query->set(query->columns_list, columns_list); query->set(query->storage, storage); getIdentifierName(as_database, query->as_database); From 51830221bd158b28c92d871c23ee22e31994271a Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 21 Jan 2019 20:54:27 +0300 Subject: [PATCH 107/586] refactoring --- dbms/src/Storages/AlterCommands.cpp | 4 ++-- dbms/src/Storages/AlterCommands.h | 4 ++-- dbms/src/Storages/IndicesDescription.cpp | 7 ------- 3 files changed, 4 insertions(+), 11 deletions(-) diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index d04c31b8062..91877401fc4 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -162,8 +162,8 @@ static bool namesEqual(const String & name_without_dot, const DB::NameAndTypePai return (name_with_dot == name_type.name.substr(0, name_without_dot.length() + 1) || name_without_dot == name_type.name); } -void AlterCommand::apply(ColumnsDescription & columns_description, IndicesDescription & indices_description - , ASTPtr & order_by_ast, ASTPtr & primary_key_ast) const +void AlterCommand::apply(ColumnsDescription & columns_description, IndicesDescription & indices_description, + ASTPtr & order_by_ast, ASTPtr & primary_key_ast) const { if (type == ADD_COLUMN) { diff --git a/dbms/src/Storages/AlterCommands.h b/dbms/src/Storages/AlterCommands.h index 3833881b186..10307736813 100644 --- a/dbms/src/Storages/AlterCommands.h +++ b/dbms/src/Storages/AlterCommands.h @@ -80,8 +80,8 @@ struct AlterCommand static std::optional parse(const ASTAlterCommand * command); - void apply(ColumnsDescription & columns_description, IndicesDescription & indices_description - , ASTPtr & order_by_ast, ASTPtr & primary_key_ast) const; + void apply(ColumnsDescription & columns_description, IndicesDescription & indices_description, + ASTPtr & order_by_ast, ASTPtr & primary_key_ast) const; /// Checks that not only metadata touched by that command bool is_mutable() const; }; diff --git a/dbms/src/Storages/IndicesDescription.cpp b/dbms/src/Storages/IndicesDescription.cpp index c84f8b92998..e6c43da664d 100644 --- a/dbms/src/Storages/IndicesDescription.cpp +++ b/dbms/src/Storages/IndicesDescription.cpp @@ -1,12 +1,5 @@ #include -#include -#include -#include -#include -#include -#include - #include #include #include From 0c601eca8dbbdcb4afcae57d0a693dfbda3bec62 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 21 Jan 2019 21:35:29 +0300 Subject: [PATCH 108/586] docs ru --- 
 docs/ru/operations/table_engines/mergetree.md | 45 +++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/docs/ru/operations/table_engines/mergetree.md b/docs/ru/operations/table_engines/mergetree.md
index d0410ccea58..441944ae0aa 100644
--- a/docs/ru/operations/table_engines/mergetree.md
+++ b/docs/ru/operations/table_engines/mergetree.md
@@ -32,6 +32,8 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
     name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
     name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
     ...
+    INDEX index_name1 expr1 TYPE type1(...) GRANULARITY value1,
+    INDEX index_name2 expr2 TYPE type2(...) GRANULARITY value2
 ) ENGINE = MergeTree()
 [PARTITION BY expr]
 [ORDER BY expr]
@@ -224,6 +226,49 @@ SELECT count() FROM table WHERE CounterID = 34 OR URL LIKE '%upyachka%'
 
 The monthly partitioning key ensures that only data blocks containing dates from the required range are read. A data block may contain data for many dates (up to a whole month). Within a block, data is sorted by the primary key, which may not contain the date as its first column; because of this, a query with only a date condition that does not constrain the primary key prefix will read more data than a single date's worth.
 
+## Additional indices
+
+For tables of the `*MergeTree` family, additional indices can be declared in the columns section.
+
+For a given expression, an index aggregates some data; a `SELECT` query then uses these aggregates, after the primary key, to skip blocks of data on which the `WHERE` clause cannot be satisfied (each skipped block consists of a number of data granules of `index_granularity` rows equal to the granularity of the index), thereby reducing the amount of data read from disk.
+
+Example
+```sql
+CREATE TABLE table_name
+(
+    u64 UInt64,
+    i32 Int32,
+    s String,
+    ...
+    INDEX a (u64 * i32, s) TYPE minmax GRANULARITY 3,
+    INDEX b (u64 * length(s)) TYPE minmax GRANULARITY 4
+) ENGINE = MergeTree()
+...
+```
+
+These indices can be used to optimize the following queries
+```sql
+SELECT count() FROM table WHERE s < 'z'
+SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234
+```
+
+### Available indices
+
+* `minmax` Stores the minimum and maximum of the expression (for each element if the expression is a `tuple`), and uses them to skip data parts in the same way as the primary key.
+
+Example
+```sql
+INDEX b (u64 * length(s)) TYPE minmax GRANULARITY 4
+```
+
+### ALTER of indices
+
+An index can be added or removed with the `ALTER ADD INDEX name expr TYPE type GRANULARITY value AFTER name` and `ALTER DROP INDEX name` commands.
+
+`ALTER ADD INDEX` only adds the index description to the metadata, while `ALTER DROP INDEX` removes the index from the metadata and deletes the index files from disk, so both commands themselves complete instantly.
+
+Once an index is present in the metadata, it starts being calculated in subsequent merges and inserts into the table.
+
 ## Concurrent data access
 
 Multi-versioning is used for concurrent access to the table: when a table is read and updated simultaneously, data is read from the set of parts that is current at the time of the query. There are no long locks, and inserts do not block reads.
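[Editor's note] What the `minmax` index described in this patch buys can be pictured with a small standalone sketch (not ClickHouse code): per block of granules it stores the [min, max] of the indexed expression, and a range filter can then prove "no row here can match" and skip the whole block. `MinMaxGranule` and `mayMatch` are invented names for illustration:

```cpp
#include <vector>
#include <iostream>

struct MinMaxGranule { long min, max; }; // stored per block of granules

// WHERE expr BETWEEN lo AND hi: can this block possibly contain a match?
bool mayMatch(const MinMaxGranule & g, long lo, long hi)
{
    return g.max >= lo && g.min <= hi;
}

int main()
{
    std::vector<MinMaxGranule> index = {{0, 9}, {10, 19}, {20, 29}};
    for (const auto & g : index)
        std::cout << mayMatch(g, 12, 15) << ' ';   // 0 1 0: only the middle block is read
    std::cout << '\n';
}
```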
From 2d06486f01fbb614cf8542583f23bba4fe857d69 Mon Sep 17 00:00:00 2001
From: Nikita Vasilev
Date: Tue, 22 Jan 2019 16:55:16 +0300
Subject: [PATCH 109/586] docs ru

---
 docs/ru/operations/table_engines/mergetree.md |  8 --------
 docs/ru/query_language/alter.md               | 14 ++++++++++++++
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/docs/ru/operations/table_engines/mergetree.md b/docs/ru/operations/table_engines/mergetree.md
index 441944ae0aa..03480863203 100644
--- a/docs/ru/operations/table_engines/mergetree.md
+++ b/docs/ru/operations/table_engines/mergetree.md
@@ -261,14 +261,6 @@ SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234
 INDEX b (u64 * length(s)) TYPE minmax GRANULARITY 4
 ```
 
-### ALTER of indices
-
-An index can be added or removed with the `ALTER ADD INDEX name expr TYPE type GRANULARITY value AFTER name` and `ALTER DROP INDEX name` commands.
-
-`ALTER ADD INDEX` only adds the index description to the metadata, while `ALTER DROP INDEX` removes the index from the metadata and deletes the index files from disk, so both commands themselves complete instantly.
-
-Once an index is present in the metadata, it starts being calculated in subsequent merges and inserts into the table.
-
 ## Concurrent data access
 
 Multi-versioning is used for concurrent access to the table: when a table is read and updated simultaneously, data is read from the set of parts that is current at the time of the query. There are no long locks, and inserts do not block reads.

diff --git a/docs/ru/query_language/alter.md b/docs/ru/query_language/alter.md
index f8dd65e8c45..9e65bf5fa57 100644
--- a/docs/ru/query_language/alter.md
+++ b/docs/ru/query_language/alter.md
@@ -79,6 +79,20 @@ MODIFY ORDER BY new_expression
 of the sorting key, it is only allowed to add new columns to the key (i.e. columns added by the `ADD COLUMN` command in the same `ALTER` query) that have no default expression.
 
+### Manipulations with indices
+
+An index can be added or removed with the operations
+```
+ALTER ADD INDEX name expression TYPE type GRANULARITY value AFTER name
+ALTER DROP INDEX name
+```
+Only tables of the `*MergeTree` family are supported.
+
+`ALTER ADD INDEX` only adds the index description to the metadata, while `ALTER DROP INDEX` removes the index from the metadata and deletes the index files from disk, so both commands themselves complete instantly.
+
+Once an index is present in the metadata, it starts being calculated in subsequent merges and inserts into the table, not immediately after the `ALTER` operation completes.
+
+The index ALTER query is replicated: the new metadata is stored in ZooKeeper and the change is applied on all replicas.
 
 ### Manipulations with partitions and parts

From 5ed3269324d0bd1b9bf20d1ccba3b9bba4a47ffc Mon Sep 17 00:00:00 2001
From: Nikita Vasilev
Date: Tue, 22 Jan 2019 17:39:18 +0300
Subject: [PATCH 110/586] docs en

---
 docs/en/operations/table_engines/mergetree.md | 45 +++++++++++++++++++
 docs/en/query_language/alter.md               | 13 ++++++
 docs/ru/operations/table_engines/mergetree.md |  6 +--
 docs/ru/query_language/alter.md               |  2 +-
 4 files changed, 62 insertions(+), 4 deletions(-)

diff --git a/docs/en/operations/table_engines/mergetree.md b/docs/en/operations/table_engines/mergetree.md
index ec2c05b786f..dc82aefb054 100644
--- a/docs/en/operations/table_engines/mergetree.md
+++ b/docs/en/operations/table_engines/mergetree.md
@@ -34,6 +34,8 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
     name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
     name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
     ...
+    INDEX index_name1 expr1 TYPE type1(...)
GRANULARITY value1,
+    INDEX index_name2 expr2 TYPE type2(...) GRANULARITY value2
 ) ENGINE = MergeTree()
 [PARTITION BY expr]
 [ORDER BY expr]
@@ -225,6 +227,49 @@ To check whether ClickHouse can use the index when running a query, use the sett
 
 The key for partitioning by month allows reading only those data blocks which contain dates from the proper range. In this case, the data block may contain data for many dates (up to an entire month). Within a block, data is sorted by primary key, which might not contain the date as the first column. Because of this, using a query with only a date condition that does not specify the primary key prefix will cause more data to be read than for a single date.
 
+
+### Data Skipping Indices
+
+Index declaration in the columns section of the CREATE query:
+```sql
+INDEX index_name expr TYPE type(...) GRANULARITY granularity_value
+```
+
+Data skipping indices can be specified for tables from the `*MergeTree` family.
+
+These indices aggregate some information about the specified expression over blocks that consist of `granularity_value` granules;
+the aggregates are then used in `SELECT` queries to reduce the amount of data read from disk by skipping big blocks of data for which the `WHERE` condition cannot be satisfied.
+
+
+Example
+```sql
+CREATE TABLE table_name
+(
+    u64 UInt64,
+    i32 Int32,
+    s String,
+    ...
+    INDEX a (u64 * i32, s) TYPE minmax GRANULARITY 3,
+    INDEX b (u64 * length(s)) TYPE minmax GRANULARITY 4
+) ENGINE = MergeTree()
+...
+```
+
+ClickHouse can use the indices from the example to reduce the amount of data read from disk in the following queries:
+```sql
+SELECT count() FROM table WHERE s < 'z'
+SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234
+```
+
+#### Available Types of Indices
+
+* `minmax` Stores the extremes of the specified expression (for each element if the expression is a `tuple`) and uses the stored info to skip blocks of data in the same way as the primary key.
+
+```sql
+INDEX b (u64 * length(s)) TYPE minmax GRANULARITY 4
+```
+
+
 ## Concurrent Data Access
 
 For concurrent table access, we use multi-versioning. In other words, when a table is simultaneously read and updated, data is read from a set of parts that is current at the time of the query. There are no lengthy locks. Inserts do not get in the way of read operations.

diff --git a/docs/en/query_language/alter.md b/docs/en/query_language/alter.md
index c3d504f07bb..877eef32d3a 100644
--- a/docs/en/query_language/alter.md
+++ b/docs/en/query_language/alter.md
@@ -83,6 +83,19 @@ rows are ordered by the sorting key expression you cannot add expressions contai
 to the sorting key (only columns added by the `ADD COLUMN` command in the same `ALTER` query).
 
+### Manipulations With Data Skipping Indices
+
+This only works for tables in the [`MergeTree`](../operations/table_engines/mergetree.md) family (including
+[replicated](../operations/table_engines/replication.md) tables). The following operations
+are available:
+
+* `ALTER ADD INDEX name expression TYPE type GRANULARITY value AFTER name [AFTER name2]` - Adds the index description to the table's metadata.
+
+* `ALTER DROP INDEX name` - Removes the index description from the table's metadata and deletes the index files from disk.
+
+These commands are lightweight in the sense that they only change metadata or remove files.
+These operations are also replicated.
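[Editor's note] A toy model of why ADD/DROP INDEX are "lightweight": they edit a list of index descriptions in the table metadata, and no existing data parts are rewritten. `IndexDescription` and the vector are invented stand-ins for the real metadata structures:

```cpp
#include <algorithm>
#include <string>
#include <vector>
#include <iostream>

struct IndexDescription { std::string name, expr; };

int main()
{
    std::vector<IndexDescription> metadata;
    metadata.push_back({"idx1", "u64 * i32"});   // models ALTER ... ADD INDEX
    metadata.erase(
        std::remove_if(metadata.begin(), metadata.end(),
                       [](const IndexDescription & d) { return d.name == "idx1"; }),
        metadata.end());                         // models ALTER ... DROP INDEX
    std::cout << metadata.size() << '\n';        // 0: only the description list changed
}
```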
+
 ### Manipulations With Partitions and Parts
 
 It only works for tables in the [`MergeTree`](../operations/table_engines/mergetree.md) family (including

diff --git a/docs/ru/operations/table_engines/mergetree.md b/docs/ru/operations/table_engines/mergetree.md
index 03480863203..439665c119f 100644
--- a/docs/ru/operations/table_engines/mergetree.md
+++ b/docs/ru/operations/table_engines/mergetree.md
@@ -226,11 +226,11 @@ SELECT count() FROM table WHERE CounterID = 34 OR URL LIKE '%upyachka%'
 
 The monthly partitioning key ensures that only data blocks containing dates from the required range are read. A data block may contain data for many dates (up to a whole month). Within a block, data is sorted by the primary key, which may not contain the date as its first column; because of this, a query with only a date condition that does not constrain the primary key prefix will read more data than a single date's worth.
 
-## Additional indices
+### Additional indices
 
 For tables of the `*MergeTree` family, additional indices can be declared in the columns section.
 
-For a given expression, an index aggregates some data; a `SELECT` query then uses these aggregates, after the primary key, to skip blocks of data on which the `WHERE` clause cannot be satisfied (each skipped block consists of a number of data granules of `index_granularity` rows equal to the granularity of the index), thereby reducing the amount of data read from disk.
+For a given expression, an index aggregates some data; a `SELECT` query then uses these aggregates, after the primary key, to skip blocks of data on which the `WHERE` clause cannot be satisfied (each skipped block consists of a number of data granules equal to the granularity of the index), thereby reducing the amount of data read from disk.
 
 Example
@@ -252,7 +252,7 @@ SELECT count() FROM table WHERE s < 'z'
 SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234
 ```
 
-### Available indices
+#### Available indices
 
 * `minmax` Stores the minimum and maximum of the expression (for each element if the expression is a `tuple`), and uses them to skip data parts in the same way as the primary key.
 
diff --git a/docs/ru/query_language/alter.md b/docs/ru/query_language/alter.md
index 9e65bf5fa57..5acbdac723f 100644
--- a/docs/ru/query_language/alter.md
+++ b/docs/ru/query_language/alter.md
@@ -83,7 +83,7 @@
 
 An index can be added or removed with the operations
 ```
-ALTER ADD INDEX name expression TYPE type GRANULARITY value AFTER name
+ALTER ADD INDEX name expression TYPE type GRANULARITY value [AFTER name]
 ALTER DROP INDEX name
 ```
 Only tables of the `*MergeTree` family are supported.
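[Editor's note] The replication story promised by these docs rests on patch 105 above, which stores the serialized `IndicesDescription` in ZooKeeper so replicas can compare their table metadata. A sketch of such a round trip, assuming a simple line-based format; ClickHouse actually serializes the index ASTs, and `IndexDescription`/`toString` here are invented for illustration:

```cpp
#include <sstream>
#include <string>
#include <vector>
#include <iostream>

struct IndexDescription { std::string name, expr; };

// One canonical string per indices list, so equality means "in sync".
std::string toString(const std::vector<IndexDescription> & indices)
{
    std::ostringstream out;
    for (const auto & d : indices)
        out << "INDEX " << d.name << ' ' << d.expr << '\n';
    return out.str();
}

int main()
{
    std::vector<IndexDescription> local = {{"idx1", "u64 * i32 TYPE minmax GRANULARITY 10"}};
    std::string in_zookeeper = toString(local); // what the first replica published
    std::cout << (toString(local) == in_zookeeper ? "metadata in sync" : "needs ALTER") << '\n';
}
```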
From 9ee50da8b6bb6decd5fa0aa8562a250ffff2a5b2 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 22 Jan 2019 18:58:11 +0300 Subject: [PATCH 111/586] refactor --- dbms/programs/copier/ClusterCopier.cpp | 2 ++ dbms/src/Interpreters/InterpreterCreateQuery.cpp | 2 +- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 4 ++-- .../Storages/MergeTree/MergeTreeDataMergerMutator.cpp | 2 +- .../Storages/MergeTree/MergeTreeDataMergerMutator.h | 2 +- .../Storages/MergeTree/MergeTreeDataSelectExecutor.cpp | 5 +++-- .../src/Storages/MergeTree/MergedBlockOutputStream.cpp | 10 ++++++---- dbms/src/Storages/MergeTree/checkDataPart.cpp | 2 +- dbms/src/Storages/MergeTree/checkDataPart.h | 2 +- .../Storages/MergeTree/registerStorageMergeTree.cpp | 3 +-- dbms/src/Storages/StorageReplicatedMergeTree.h | 2 +- 11 files changed, 20 insertions(+), 16 deletions(-) diff --git a/dbms/programs/copier/ClusterCopier.cpp b/dbms/programs/copier/ClusterCopier.cpp index e63f98fac7f..422f4fd3ada 100644 --- a/dbms/programs/copier/ClusterCopier.cpp +++ b/dbms/programs/copier/ClusterCopier.cpp @@ -1201,6 +1201,8 @@ protected: auto new_columns_list = std::make_shared(); new_columns_list->set(new_columns_list->columns, new_columns); + new_columns_list->set( + new_columns_list->indices, typeid_cast(*query_ast).columns_list->indices->clone()); new_query.replace(new_query.columns_list, new_columns_list); diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index e4f16f1ee34..d5c13bd88f7 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -408,7 +408,7 @@ ASTPtr InterpreterCreateQuery::formatIndices(const IndicesDescription & indices) auto res = std::make_shared(); for (const auto & index : indices.indices) - res->children.push_back(index); + res->children.push_back(index->clone()); return res; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 093df2309d1..10bfb3c2479 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -371,10 +371,10 @@ void MergeTreeData::setSkipIndices(const IndicesDescription & indices, bool only const auto & index_decl = std::dynamic_pointer_cast(index_ast); new_indices.push_back( - std::move(MergeTreeIndexFactory::instance().get( + MergeTreeIndexFactory::instance().get( *this, std::dynamic_pointer_cast(index_decl->clone()), - global_context))); + global_context)); if (names.find(new_indices.back()->name) != names.end()) throw Exception( diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index e2b75a503fc..e4f9e4e3b6f 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -902,7 +902,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor Poco::File(new_part_tmp_path).createDirectories(); - BlockInputStreamPtr in = mutations_interpreter.execute(); + auto in = mutations_interpreter.execute(); NamesAndTypesList all_columns = data.getColumns().getAllPhysical(); Block in_header = in->getHeader(); diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index 318db13b296..58f5842b5f5 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ 
b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -120,7 +120,7 @@ public: enum class MergeAlgorithm { Horizontal, /// per-row merge of all columns - Vertical /// per-row merge of PK and secondary indexes columns, per-column gather for non-PK columns + Vertical /// per-row merge of PK and secondary indices columns, per-column gather for non-PK columns }; private: diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index c40432a5ba1..2ecbf120bdf 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -975,6 +975,8 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( MarkRanges res; + /// Some granules can cover two or more ranges, + /// this variable is stored to avoid reading the same granule twice. MergeTreeIndexGranulePtr granule = nullptr; size_t last_index_mark = 0; for (const auto & range : ranges) @@ -983,9 +985,8 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( range.begin / index->granularity, (range.end + index->granularity - 1) / index->granularity); - if (last_index_mark != index_range.begin || !granule) { + if (last_index_mark != index_range.begin || !granule) reader.seek(index_range.begin); - } for (size_t index_mark = index_range.begin; index_mark < index_range.end; ++index_mark) { diff --git a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp index a1cc4250e99..9f6f1517295 100644 --- a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -330,7 +330,8 @@ void MergedBlockOutputStream::writeSuffixAndFinalizePart( for (size_t i = 0; i < storage.skip_indices.size(); ++i) { auto & stream = *skip_indices_streams[i]; - if (skip_indices_granules[i] && !skip_indices_granules[i]->empty()) { + if (skip_indices_granules[i] && !skip_indices_granules[i]->empty()) + { skip_indices_granules[i]->serializeBinary(stream.compressed); skip_indices_granules[i].reset(); } @@ -421,15 +422,16 @@ void MergedBlockOutputStream::init() index_stream = std::make_unique(*index_file_stream); } - for (const auto index : storage.skip_indices) { + for (const auto index : storage.skip_indices) + { String stream_name = index->getFileName(); skip_indices_streams.emplace_back( - std::move(std::make_unique( + std::make_unique( stream_name, part_path + stream_name, INDEX_FILE_EXTENSION, part_path + stream_name, MARKS_FILE_EXTENSION, codec, max_compress_block_size, - 0, aio_threshold))); + 0, aio_threshold)); skip_indices_granules.emplace_back(nullptr); skip_index_filling.push_back(0); diff --git a/dbms/src/Storages/MergeTree/checkDataPart.cpp b/dbms/src/Storages/MergeTree/checkDataPart.cpp index d0301b8fc27..f31506c3851 100644 --- a/dbms/src/Storages/MergeTree/checkDataPart.cpp +++ b/dbms/src/Storages/MergeTree/checkDataPart.cpp @@ -242,7 +242,7 @@ MergeTreeData::DataPart::Checksums checkDataPart( rows = count; } - /// Read and check skip indexes + /// Read and check skip indices for (const auto index : indices) { LOG_DEBUG(log, "Checking index " << index->name << " in " << path); diff --git a/dbms/src/Storages/MergeTree/checkDataPart.h b/dbms/src/Storages/MergeTree/checkDataPart.h index 2037b1334e6..5a223fce02c 100644 --- a/dbms/src/Storages/MergeTree/checkDataPart.h +++ b/dbms/src/Storages/MergeTree/checkDataPart.h @@ -17,7 +17,7 @@ MergeTreeData::DataPart::Checksums 
checkDataPart( size_t index_granularity, bool require_checksums, const DataTypes & primary_key_data_types, /// Check the primary key. If it is not necessary, pass an empty array. - const MergeTreeIndices & indices = {}, /// Check skip indexes + const MergeTreeIndices & indices = {}, /// Check skip indices std::function is_cancelled = []{ return false; }); } diff --git a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp index 98bccc8d30f..b07f8f6e1d1 100644 --- a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -575,11 +575,10 @@ static StoragePtr create(const StorageFactory::Arguments & args) if (args.storage_def->sample_by) sample_by_ast = args.storage_def->sample_by->ptr(); - if (args.query.columns_list && args.query.columns_list->indices) { + if (args.query.columns_list && args.query.columns_list->indices) for (const auto & index : args.query.columns_list->indices->children) indices_description.indices.push_back( std::dynamic_pointer_cast(index->ptr())); - } storage_settings.loadFromQuery(*args.storage_def); } diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h index 50963c4c4b2..927679dbf8c 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.h +++ b/dbms/src/Storages/StorageReplicatedMergeTree.h @@ -540,7 +540,7 @@ private: void clearColumnInPartition(const ASTPtr & partition, const Field & column_name, const Context & query_context); void dropPartition(const ASTPtr & query, const ASTPtr & partition, bool detach, const Context & query_context); void attachPartition(const ASTPtr & partition, bool part, const Context & query_context); - void replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, const Context & query_context); + void replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, const Context & query_context); void fetchPartition(const ASTPtr & partition, const String & from, const Context & query_context); protected: From 2c16614e7b708fab7d33ef2c2dce625c472e3988 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 22 Jan 2019 19:01:04 +0300 Subject: [PATCH 112/586] rename tests --- ...{00823_minmax_index.reference => 00825_minmax_index.reference} | 0 .../{00823_minmax_index.sql => 00825_minmax_index.sql} | 0 ...eference => 00825_minmax_index_replicated_zookeeper.reference} | 0 ..._zookeeper.sql => 00825_minmax_index_replicated_zookeeper.sql} | 0 ...0824_indices_alter.reference => 00826_indices_alter.reference} | 0 .../{00824_indices_alter.sql => 00826_indices_alter.sql} | 0 ...ference => 00826_indices_alter_replicated_zookeeper.reference} | 0 ...zookeeper.sql => 00826_indices_alter_replicated_zookeeper.sql} | 0 8 files changed, 0 insertions(+), 0 deletions(-) rename dbms/tests/queries/0_stateless/{00823_minmax_index.reference => 00825_minmax_index.reference} (100%) rename dbms/tests/queries/0_stateless/{00823_minmax_index.sql => 00825_minmax_index.sql} (100%) rename dbms/tests/queries/0_stateless/{00823_minmax_index_replicated_zookeeper.reference => 00825_minmax_index_replicated_zookeeper.reference} (100%) rename dbms/tests/queries/0_stateless/{00823_minmax_index_replicated_zookeeper.sql => 00825_minmax_index_replicated_zookeeper.sql} (100%) rename dbms/tests/queries/0_stateless/{00824_indices_alter.reference => 00826_indices_alter.reference} (100%) rename 
dbms/tests/queries/0_stateless/{00824_indices_alter.sql => 00826_indices_alter.sql} (100%) rename dbms/tests/queries/0_stateless/{00824_indices_alter_replicated_zookeeper.reference => 00826_indices_alter_replicated_zookeeper.reference} (100%) rename dbms/tests/queries/0_stateless/{00824_indices_alter_replicated_zookeeper.sql => 00826_indices_alter_replicated_zookeeper.sql} (100%) diff --git a/dbms/tests/queries/0_stateless/00823_minmax_index.reference b/dbms/tests/queries/0_stateless/00825_minmax_index.reference similarity index 100% rename from dbms/tests/queries/0_stateless/00823_minmax_index.reference rename to dbms/tests/queries/0_stateless/00825_minmax_index.reference diff --git a/dbms/tests/queries/0_stateless/00823_minmax_index.sql b/dbms/tests/queries/0_stateless/00825_minmax_index.sql similarity index 100% rename from dbms/tests/queries/0_stateless/00823_minmax_index.sql rename to dbms/tests/queries/0_stateless/00825_minmax_index.sql diff --git a/dbms/tests/queries/0_stateless/00823_minmax_index_replicated_zookeeper.reference b/dbms/tests/queries/0_stateless/00825_minmax_index_replicated_zookeeper.reference similarity index 100% rename from dbms/tests/queries/0_stateless/00823_minmax_index_replicated_zookeeper.reference rename to dbms/tests/queries/0_stateless/00825_minmax_index_replicated_zookeeper.reference diff --git a/dbms/tests/queries/0_stateless/00823_minmax_index_replicated_zookeeper.sql b/dbms/tests/queries/0_stateless/00825_minmax_index_replicated_zookeeper.sql similarity index 100% rename from dbms/tests/queries/0_stateless/00823_minmax_index_replicated_zookeeper.sql rename to dbms/tests/queries/0_stateless/00825_minmax_index_replicated_zookeeper.sql diff --git a/dbms/tests/queries/0_stateless/00824_indices_alter.reference b/dbms/tests/queries/0_stateless/00826_indices_alter.reference similarity index 100% rename from dbms/tests/queries/0_stateless/00824_indices_alter.reference rename to dbms/tests/queries/0_stateless/00826_indices_alter.reference diff --git a/dbms/tests/queries/0_stateless/00824_indices_alter.sql b/dbms/tests/queries/0_stateless/00826_indices_alter.sql similarity index 100% rename from dbms/tests/queries/0_stateless/00824_indices_alter.sql rename to dbms/tests/queries/0_stateless/00826_indices_alter.sql diff --git a/dbms/tests/queries/0_stateless/00824_indices_alter_replicated_zookeeper.reference b/dbms/tests/queries/0_stateless/00826_indices_alter_replicated_zookeeper.reference similarity index 100% rename from dbms/tests/queries/0_stateless/00824_indices_alter_replicated_zookeeper.reference rename to dbms/tests/queries/0_stateless/00826_indices_alter_replicated_zookeeper.reference diff --git a/dbms/tests/queries/0_stateless/00824_indices_alter_replicated_zookeeper.sql b/dbms/tests/queries/0_stateless/00826_indices_alter_replicated_zookeeper.sql similarity index 100% rename from dbms/tests/queries/0_stateless/00824_indices_alter_replicated_zookeeper.sql rename to dbms/tests/queries/0_stateless/00826_indices_alter_replicated_zookeeper.sql From 71e61d82994ff4d7b4bb816068f50892e1c35c5f Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 22 Jan 2019 21:22:16 +0300 Subject: [PATCH 113/586] fix docs --- docs/en/operations/table_engines/mergetree.md | 4 ++-- docs/en/query_language/alter.md | 6 +++--- docs/ru/operations/table_engines/mergetree.md | 8 ++++---- docs/ru/query_language/alter.md | 4 ++-- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/en/operations/table_engines/mergetree.md b/docs/en/operations/table_engines/mergetree.md 
index dc82aefb054..bb57c76a930 100644
--- a/docs/en/operations/table_engines/mergetree.md
+++ b/docs/en/operations/table_engines/mergetree.md
@@ -255,7 +255,7 @@ CREATE TABLE table_name
...
```

-Indices from the example can be used by ClickHouse to reduce amount of data read from disk in following queries.
+Indices from the example can be used by ClickHouse to reduce the amount of data to read from disk in the following queries.
```sql
SELECT count() FROM table WHERE s < 'z'
SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234
@@ -266,7 +266,7 @@ SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234
* `minmax` Stores extremes of specified expression (if expression is `tuple`, then it stores extremes for each element of `tuple`), uses stored info for skipping blocks of data like primary key.
```sql
-INDEX b (u64 * length(s)) TYPE minmax GRANULARITY 4
+INDEX sample_index (u64 * length(s)) TYPE minmax GRANULARITY 4
```

diff --git a/docs/en/query_language/alter.md b/docs/en/query_language/alter.md
index 877eef32d3a..b454fa06969 100644
--- a/docs/en/query_language/alter.md
+++ b/docs/en/query_language/alter.md
@@ -85,16 +85,16 @@ to the sorting key (only columns added by the `ADD COLUMN` command in the same `

### Manipulations With Data Skipping Indices

-It only works for tables in the [`MergeTree`](../operations/table_engines/mergetree.md) family (including
+It only works for tables in the [`*MergeTree`](../operations/table_engines/mergetree.md) family (including
[replicated](../operations/table_engines/replication.md) tables).

The following operations are available:

* `ALTER ADD INDEX name expression TYPE type GRANULARITY value [AFTER name]` - Adds the index description to the table's metadata.

-* `ALTER DROP INDEX name` - Removes index description from tables metadata and index files from disk.
+* `ALTER DROP INDEX name` - Removes the index description from the table's metadata and deletes index files from disk.

These commands are lightweight in the sense that they only change metadata or remove files.
-Also these operations are replicated.
+Also they are replicated (indices metadata is synced through ZooKeeper).

### Manipulations With Partitions and Parts

diff --git a/docs/ru/operations/table_engines/mergetree.md b/docs/ru/operations/table_engines/mergetree.md
index 439665c119f..0f949319de1 100644
--- a/docs/ru/operations/table_engines/mergetree.md
+++ b/docs/ru/operations/table_engines/mergetree.md
@@ -230,7 +230,7 @@ SELECT count() FROM table WHERE CounterID = 34 OR URL LIKE '%upyachka%'

Для таблиц семейства `*MergeTree` можно задать дополнительные индексы в секции столбцов.

-Индекс аггрегирует для заданного выражения некоторые данные, а потом при `SELECT` запросе использует их после первичного ключа для пропуска боков данных (пропускаемый блок состоих из гранул данных в количестве равном гранулярности данного индекса), на которых секция `WHERE` не может быть выполнена, тем самым уменьшая объем данных читаемых с диска.
+Индексы агрегируют для заданного выражения некоторые данные, а потом при `SELECT` запросе используют их для пропуска блоков данных (пропускаемый блок состоит из гранул данных в количестве равном гранулярности данного индекса), на которых секция `WHERE` не может быть выполнена, тем самым уменьшая объем данных читаемых с диска.

Пример
```sql
CREATE TABLE table_name
...
``` -Эти индексы смогут использоваться для оптимизации запросов +Эти индексы смогут использоваться для оптимизации следующих запросов ```sql SELECT count() FROM table WHERE s < 'z' SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234 @@ -254,11 +254,11 @@ SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234 #### Доступные индексы -* `minmax` Хранит минимум и максимум выражения (если выражение - `tuple`, то для каждого элемента `tuple`), используя их пропуска кусков аналогично первичному ключу. +* `minmax` Хранит минимум и максимум выражения (если выражение - `tuple`, то для каждого элемента `tuple`), используя их для пропуска кусков аналогично первичному ключу. Пример ```sql -INDEX b (u64 * length(s)) TYPE minmax GRANULARITY 4 +INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE minmax GRANULARITY 4 ``` ## Конкурентный доступ к данным diff --git a/docs/ru/query_language/alter.md b/docs/ru/query_language/alter.md index 5acbdac723f..14dd57be2e7 100644 --- a/docs/ru/query_language/alter.md +++ b/docs/ru/query_language/alter.md @@ -88,9 +88,9 @@ ALTER DROP INDEX name ``` Поддерживается только таблицами семейства `*MergeTree`. -Команда `ALTER ADD INDEX` добавляет описание индексов в метаданные, а `ALTER DROP INDEX` удаляет индекс из метаданных и стирает файлы индекса с диска, поэтому работают они мгновенно. +Команда `ALTER ADD INDEX` добавляет описание индексов в метаданные, а `ALTER DROP INDEX` удаляет индекс из метаданных и стирает файлы индекса с диска, поэтому они легковесные и работают мгновенно. -Если индекс есть в метаданных, то он начнет считаться в последующих слияниях и записях в таблицу, а не сразу после выполнения операции `ALTER`. +Если индекс появился в метаданных, то он начнет считаться в последующих слияниях и записях в таблицу, а не сразу после выполнения операции `ALTER`. Запрос на изменение индексов реплицируется, сохраняя новые метаданные в ZooKeeper и применяя изменения на всех репликах. 
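Since the docs above state that a newly added index only starts being computed during subsequent merges and inserts, pre-existing parts have no index files at first. A minimal sketch of one way to cover them, assuming (consistently with the `MergedBlockOutputStream` changes in this series) that merges write skip-index files for the parts they produce; the table, column, and index names are hypothetical:

```sql
ALTER TABLE hits ADD INDEX idx_url_len (length(URL)) TYPE minmax GRANULARITY 4;

-- New inserts and merges compute the index; forcing a full merge
-- rewrites the existing parts so they get the index files as well.
OPTIMIZE TABLE hits FINAL;
```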
From d8f3bf788219f682a7a476b0b6a4591cf93178c5 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 22 Jan 2019 22:43:52 +0300 Subject: [PATCH 114/586] refactoring --- dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp | 7 +++++++ dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h | 6 ------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp index a79af5a52d6..cbb9fe43ea8 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp @@ -9,6 +9,13 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int INCORRECT_QUERY; +} + + MergeTreeMinMaxGranule::MergeTreeMinMaxGranule(const MergeTreeMinMaxIndex & index) : MergeTreeIndexGranule(), index(index), parallelogram() { diff --git a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h index 5551b69058c..9ccadaf1f0e 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h +++ b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h @@ -10,12 +10,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; - extern const int INCORRECT_QUERY; -} - class MergeTreeMinMaxIndex; struct MergeTreeMinMaxGranule : public MergeTreeIndexGranule From 296ad9163c695ed6df38793bdd3fa571b992f35d Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 23 Jan 2019 12:10:52 +0300 Subject: [PATCH 115/586] fix --- dbms/src/Parsers/ASTCreateQuery.h | 7 ++++--- dbms/src/Parsers/ParserCreateQuery.cpp | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/dbms/src/Parsers/ASTCreateQuery.h b/dbms/src/Parsers/ASTCreateQuery.h index 9315cd17761..a062e5a596c 100644 --- a/dbms/src/Parsers/ASTCreateQuery.h +++ b/dbms/src/Parsers/ASTCreateQuery.h @@ -119,10 +119,11 @@ private: s.ostr << s.nl_or_ws << indent_str; s.ostr << (s.hilite ? hilite_keyword : "") << prefix << (s.hilite ? 
hilite_none : ""); - FormatStateStacked frame_nested = frame; - ++frame_nested.indent; + FormatSettings nested_settings = s; + nested_settings.one_line = true; + nested_settings.nl_or_ws = ' '; - elem->formatImpl(s, state, frame_nested); + elem->formatImpl(nested_settings, state, frame); } }; public: diff --git a/dbms/src/Parsers/ParserCreateQuery.cpp b/dbms/src/Parsers/ParserCreateQuery.cpp index b9c37a9c43f..66083ae5fab 100644 --- a/dbms/src/Parsers/ParserCreateQuery.cpp +++ b/dbms/src/Parsers/ParserCreateQuery.cpp @@ -179,9 +179,9 @@ bool ParserColumnsOrIndicesDeclarationList::parseImpl(Pos & pos, ASTPtr & node, for (const auto & elem : list->children) { - if (dynamic_cast(elem.get())) + if (typeid_cast(elem.get())) columns->children.push_back(elem); - else if (dynamic_cast(elem.get())) + else if (typeid_cast(elem.get())) indices->children.push_back(elem); else return false; From c8e605327d7ecdcae31fe9d2849190f6ff70e255 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Mon, 21 Jan 2019 17:02:03 +0300 Subject: [PATCH 116/586] Split StorageKafka.cpp on smaller files per class --- .../Storages/Kafka/KafkaBlockInputStream.cpp | 101 ++++++++ .../Storages/Kafka/KafkaBlockInputStream.h | 37 +++ .../Kafka/ReadBufferFromKafkaConsumer.cpp | 62 +++++ .../Kafka/ReadBufferFromKafkaConsumer.h | 44 ++++ dbms/src/Storages/Kafka/StorageKafka.cpp | 222 ++---------------- 5 files changed, 266 insertions(+), 200 deletions(-) create mode 100644 dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp create mode 100644 dbms/src/Storages/Kafka/KafkaBlockInputStream.h create mode 100644 dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp create mode 100644 dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h diff --git a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp new file mode 100644 index 00000000000..f4f5288a216 --- /dev/null +++ b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp @@ -0,0 +1,101 @@ +#include + +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int TIMEOUT_EXCEEDED; +} // namespace ErrorCodes + +KafkaBlockInputStream::KafkaBlockInputStream( + StorageKafka & storage_, const Context & context_, const String & schema, size_t max_block_size_) + : storage(storage_), context(context_), max_block_size(max_block_size_) +{ + // Always skip unknown fields regardless of the context (JSON or TSKV) + context.setSetting("input_format_skip_unknown_fields", 1u); + + // We don't use ratio since the number of Kafka messages may vary from stream to stream. + // Thus, ratio is meaningless. 
+    context.setSetting("input_format_allow_errors_ratio", 1.);
+    context.setSetting("input_format_allow_errors_num", storage.skip_broken);
+
+    if (schema.size() > 0)
+        context.setSetting("format_schema", schema);
+}
+
+KafkaBlockInputStream::~KafkaBlockInputStream()
+{
+    if (!hasClaimed())
+        return;
+
+    // An error was thrown during the stream or it did not finish successfully
+    // The read offsets weren't committed, so consumer must rejoin the group from the original starting point
+    if (!finalized)
+    {
+        LOG_TRACE(storage.log, "KafkaBlockInputStream did not finish successfully, unsubscribing from assignments and rejoining");
+        consumer->unsubscribe();
+        consumer->subscribe(storage.topics);
+    }
+
+    // Return consumer for another reader
+    storage.pushConsumer(consumer);
+    consumer = nullptr;
+}
+
+String KafkaBlockInputStream::getName() const
+{
+    return storage.getName();
+}
+
+Block KafkaBlockInputStream::readImpl()
+{
+    if (isCancelledOrThrowIfKilled() || !hasClaimed())
+        return {};
+
+    if (!reader)
+        throw Exception("Logical error: reader is not initialized", ErrorCodes::LOGICAL_ERROR);
+
+    return reader->read();
+}
+
+Block KafkaBlockInputStream::getHeader() const
+{
+    return storage.getSampleBlock();
+}
+
+void KafkaBlockInputStream::readPrefixImpl()
+{
+    if (!hasClaimed())
+    {
+        // Create a formatted reader on Kafka messages
+        LOG_TRACE(storage.log, "Creating formatted reader");
+        consumer = storage.tryClaimConsumer(context.getSettingsRef().queue_max_wait_ms.totalMilliseconds());
+        if (consumer == nullptr)
+            throw Exception("Failed to claim consumer: ", ErrorCodes::TIMEOUT_EXCEEDED);
+
+        read_buf = std::make_unique(consumer, storage.log, storage.row_delimiter);
+        reader = FormatFactory::instance().getInput(storage.format_name, *read_buf, storage.getSampleBlock(), context, max_block_size);
+    }
+
+    // Start reading data
+    finalized = false;
+    reader->readPrefix();
+}
+
+void KafkaBlockInputStream::readSuffixImpl()
+{
+    if (hasClaimed())
+    {
+        reader->readSuffix();
+        // Store offsets read in this stream
+        read_buf->commit();
+    }
+
+    // Mark as successfully finished
+    finalized = true;
+}
+
+} // namespace DB
diff --git a/dbms/src/Storages/Kafka/KafkaBlockInputStream.h b/dbms/src/Storages/Kafka/KafkaBlockInputStream.h
new file mode 100644
index 00000000000..30c7bb9ea2c
--- /dev/null
+++ b/dbms/src/Storages/Kafka/KafkaBlockInputStream.h
@@ -0,0 +1,37 @@
+#pragma once
+
+#include
+#include
+#include
+
+namespace DB
+{
+class StorageKafka;
+
+class KafkaBlockInputStream : public IProfilingBlockInputStream
+{
+public:
+    KafkaBlockInputStream(StorageKafka & storage_, const Context & context_, const String & schema, size_t max_block_size_);
+    ~KafkaBlockInputStream() override;
+
+    String getName() const override;
+    Block readImpl() override;
+    Block getHeader() const override;
+    void readPrefixImpl() override;
+    void readSuffixImpl() override;
+
+private:
+    StorageKafka & storage;
+    ConsumerPtr consumer;
+    Context context;
+    size_t max_block_size;
+    Block sample_block;
+    std::unique_ptr read_buf;
+    BlockInputStreamPtr reader;
+    bool finalized = false;
+
+    // Return true if consumer has been claimed by the stream
+    bool hasClaimed() { return consumer != nullptr; }
+};
+
+} // namespace DB
diff --git a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp
new file mode 100644
index 00000000000..b417996e2ba
--- /dev/null
+++ b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp
@@ -0,0 +1,62 @@
+#include
+
+namespace DB
+{
+
+namespace
+{ + /// How long to wait for a single message (applies to each individual message) + const auto READ_POLL_MS = 500; +} // namespace + +bool ReadBufferFromKafkaConsumer::nextImpl() +{ + if (current_pending) + { + // XXX: very fishy place with const casting. + BufferBase::set( + reinterpret_cast(const_cast(current.get_payload().get_data())), current.get_payload().get_size(), 0); + current_pending = false; + return true; + } + + // Process next buffered message + auto message = consumer->poll(std::chrono::milliseconds(READ_POLL_MS)); + if (!message) + return false; + + if (message.is_eof()) + { + // Reached EOF while reading current batch, skip it. + LOG_TRACE(log, "EOF reached for partition " << message.get_partition() << " offset " << message.get_offset()); + return nextImpl(); + } + else if (auto err = message.get_error()) + { + LOG_ERROR(log, "Consumer error: " << err); + return false; + } + + ++read_messages; + + // Now we've received a new message. Check if we need to produce a delimiter + if (row_delimiter != '\0' && current) + { + BufferBase::set(&row_delimiter, 1, 0); + current = std::move(message); + current_pending = true; + return true; + } + + // Consume message and mark the topic/partition offset + // The offsets will be committed in the readSuffix() method after the block is completed + // If an exception is thrown before that would occur, the client will rejoin without committing offsets + current = std::move(message); + + // XXX: very fishy place with const casting. + BufferBase::set( + reinterpret_cast(const_cast(current.get_payload().get_data())), current.get_payload().get_size(), 0); + return true; +} + +} // namespace DB diff --git a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h new file mode 100644 index 00000000000..ed1a734ebb5 --- /dev/null +++ b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h @@ -0,0 +1,44 @@ +#pragma once + +#include +#include + +#include + +namespace DB +{ +using ConsumerPtr = std::shared_ptr; + +class ReadBufferFromKafkaConsumer : public ReadBuffer +{ +public: + ReadBufferFromKafkaConsumer(ConsumerPtr consumer_, Poco::Logger * log_, char row_delimiter_) + : ReadBuffer(nullptr, 0), consumer(consumer_), log(log_), row_delimiter(row_delimiter_) + { + if (row_delimiter != '\0') + LOG_TRACE(log, "Row delimiter is: " << row_delimiter); + } + + /// Commit messages read with this consumer + void commit() + { + LOG_TRACE(log, "Committing " << read_messages << " messages"); + if (read_messages == 0) + return; + + consumer->async_commit(); + read_messages = 0; + } + +private: + ConsumerPtr consumer; + cppkafka::Message current; + bool current_pending = false; /// We've fetched "current" message and need to process it on the next iteration. 
+ Poco::Logger * log; + size_t read_messages = 0; + char row_delimiter; + + bool nextImpl() override; +}; + +} // namespace DB diff --git a/dbms/src/Storages/Kafka/StorageKafka.cpp b/dbms/src/Storages/Kafka/StorageKafka.cpp index 2d8b85ad639..bc2bce29dd3 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.cpp +++ b/dbms/src/Storages/Kafka/StorageKafka.cpp @@ -6,9 +6,6 @@ #include #include #include -#include -#include -#include #include #include #include @@ -16,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -42,210 +40,31 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int BAD_ARGUMENTS; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int TIMEOUT_EXCEEDED; } -using namespace Poco::Util; - -/// How long to wait for a single message (applies to each individual message) -static const auto READ_POLL_MS = 500; -static const auto CLEANUP_TIMEOUT_MS = 3000; - -/// Configuration prefix -static const String CONFIG_PREFIX = "kafka"; - -class ReadBufferFromKafkaConsumer : public ReadBuffer +namespace { - ConsumerPtr consumer; - cppkafka::Message current; - bool current_pending = false; /// We've fetched "current" message and need to process it on the next iteration. - Poco::Logger * log; - size_t read_messages = 0; - char row_delimiter; + const auto RESCHEDULE_MS = 500; + const auto CLEANUP_TIMEOUT_MS = 3000; - bool nextImpl() override + /// Configuration prefix + const String CONFIG_PREFIX = "kafka"; + + void loadFromConfig(cppkafka::Configuration & conf, const Poco::Util::AbstractConfiguration & config, const std::string & path) { - if (current_pending) + Poco::Util::AbstractConfiguration::Keys keys; + std::vector errstr(512); + + config.keys(path, keys); + + for (const auto & key : keys) { - // XXX: very fishy place with const casting. - BufferBase::set(reinterpret_cast(const_cast(current.get_payload().get_data())), current.get_payload().get_size(), 0); - current_pending = false; - return true; + const String key_path = path + "." + key; + const String key_name = boost::replace_all_copy(key, "_", "."); + conf.set(key_name, config.getString(key_path)); } - - // Process next buffered message - auto message = consumer->poll(std::chrono::milliseconds(READ_POLL_MS)); - if (!message) - return false; - - if (message.is_eof()) - { - // Reached EOF while reading current batch, skip it. - LOG_TRACE(log, "EOF reached for partition " << message.get_partition() << " offset " << message.get_offset()); - return nextImpl(); - } - else if (auto err = message.get_error()) - { - LOG_ERROR(log, "Consumer error: " << err); - return false; - } - - ++read_messages; - - // Now we've received a new message. Check if we need to produce a delimiter - if (row_delimiter != '\0' && current) - { - BufferBase::set(&row_delimiter, 1, 0); - current = std::move(message); - current_pending = true; - return true; - } - - // Consume message and mark the topic/partition offset - // The offsets will be committed in the readSuffix() method after the block is completed - // If an exception is thrown before that would occur, the client will rejoin without committing offsets - current = std::move(message); - - // XXX: very fishy place with const casting. 
- BufferBase::set(reinterpret_cast(const_cast(current.get_payload().get_data())), current.get_payload().get_size(), 0); - return true; } - -public: - ReadBufferFromKafkaConsumer(ConsumerPtr consumer_, Poco::Logger * log_, char row_delimiter_) - : ReadBuffer(nullptr, 0), consumer(consumer_), log(log_), row_delimiter(row_delimiter_) - { - if (row_delimiter != '\0') - LOG_TRACE(log, "Row delimiter is: " << row_delimiter); - } - - /// Commit messages read with this consumer - void commit() - { - LOG_TRACE(log, "Committing " << read_messages << " messages"); - if (read_messages == 0) - return; - - consumer->async_commit(); - read_messages = 0; - } -}; - -class KafkaBlockInputStream : public IBlockInputStream -{ -public: - KafkaBlockInputStream(StorageKafka & storage_, const Context & context_, const String & schema, size_t max_block_size_) - : storage(storage_), context(context_), max_block_size(max_block_size_) - { - // Always skip unknown fields regardless of the context (JSON or TSKV) - context.setSetting("input_format_skip_unknown_fields", 1u); - - // We don't use ratio since the number of Kafka messages may vary from stream to stream. - // Thus, ratio is meaningless. - context.setSetting("input_format_allow_errors_ratio", 1.); - context.setSetting("input_format_allow_errors_num", storage.skip_broken); - - if (schema.size() > 0) - context.setSetting("format_schema", schema); - } - - ~KafkaBlockInputStream() override - { - if (!hasClaimed()) - return; - - // An error was thrown during the stream or it did not finish successfully - // The read offsets weren't committed, so consumer must rejoin the group from the original starting point - if (!finalized) - { - LOG_TRACE(storage.log, "KafkaBlockInputStream did not finish successfully, unsubscribing from assignments and rejoining"); - consumer->unsubscribe(); - consumer->subscribe(storage.topics); - } - - // Return consumer for another reader - storage.pushConsumer(consumer); - consumer = nullptr; - } - - String getName() const override - { - return storage.getName(); - } - - Block readImpl() override - { - if (isCancelledOrThrowIfKilled() || !hasClaimed()) - return {}; - - if (!reader) - throw Exception("Logical error: reader is not initialized", ErrorCodes::LOGICAL_ERROR); - - return reader->read(); - } - - Block getHeader() const override { return storage.getSampleBlock(); } - - void readPrefixImpl() override - { - if (!hasClaimed()) - { - // Create a formatted reader on Kafka messages - LOG_TRACE(storage.log, "Creating formatted reader"); - consumer = storage.tryClaimConsumer(context.getSettingsRef().queue_max_wait_ms.totalMilliseconds()); - if (consumer == nullptr) - throw Exception("Failed to claim consumer: ", ErrorCodes::TIMEOUT_EXCEEDED); - - read_buf = std::make_unique(consumer, storage.log, storage.row_delimiter); - reader = FormatFactory::instance().getInput(storage.format_name, *read_buf, storage.getSampleBlock(), context, max_block_size); - } - - // Start reading data - finalized = false; - reader->readPrefix(); - } - - void readSuffixImpl() override - { - if (hasClaimed()) - { - reader->readSuffix(); - // Store offsets read in this stream - read_buf->commit(); - } - - // Mark as successfully finished - finalized = true; - } - -private: - StorageKafka & storage; - ConsumerPtr consumer; - Context context; - size_t max_block_size; - Block sample_block; - std::unique_ptr read_buf; - BlockInputStreamPtr reader; - bool finalized = false; - - // Return true if consumer has been claimed by the stream - bool hasClaimed() { return consumer 
!= nullptr; }
-};
-
-static void loadFromConfig(cppkafka::Configuration & conf, const AbstractConfiguration & config, const std::string & path)
-{
-    AbstractConfiguration::Keys keys;
-    std::vector errstr(512);
-
-    config.keys(path, keys);
-
-    for (const auto & key : keys)
-    {
-        const String key_path = path + "." + key;
-        const String key_name = boost::replace_all_copy(key, "_", ".");
-        conf.set(key_name, config.getString(key_path));
-    }
-}
+} // namespace

StorageKafka::StorageKafka(
    const std::string & table_name_,
@@ -361,6 +180,9 @@ cppkafka::Configuration StorageKafka::createConsumerConfiguration()
    // We manually commit offsets after a stream successfully finished
    conf.set("enable.auto.commit", "false");

+    // for debug logs inside rdkafka
+    conf.set("debug", "consumer,cgrp,topic,fetch");
+
    // Update consumer configuration from the configuration
    const auto & config = global_context.getConfigRef();
    if (config.has(CONFIG_PREFIX))
@@ -461,7 +283,7 @@ void StorageKafka::streamThread()

        // Wait for attached views
        if (!stream_cancelled)
-            task->scheduleAfter(READ_POLL_MS);
+            task->scheduleAfter(RESCHEDULE_MS);
}

From f4e5e74e77d52216b029ae09c8044fa3904be0ca Mon Sep 17 00:00:00 2001
From: Ivan Lezhankin
Date: Tue, 22 Jan 2019 13:34:59 +0300
Subject: [PATCH 117/586] Update cppkafka

---
 contrib/cppkafka | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/cppkafka b/contrib/cppkafka
index 520465510ef..efa4e95a184 160000
--- a/contrib/cppkafka
+++ b/contrib/cppkafka
@@ -1 +1 @@
-Subproject commit 520465510efef7704346cf8d140967c4abb057c1
+Subproject commit efa4e95a18458fac333f45a13c4ed55d6d3e3a00

From 425d483cfb1088a7f72dd91e17ab7dd9f11e862c Mon Sep 17 00:00:00 2001
From: Ivan Lezhankin
Date: Tue, 22 Jan 2019 15:18:18 +0300
Subject: [PATCH 118/586] Add integration tests

---
 dbms/src/Storages/Kafka/StorageKafka.cpp      |  2 +-
 .../integration/test_storage_kafka/test.py    | 72 ++++++++++++++++---
 2 files changed, 64 insertions(+), 10 deletions(-)

diff --git a/dbms/src/Storages/Kafka/StorageKafka.cpp b/dbms/src/Storages/Kafka/StorageKafka.cpp
index bc2bce29dd3..d6b58c6720e 100644
--- a/dbms/src/Storages/Kafka/StorageKafka.cpp
+++ b/dbms/src/Storages/Kafka/StorageKafka.cpp
@@ -181,7 +181,7 @@ cppkafka::Configuration StorageKafka::createConsumerConfiguration()
    conf.set("enable.auto.commit", "false");

    // for debug logs inside rdkafka
-    conf.set("debug", "consumer,cgrp,topic,fetch");
+    // conf.set("debug", "consumer,cgrp,topic,fetch");

    // Update consumer configuration from the configuration
    const auto & config = global_context.getConfigRef();
diff --git a/dbms/tests/integration/test_storage_kafka/test.py b/dbms/tests/integration/test_storage_kafka/test.py
index a591a343c5f..4505b9745ed 100644
--- a/dbms/tests/integration/test_storage_kafka/test.py
+++ b/dbms/tests/integration/test_storage_kafka/test.py
@@ -9,6 +9,11 @@ import json
import subprocess


+# TODO: add a test for run-time offset updates in CH when we manually update them on the Kafka side.
+# TODO: add a test that the materialized view works.
+# TODO: add a test that SELECT with LIMIT works.
+# TODO: modify tests to respect the `skip_broken_messages` setting.
+
cluster = ClickHouseCluster(__file__)
instance = cluster.add_instance('instance',
                                main_configs=['configs/kafka.xml'],
@@ -55,7 +60,7 @@ def kafka_produce(kafka_id, topic, messages):
    p.stdin.close()


-def kafka_check_json_numbers(instance):
+def kafka_check_json_numbers(instance, insert_malformed=False, table='test.kafka'):
    retries = 0
    while True:
        if kafka_is_available(instance.cluster.kafka_docker_id):
@@ -66,15 +71,27 @@
            raise Exception('Cannot connect to kafka.')
        print("Waiting for kafka to be available...")
        time.sleep(1)
+
    messages = ''
-    for i in range(50):
+    for i in range(25):
        messages += json.dumps({'key': i, 'value': i}) + '\n'
    kafka_produce(instance.cluster.kafka_docker_id, 'json', messages)
-    for i in range(30):
-        result = instance.query('SELECT * FROM test.kafka;')
-        if result:
-            break
-        time.sleep(0.5)
+
+    if insert_malformed:
+        # Insert a couple of malformed messages.
+        kafka_produce(instance.cluster.kafka_docker_id, 'json', '}{very_broken_message,\n')
+        kafka_produce(instance.cluster.kafka_docker_id, 'json', '}{very_broken_message,\n')
+
+    messages = ''
+    for i in range(25, 50):
+        messages += json.dumps({'key': i, 'value': i}) + '\n'
+    kafka_produce(instance.cluster.kafka_docker_id, 'json', messages)
+
+    # XXX: since the broken message breaks the `select` reading
+    # we'll try to select a limited number of times.
+    for i in range(3):
+        instance.query('SELECT * FROM {};'.format(table))
+        time.sleep(1)

    fpath = p.join(p.dirname(__file__), 'test_kafka_json.reference')
    with open(fpath) as reference:
@@ -88,7 +105,11 @@ def test_kafka_json(started_cluster):
        ENGINE = Kafka('kafka1:9092', 'json', 'json', 'JSONEachRow', '\\n');
        ''')
+
+    # Don't insert malformed messages since the old settings syntax
+    # doesn't support skipping of broken messages.
    kafka_check_json_numbers(instance)
+
    instance.query('DROP TABLE test.kafka')


@@ -102,12 +123,45 @@ def test_kafka_json_settings(started_cluster):
                kafka_topic_list = 'json',
                kafka_group_name = 'json',
                kafka_format = 'JSONEachRow',
-                kafka_row_delimiter = '\\n';
+                kafka_row_delimiter = '\\n',
+                kafka_skip_broken_messages = 1;
        ''')
-    kafka_check_json_numbers(instance)
+
+    kafka_check_json_numbers(instance, True)
+
    instance.query('DROP TABLE test.kafka')


+def test_kafka_json_materialized_view(started_cluster):
+    instance.query('''
+        DROP TABLE IF EXISTS test.kafka;
+        DROP TABLE IF EXISTS test.view;
+        DROP TABLE IF EXISTS test.consumer;
+        CREATE TABLE test.kafka (key UInt64, value UInt64)
+            ENGINE = Kafka
+            SETTINGS
+                kafka_broker_list = 'kafka1:9092',
+                kafka_topic_list = 'json',
+                kafka_group_name = 'json',
+                kafka_format = 'JSONEachRow',
+                kafka_row_delimiter = '\\n',
+                kafka_skip_broken_messages = 2;
+        CREATE TABLE test.view (key UInt64, value UInt64)
+            ENGINE = MergeTree()
+            ORDER BY key;
+        CREATE MATERIALIZED VIEW test.consumer TO test.view AS
+            SELECT * FROM test.kafka;
+        ''')
+
+    kafka_check_json_numbers(instance, True, 'test.view')
+
+    instance.query('''
+        DROP TABLE test.kafka;
+        DROP TABLE test.view;
+        DROP TABLE test.consumer;
+    ''')
+
+
if __name__ == '__main__':
    cluster.start()
    raw_input("Cluster created, press any key to destroy...")

From 862d419f31ad9dc9c17d6e50864c77abd6ba72eb Mon Sep 17 00:00:00 2001
From: Ivan Lezhankin
Date: Wed, 23 Jan 2019 14:00:43 +0300
Subject: [PATCH 119/586] Fix losing the next message after a broken one.
---
 dbms/src/IO/DelimitedReadBuffer.h             | 52 +++++++++++++++++++
 dbms/src/IO/ReadBuffer.h                      |  1 +
 .../Storages/Kafka/KafkaBlockInputStream.cpp  |  5 +-
 .../Storages/Kafka/KafkaBlockInputStream.h    |  3 +-
 .../Kafka/ReadBufferFromKafkaConsumer.cpp     | 34 +++-------
 .../Kafka/ReadBufferFromKafkaConsumer.h       | 24 ++++-----
 6 files changed, 75 insertions(+), 44 deletions(-)
 create mode 100644 dbms/src/IO/DelimitedReadBuffer.h

diff --git a/dbms/src/IO/DelimitedReadBuffer.h b/dbms/src/IO/DelimitedReadBuffer.h
new file mode 100644
index 00000000000..0ad77f0d0ed
--- /dev/null
+++ b/dbms/src/IO/DelimitedReadBuffer.h
@@ -0,0 +1,52 @@
+#pragma once
+
+#include
+#include
+
+namespace DB
+{
+/// Repeatedly reads from a single sub-buffer, delimiting its output with a character.
+/// Owns the sub-buffer.
+class DelimitedReadBuffer : public ReadBuffer
+{
+public:
+    DelimitedReadBuffer(ReadBuffer * buffer_, char delimiter_) : ReadBuffer(nullptr, 0), buffer(buffer_), delimiter(delimiter_)
+    {
+        // TODO: check that `buffer_` is not nullptr.
+    }
+
+    template
+    BufferType * subBufferAs()
+    {
+        return typeid_cast(buffer.get());
+    }
+
+protected:
+    // XXX: don't know how to guarantee that the next call to this method is done after we read all previous data.
+    bool nextImpl() override
+    {
+        if (put_delimiter)
+        {
+            BufferBase::set(&delimiter, 1, 0);
+            put_delimiter = false;
+        }
+        else
+        {
+            if (!buffer->next())
+                return false;
+
+            BufferBase::set(buffer->position(), buffer->available(), 0);
+            put_delimiter = true;
+        }
+
+        return true;
+    }
+
+private:
+    std::unique_ptr buffer; // FIXME: should be `const`, but `ReadBuffer` doesn't allow
+    char delimiter; // FIXME: should be `const`, but `ReadBuffer` doesn't allow
+
+    bool put_delimiter = false;
+};
+
+} // namespace DB
diff --git a/dbms/src/IO/ReadBuffer.h b/dbms/src/IO/ReadBuffer.h
index c3cbae8fcb1..94f1d2d28e5 100644
--- a/dbms/src/IO/ReadBuffer.h
+++ b/dbms/src/IO/ReadBuffer.h
@@ -41,6 +41,7 @@ public:
      */
    ReadBuffer(Position ptr, size_t size, size_t offset) : BufferBase(ptr, size, offset) {}

+    // FIXME: behavior differs greatly from `BufferBase::set()` and it's very confusing.
void set(Position ptr, size_t size) { BufferBase::set(ptr, size, 0); working_buffer.resize(0); } /** read next data and fill a buffer with it; set position to the beginning; diff --git a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp index f4f5288a216..440ee6f13d4 100644 --- a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp +++ b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp @@ -76,7 +76,7 @@ void KafkaBlockInputStream::readPrefixImpl() if (consumer == nullptr) throw Exception("Failed to claim consumer: ", ErrorCodes::TIMEOUT_EXCEEDED); - read_buf = std::make_unique(consumer, storage.log, storage.row_delimiter); + read_buf = std::make_unique(new ReadBufferFromKafkaConsumer(consumer, storage.log), storage.row_delimiter); reader = FormatFactory::instance().getInput(storage.format_name, *read_buf, storage.getSampleBlock(), context, max_block_size); } @@ -90,8 +90,7 @@ void KafkaBlockInputStream::readSuffixImpl() if (hasClaimed()) { reader->readSuffix(); - // Store offsets read in this stream - read_buf->commit(); + read_buf->subBufferAs()->commit(); // Store offsets read in this stream } // Mark as successfully finished diff --git a/dbms/src/Storages/Kafka/KafkaBlockInputStream.h b/dbms/src/Storages/Kafka/KafkaBlockInputStream.h index 30c7bb9ea2c..03690d37f34 100644 --- a/dbms/src/Storages/Kafka/KafkaBlockInputStream.h +++ b/dbms/src/Storages/Kafka/KafkaBlockInputStream.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -26,7 +27,7 @@ private: Context context; size_t max_block_size; Block sample_block; - std::unique_ptr read_buf; + std::unique_ptr read_buf; BlockInputStreamPtr reader; bool finalized = false; diff --git a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp index b417996e2ba..c4baff72a85 100644 --- a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp +++ b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp @@ -2,7 +2,6 @@ namespace DB { - namespace { /// How long to wait for a single message (applies to each individual message) @@ -11,17 +10,8 @@ namespace bool ReadBufferFromKafkaConsumer::nextImpl() { - if (current_pending) - { - // XXX: very fishy place with const casting. - BufferBase::set( - reinterpret_cast(const_cast(current.get_payload().get_data())), current.get_payload().get_size(), 0); - current_pending = false; - return true; - } - - // Process next buffered message - auto message = consumer->poll(std::chrono::milliseconds(READ_POLL_MS)); + // FIXME: we can speed up feed if we do poll in advance + message = consumer->poll(std::chrono::milliseconds(READ_POLL_MS)); if (!message) return false; @@ -33,29 +23,17 @@ bool ReadBufferFromKafkaConsumer::nextImpl() } else if (auto err = message.get_error()) { + // TODO: should throw exception LOG_ERROR(log, "Consumer error: " << err); return false; } ++read_messages; - // Now we've received a new message. Check if we need to produce a delimiter - if (row_delimiter != '\0' && current) - { - BufferBase::set(&row_delimiter, 1, 0); - current = std::move(message); - current_pending = true; - return true; - } - - // Consume message and mark the topic/partition offset - // The offsets will be committed in the readSuffix() method after the block is completed - // If an exception is thrown before that would occur, the client will rejoin without committing offsets - current = std::move(message); - // XXX: very fishy place with const casting. 
- BufferBase::set( - reinterpret_cast(const_cast(current.get_payload().get_data())), current.get_payload().get_size(), 0); + auto new_position = reinterpret_cast(const_cast(message.get_payload().get_data())); + BufferBase::set(new_position, message.get_payload().get_size(), 0); + return true; } diff --git a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h index ed1a734ebb5..a00b2a0cc3d 100644 --- a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h +++ b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h @@ -12,31 +12,31 @@ using ConsumerPtr = std::shared_ptr; class ReadBufferFromKafkaConsumer : public ReadBuffer { public: - ReadBufferFromKafkaConsumer(ConsumerPtr consumer_, Poco::Logger * log_, char row_delimiter_) - : ReadBuffer(nullptr, 0), consumer(consumer_), log(log_), row_delimiter(row_delimiter_) + ReadBufferFromKafkaConsumer(ConsumerPtr consumer_, Poco::Logger * log_) + : ReadBuffer(nullptr, 0), consumer(consumer_), log(log_) { - if (row_delimiter != '\0') - LOG_TRACE(log, "Row delimiter is: " << row_delimiter); } /// Commit messages read with this consumer - void commit() + auto commit() { - LOG_TRACE(log, "Committing " << read_messages << " messages"); - if (read_messages == 0) - return; + if (read_messages) + { + LOG_TRACE(log, "Committing " << read_messages << " messages"); + consumer->async_commit(); + } - consumer->async_commit(); + auto result = read_messages; read_messages = 0; + + return result; } private: ConsumerPtr consumer; - cppkafka::Message current; - bool current_pending = false; /// We've fetched "current" message and need to process it on the next iteration. + cppkafka::Message message; Poco::Logger * log; size_t read_messages = 0; - char row_delimiter; bool nextImpl() override; }; From c50583126eb79ae24ef9490a50f1981211635f1f Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Wed, 23 Jan 2019 14:05:19 +0300 Subject: [PATCH 120/586] Fix tests --- dbms/tests/integration/test_storage_kafka/test.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dbms/tests/integration/test_storage_kafka/test.py b/dbms/tests/integration/test_storage_kafka/test.py index 4505b9745ed..b7e4062bbe4 100644 --- a/dbms/tests/integration/test_storage_kafka/test.py +++ b/dbms/tests/integration/test_storage_kafka/test.py @@ -88,10 +88,11 @@ def kafka_check_json_numbers(instance, insert_malformed=False, table='test.kafka kafka_produce(instance.cluster.kafka_docker_id, 'json', messages) # XXX: since the broken message breaks the `select` reading - # we'll try to select a limited number of times. + # we'll try to select 3 times - to be sure. 
+ result = '' for i in range(3): - instance.query('SELECT * FROM {};'.format(table)) time.sleep(1) + result += instance.query('SELECT * FROM {};'.format(table)) fpath = p.join(p.dirname(__file__), 'test_kafka_json.reference') with open(fpath) as reference: From 20b94a635cbf2d0c68a8bbe954e7f131ad442ac1 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Wed, 23 Jan 2019 19:12:41 +0300 Subject: [PATCH 121/586] Select the materialized view only once --- dbms/tests/integration/test_storage_kafka/test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/tests/integration/test_storage_kafka/test.py b/dbms/tests/integration/test_storage_kafka/test.py index b7e4062bbe4..de601756200 100644 --- a/dbms/tests/integration/test_storage_kafka/test.py +++ b/dbms/tests/integration/test_storage_kafka/test.py @@ -60,7 +60,7 @@ def kafka_produce(kafka_id, topic, messages): p.stdin.close() -def kafka_check_json_numbers(instance, insert_malformed=False, table='test.kafka'): +def kafka_check_json_numbers(instance, insert_malformed=False, table='test.kafka', select_count=3): retries = 0 while True: if kafka_is_available(instance.cluster.kafka_docker_id): @@ -88,9 +88,9 @@ def kafka_check_json_numbers(instance, insert_malformed=False, table='test.kafka kafka_produce(instance.cluster.kafka_docker_id, 'json', messages) # XXX: since the broken message breaks the `select` reading - # we'll try to select 3 times - to be sure. + # we'll try to select a limited number of times. result = '' - for i in range(3): + for i in range(select_count): time.sleep(1) result += instance.query('SELECT * FROM {};'.format(table)) @@ -154,7 +154,7 @@ def test_kafka_json_materialized_view(started_cluster): SELECT * FROM test.kafka; ''') - kafka_check_json_numbers(instance, True, 'test.view') + kafka_check_json_numbers(instance, True, 'test.view', 1) instance.query(''' DROP TABLE test.kafka; From 9636bdcd67b4a618aab1070f03a60a84b1d6284a Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Thu, 24 Jan 2019 15:44:58 +0300 Subject: [PATCH 122/586] Fix build --- dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp | 2 +- dbms/src/Storages/Kafka/KafkaBlockInputStream.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp index 440ee6f13d4..2896dfc998f 100644 --- a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp +++ b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp @@ -19,7 +19,7 @@ KafkaBlockInputStream::KafkaBlockInputStream( // We don't use ratio since the number of Kafka messages may vary from stream to stream. // Thus, ratio is meaningless. 
- context.setSetting("input_format_allow_errors_ratio", 1.); + context.setSetting("input_format_allow_errors_ratio", 0.); context.setSetting("input_format_allow_errors_num", storage.skip_broken); if (schema.size() > 0) diff --git a/dbms/src/Storages/Kafka/KafkaBlockInputStream.h b/dbms/src/Storages/Kafka/KafkaBlockInputStream.h index 03690d37f34..6c38620e9c5 100644 --- a/dbms/src/Storages/Kafka/KafkaBlockInputStream.h +++ b/dbms/src/Storages/Kafka/KafkaBlockInputStream.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include #include @@ -9,7 +9,7 @@ namespace DB { class StorageKafka; -class KafkaBlockInputStream : public IProfilingBlockInputStream +class KafkaBlockInputStream : public IBlockInputStream { public: KafkaBlockInputStream(StorageKafka & storage_, const Context & context_, const String & schema, size_t max_block_size_); From 5f1007a8df32890a1ded0c40dd787757d5c91fbb Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 24 Jan 2019 17:56:04 +0300 Subject: [PATCH 123/586] Refactor Join. --- dbms/src/Common/ColumnsHashing.h | 86 ++++++++++++---- dbms/src/Common/ColumnsHashingImpl.h | 12 +-- dbms/src/Interpreters/Join.cpp | 146 ++++++++++++++++----------- dbms/src/Interpreters/Join.h | 1 + 4 files changed, 158 insertions(+), 87 deletions(-) diff --git a/dbms/src/Common/ColumnsHashing.h b/dbms/src/Common/ColumnsHashing.h index 61c3d71cbbc..c84bf95ce53 100644 --- a/dbms/src/Common/ColumnsHashing.h +++ b/dbms/src/Common/ColumnsHashing.h @@ -36,10 +36,10 @@ using HashMethodContextPtr = std::shared_ptr; /// For the case where there is one numeric key. -template /// UInt8/16/32/64 for any type with corresponding bit width. -struct HashMethodOneNumber : public columns_hashing_impl::HashMethodBase +template /// UInt8/16/32/64 for any type with corresponding bit width. +struct HashMethodOneNumber : public columns_hashing_impl::HashMethodBase { - using Base = columns_hashing_impl::HashMethodBase; + using Base = columns_hashing_impl::HashMethodBase; const char * vec; /// If the keys of a fixed length then key_sizes contains their lengths, empty otherwise. @@ -90,10 +90,10 @@ protected: /// For the case where there is one string key. -template -struct HashMethodString : public columns_hashing_impl::HashMethodBase +template +struct HashMethodString : public columns_hashing_impl::HashMethodBase { - using Base = columns_hashing_impl::HashMethodBase; + using Base = columns_hashing_impl::HashMethodBase; const IColumn::Offset * offsets; const UInt8 * chars; @@ -107,7 +107,7 @@ struct HashMethodString : public columns_hashing_impl::HashMethodBase ALWAYS_INLINE typename Base::EmplaceResult emplaceKey(Data & data, size_t row, Arena & pool) @@ -150,10 +150,10 @@ protected: /// For the case where there is one fixed-length string key. 
-template
-struct HashMethodFixedString : public columns_hashing_impl::HashMethodBase
+template
+struct HashMethodFixedString : public columns_hashing_impl::HashMethodBase
{
-    using Base = columns_hashing_impl::HashMethodBase;
+    using Base = columns_hashing_impl::HashMethodBase;

    size_t n;
    const ColumnFixedString::Chars * chars;
@@ -396,10 +396,13 @@ struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod
        if (is_nullable && row == 0)
        {
            visit_cache[row] = VisitValue::Found;
+            bool has_null_key = data.hasNullKeyData();
+            data.hasNullKeyData() = true;
+
            if constexpr (has_mapped)
-                return EmplaceResult(data.getNullKeyData(), mapped_cache[0], !data.hasNullKeyData());
+                return EmplaceResult(data.getNullKeyData(), mapped_cache[0], !has_null_key);
            else
-                return EmplaceResult(!data.hasNullKeyData());
+                return EmplaceResult(!has_null_key);
        }

        if (visit_cache[row] == VisitValue::Found)
@@ -446,7 +449,7 @@ struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod
        if (is_nullable && row == 0)
        {
            if constexpr (has_mapped)
-                return FindResult(data.hasNullKeyData() ? data.getNullKeyData() : Mapped(), data.hasNullKeyData());
+                return FindResult(data.hasNullKeyData() ? &data.getNullKeyData() : nullptr, data.hasNullKeyData());
            else
                return FindResult(data.hasNullKeyData());
        }
@@ -454,7 +457,7 @@ struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod
        if (visit_cache[row] != VisitValue::Empty)
        {
            if constexpr (has_mapped)
-                return FindResult(mapped_cache[row], visit_cache[row] == VisitValue::Found);
+                return FindResult(&mapped_cache[row], visit_cache[row] == VisitValue::Found);
            else
                return FindResult(visit_cache[row] == VisitValue::Found);
        }
@@ -477,7 +480,7 @@ struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod
        }

        if constexpr (has_mapped)
-            return FindResult(mapped_cache[row], found);
+            return FindResult(&mapped_cache[row], found);
        else
            return FindResult(found);
    }
@@ -507,10 +510,10 @@ template <> struct LowCardinalityKeys {};

/// For the case where all keys are of fixed length, and they fit in N (for example, 128) bits.
-template
+template
struct HashMethodKeysFixed
    : private columns_hashing_impl::BaseStateKeysFixed
-    , public columns_hashing_impl::HashMethodBase
+    , public columns_hashing_impl::HashMethodBase
{
    static constexpr bool has_nullable_keys = has_nullable_keys_;
    static constexpr bool has_low_cardinality = has_low_cardinality_;
@@ -520,7 +523,7 @@ struct HashMethodKeysFixed
    size_t keys_size;

    using Base = columns_hashing_impl::BaseStateKeysFixed;
-    using BaseHashed = columns_hashing_impl::HashMethodBase;
+    using BaseHashed = columns_hashing_impl::HashMethodBase;

    HashMethodKeysFixed(const ColumnRawPtrs & key_columns, const Sizes & key_sizes, const HashMethodContextPtr &)
        : key_sizes(std::move(key_sizes)), keys_size(key_columns.size())
@@ -590,10 +593,10 @@ struct HashMethodKeysFixed
  * That is, for example, for strings, it contains first the serialized length of the string, and then the bytes.
  * Therefore, when aggregating by several strings, there is no ambiguity.
  */
-template
-struct HashMethodSerialized : public columns_hashing_impl::HashMethodBase
+template
+struct HashMethodSerialized : public columns_hashing_impl::HashMethodBase
{
-    using Base = columns_hashing_impl::HashMethodBase;
+    using Base = columns_hashing_impl::HashMethodBase;

    ColumnRawPtrs key_columns;
    size_t keys_size;
@@ -641,5 +644,46 @@ protected:
    }
};

+/// For the case when the key is a single 128-bit hash of all the key columns.
+template +struct HashMethodHashed : public columns_hashing_impl::HashMethodBase +{ + using Base = columns_hashing_impl::HashMethodBase; + using Key = UInt128; + + ColumnRawPtrs key_columns; + + HashMethodHashed(ColumnRawPtrs key_columns, const Sizes &, const HashMethodContextPtr &) + : key_columns(std::move(key_columns)) {} + + static HashMethodContextPtr createContext(const HashMethodContext::Settings &) { return nullptr; } + + UInt128 getKey(size_t row) const { return hash128(row, key_columns.size(), key_columns); } + + template + ALWAYS_INLINE typename Base::EmplaceResult emplaceKey(Data & data, size_t row, Arena & /*pool*/) + { + typename Data::iterator it; + return Base::emplaceKeyImpl(getKey(row), data, it); + } + + template + ALWAYS_INLINE typename Base::FindResult findKey(Data & data, size_t row, Arena & /*pool*/) + { + return Base::findKeyImpl(getKey(row), data); + } + + template + ALWAYS_INLINE size_t getHash(const Data & data, size_t row, Arena & /*pool*/) + { + return data.hash(getKey(row)); + } + + static StringRef getValueRef(const Value & value) + { + return StringRef(reinterpret_cast(&value.first), sizeof(value.first)); + } +}; + } } diff --git a/dbms/src/Common/ColumnsHashingImpl.h b/dbms/src/Common/ColumnsHashingImpl.h index 565940b3338..542971d4cca 100644 --- a/dbms/src/Common/ColumnsHashingImpl.h +++ b/dbms/src/Common/ColumnsHashingImpl.h @@ -44,7 +44,7 @@ public: : value(value), cached_value(cached_value), inserted(inserted) {} bool isInserted() const { return inserted; } - const auto & getMapped() const { return value; } + auto & getMapped() const { return value; } void setMapped(const Mapped & mapped) { value = cached_value = mapped; } }; @@ -61,13 +61,13 @@ public: template class FindResultImpl { - Mapped value; + Mapped * value; bool found; public: - FindResultImpl(Mapped value, bool found) : value(value), found(found) {} + FindResultImpl(Mapped * value, bool found) : value(value), found(found) {} bool isFound() const { return found; } - const Mapped & getMapped() const { return value; } + Mapped & getMapped() const { return *value; } }; template <> @@ -142,7 +142,7 @@ protected: if (cache.check(key)) { if constexpr (has_mapped) - return FindResult(cache.found ? cache.value.second : Mapped(), cache.found); + return FindResult(&cache.value.second, cache.found); else return FindResult(cache.found); } @@ -168,7 +168,7 @@ protected: } if constexpr (has_mapped) - return FindResult(found ? it->second : Mapped(), found); + return FindResult(found ? 
&it->second : nullptr, found); else return FindResult(found); } diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 8783d16c3c1..62f5ea2be1d 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -150,18 +150,54 @@ static size_t getTotalByteCountImpl(const Maps & maps, Join::Type type) } -template -struct KeyGetterForType; +template +struct KeyGetterForTypeImpl; -template <> struct KeyGetterForType { using Type = JoinKeyGetterOneNumber; }; -template <> struct KeyGetterForType { using Type = JoinKeyGetterOneNumber; }; -template <> struct KeyGetterForType { using Type = JoinKeyGetterOneNumber; }; -template <> struct KeyGetterForType { using Type = JoinKeyGetterOneNumber; }; -template <> struct KeyGetterForType { using Type = JoinKeyGetterString; }; -template <> struct KeyGetterForType { using Type = JoinKeyGetterFixedString; }; -template <> struct KeyGetterForType { using Type = JoinKeyGetterFixed; }; -template <> struct KeyGetterForType { using Type = JoinKeyGetterFixed; }; -template <> struct KeyGetterForType { using Type = JoinKeyGetterHashed; }; +template struct KeyGetterForTypeImpl +{ + using Type = ColumnsHashing::HashMethodOneNumber; +}; +template struct KeyGetterForTypeImpl +{ + using Type = ColumnsHashing::HashMethodOneNumber; +}; +template struct KeyGetterForTypeImpl +{ + using Type = ColumnsHashing::HashMethodOneNumber; +}; +template struct KeyGetterForTypeImpl +{ + using Type = ColumnsHashing::HashMethodOneNumber; +}; +template struct KeyGetterForTypeImpl +{ + using Type = ColumnsHashing::HashMethodString; +}; +template struct KeyGetterForTypeImpl +{ + using Type = ColumnsHashing::HashMethodFixedString; +}; +template struct KeyGetterForTypeImpl +{ + using Type = ColumnsHashing::HashMethodKeysFixed; +}; +template struct KeyGetterForTypeImpl +{ + using Type = ColumnsHashing::HashMethodKeysFixed; +}; +template struct KeyGetterForTypeImpl +{ + using Type = ColumnsHashing::HashMethodHashed; +}; + +template +struct KeyGetterForType +{ + using Value = typename Data::value_type; + using Mapped_t = typename Data::mapped_type; + using Mapped = std::conditional_t, const Mapped_t, Mapped_t>; + using Type = typename KeyGetterForTypeImpl::Type; +}; /// Do I need to use the hash table maps_*_full, in which we remember whether the row was joined. 
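/// Illustrative sketch (assumed usage, not lines from this patch): the KeyGetterForType
/// wrapper above picks a ColumnsHashing method from a Join::Type and a map type, so the
/// insertion and lookup code below can be written generically, roughly as:
///
///     using Map = std::decay_t<decltype(*maps.key32)>;
///     using KeyGetter = typename KeyGetterForType<Join::Type::key32, Map>::Type;
///     KeyGetter key_getter(key_columns, key_sizes, nullptr);
///     auto emplace_result = key_getter.emplaceKey(map, row, pool);
///     if (emplace_result.isInserted())
///         new (&emplace_result.getMapped()) typename Map::mapped_type(stored_block, row);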
@@ -309,40 +345,30 @@ namespace template struct Inserter { - static void insert(Map & map, const typename Map::key_type & key, Block * stored_block, size_t i, Arena & pool); + static void insert(Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool); }; template struct Inserter { - static void insert(Map & map, const typename Map::key_type & key, Block * stored_block, size_t i, Arena & pool) + static void insert(Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool) { - typename Map::iterator it; - bool inserted; - map.emplace(key, it, inserted); + auto emplace_result = key_getter.emplaceKey(map, i, pool); - if (inserted) - { - KeyGetter::onNewKey(it->first, pool); - new (&it->second) typename Map::mapped_type(stored_block, i); - } + if (emplace_result.isInserted()) + new (&emplace_result.getMapped()) typename Map::mapped_type(stored_block, i); } }; template struct Inserter { - static void insert(Map & map, const typename Map::key_type & key, Block * stored_block, size_t i, Arena & pool) + static void insert(Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool) { - typename Map::iterator it; - bool inserted; - map.emplace(key, it, inserted); + auto emplace_result = key_getter.emplaceKey(map, i, pool); - if (inserted) - { - KeyGetter::onNewKey(it->first, pool); - new (&it->second) typename Map::mapped_type(stored_block, i); - } + if (emplace_result.isInserted()) + new (&emplace_result.getMapped()) typename Map::mapped_type(stored_block, i); else { /** The first element of the list is stored in the value of the hash table, the rest in the pool. @@ -350,9 +376,10 @@ namespace * That is, the former second element, if it was, will be the third, and so on. */ auto elem = pool.alloc(); + auto & mapped = emplace_result.getMapped(); - elem->next = it->second.next; - it->second.next = elem; + elem->next = mapped.next; + mapped.next = elem; elem->block = stored_block; elem->row_num = i; } @@ -363,17 +390,16 @@ namespace template void NO_INLINE insertFromBlockImplTypeCase( Map & map, size_t rows, const ColumnRawPtrs & key_columns, - size_t keys_size, const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, Arena & pool) + const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, Arena & pool) { - KeyGetter key_getter(key_columns); + KeyGetter key_getter(key_columns, key_sizes, nullptr); for (size_t i = 0; i < rows; ++i) { if (has_null_map && (*null_map)[i]) continue; - auto key = key_getter.getKey(key_columns, keys_size, i, key_sizes); - Inserter::insert(map, key, stored_block, i, pool); + Inserter::insert(map, key_getter, stored_block, i, pool); } } @@ -381,19 +407,19 @@ namespace template void insertFromBlockImplType( Map & map, size_t rows, const ColumnRawPtrs & key_columns, - size_t keys_size, const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, Arena & pool) + const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, Arena & pool) { if (null_map) - insertFromBlockImplTypeCase(map, rows, key_columns, keys_size, key_sizes, stored_block, null_map, pool); + insertFromBlockImplTypeCase(map, rows, key_columns, key_sizes, stored_block, null_map, pool); else - insertFromBlockImplTypeCase(map, rows, key_columns, keys_size, key_sizes, stored_block, null_map, pool); + insertFromBlockImplTypeCase(map, rows, key_columns, key_sizes, stored_block, null_map, pool); } template void insertFromBlockImpl( Join::Type type, Maps & maps, size_t rows, const ColumnRawPtrs & 
key_columns, - size_t keys_size, const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, Arena & pool) + const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, Arena & pool) { switch (type) { @@ -402,8 +428,8 @@ namespace #define M(TYPE) \ case Join::Type::TYPE: \ - insertFromBlockImplType::Type>(\ - *maps.TYPE, rows, key_columns, keys_size, key_sizes, stored_block, null_map, pool); \ + insertFromBlockImplType>::Type>(\ + *maps.TYPE, rows, key_columns, key_sizes, stored_block, null_map, pool); \ break; APPLY_FOR_JOIN_VARIANTS(M) #undef M @@ -486,16 +512,16 @@ bool Join::insertFromBlock(const Block & block) if (!getFullness(kind)) { if (strictness == ASTTableJoin::Strictness::Any) - insertFromBlockImpl(type, maps_any, rows, key_columns, keys_size, key_sizes, stored_block, null_map, pool); + insertFromBlockImpl(type, maps_any, rows, key_columns, key_sizes, stored_block, null_map, pool); else - insertFromBlockImpl(type, maps_all, rows, key_columns, keys_size, key_sizes, stored_block, null_map, pool); + insertFromBlockImpl(type, maps_all, rows, key_columns, key_sizes, stored_block, null_map, pool); } else { if (strictness == ASTTableJoin::Strictness::Any) - insertFromBlockImpl(type, maps_any_full, rows, key_columns, keys_size, key_sizes, stored_block, null_map, pool); + insertFromBlockImpl(type, maps_any_full, rows, key_columns, key_sizes, stored_block, null_map, pool); else - insertFromBlockImpl(type, maps_all_full, rows, key_columns, keys_size, key_sizes, stored_block, null_map, pool); + insertFromBlockImpl(type, maps_all_full, rows, key_columns, key_sizes, stored_block, null_map, pool); } } @@ -511,14 +537,14 @@ namespace template struct Adder { - static void addFound(const typename Map::const_iterator & it, size_t num_columns_to_add, MutableColumns & added_columns, + static void addFound(const typename Map::mapped_type & mapped, size_t num_columns_to_add, MutableColumns & added_columns, size_t i, IColumn::Filter * filter, IColumn::Offset & /*current_offset*/, IColumn::Offsets * /*offsets*/, const std::vector & right_indexes) { (*filter)[i] = 1; for (size_t j = 0; j < num_columns_to_add; ++j) - added_columns[j]->insertFrom(*it->second.block->getByPosition(right_indexes[j]).column.get(), it->second.row_num); + added_columns[j]->insertFrom(*mapped.block->getByPosition(right_indexes[j]).column, mapped.row_num); } static void addNotFound(size_t num_columns_to_add, MutableColumns & added_columns, @@ -534,14 +560,14 @@ namespace template struct Adder { - static void addFound(const typename Map::const_iterator & it, size_t num_columns_to_add, MutableColumns & added_columns, + static void addFound(const typename Map::mapped_type & mapped, size_t num_columns_to_add, MutableColumns & added_columns, size_t i, IColumn::Filter * filter, IColumn::Offset & /*current_offset*/, IColumn::Offsets * /*offsets*/, const std::vector & right_indexes) { (*filter)[i] = 1; for (size_t j = 0; j < num_columns_to_add; ++j) - added_columns[j]->insertFrom(*it->second.block->getByPosition(right_indexes[j]).column.get(), it->second.row_num); + added_columns[j]->insertFrom(*mapped.block->getByPosition(right_indexes[j]).column, mapped.row_num); } static void addNotFound(size_t /*num_columns_to_add*/, MutableColumns & /*added_columns*/, @@ -554,14 +580,14 @@ namespace template struct Adder { - static void addFound(const typename Map::const_iterator & it, size_t num_columns_to_add, MutableColumns & added_columns, + static void addFound(const typename Map::mapped_type & mapped, size_t 
num_columns_to_add, MutableColumns & added_columns, size_t i, IColumn::Filter * filter, IColumn::Offset & current_offset, IColumn::Offsets * offsets, const std::vector & right_indexes) { (*filter)[i] = 1; size_t rows_joined = 0; - for (auto current = &static_cast(it->second); current != nullptr; current = current->next) + for (auto current = &static_cast(mapped); current != nullptr; current = current->next) { for (size_t j = 0; j < num_columns_to_add; ++j) added_columns[j]->insertFrom(*current->block->getByPosition(right_indexes[j]).column.get(), current->row_num); @@ -600,10 +626,10 @@ namespace IColumn::Offset & current_offset, std::unique_ptr & offsets_to_replicate, const std::vector & right_indexes) { - size_t keys_size = key_columns.size(); size_t num_columns_to_add = right_indexes.size(); - KeyGetter key_getter(key_columns); + Arena pool; + KeyGetter key_getter(key_columns, key_sizes, nullptr); for (size_t i = 0; i < rows; ++i) { @@ -614,14 +640,14 @@ namespace } else { - auto key = key_getter.getKey(key_columns, keys_size, i, key_sizes); - typename Map::const_iterator it = map.find(key); + auto find_result = key_getter.findKey(map, i, pool); - if (it != map.end()) + if (find_result.isFound()) { - it->second.setUsed(); + auto & mapped = find_result.getMapped(); + mapped.setUsed(); Adder::addFound( - it, num_columns_to_add, added_columns, i, filter.get(), current_offset, offsets_to_replicate.get(), right_indexes); + mapped, num_columns_to_add, added_columns, i, filter.get(), current_offset, offsets_to_replicate.get(), right_indexes); } else Adder::addNotFound( @@ -748,7 +774,7 @@ void Join::joinBlockImpl( { #define M(TYPE) \ case Join::Type::TYPE: \ - joinBlockImplType::Type>(\ + joinBlockImplType>::Type>(\ *maps.TYPE, rows, key_columns, key_sizes, added_columns, null_map, \ filter, current_offset, offsets_to_replicate, right_indexes); \ break; diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h index 65371239851..3b3a6dc3a22 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -8,6 +8,7 @@ #include #include +#include #include #include From 276dc00ac1a38b4a3190f81ff8a84ee06b94321e Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Thu, 24 Jan 2019 18:14:59 +0300 Subject: [PATCH 124/586] fix --- docs/en/operations/table_engines/mergetree.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/en/operations/table_engines/mergetree.md b/docs/en/operations/table_engines/mergetree.md index bb57c76a930..52d34ea059b 100644 --- a/docs/en/operations/table_engines/mergetree.md +++ b/docs/en/operations/table_engines/mergetree.md @@ -230,15 +230,15 @@ The key for partitioning by month allows reading only those data blocks which co ### Data Skipping Indices -Index declaration in the columns section of create query. +Index declaration in the columns section of the `CREATE` query. ```sql INDEX index_name expr TYPE type(...) GRANULARITY granularity_value ``` For tables from the `*MergeTree` family data skipping indices can be specified. -These indices aggregate some information about specified expression on blocks, which consist of `granularity_value` granules, -then these aggregates is used in `SELECT` queries for reducing amount of data to read from disk by skipping big blocks of data where `where` query can not be satisfied. 
+These indices aggregate some information about the specified expression on blocks, which consist of `granularity_value` granules, +then these aggregates are used in `SELECT` queries for reducing the amount of data to read from the disk by skipping big blocks of data where `where` query cannot be satisfied. Example @@ -255,7 +255,7 @@ CREATE TABLE table_name ... ``` -Indices from the example can be used by ClickHouse to reduce amount of data to read from disk in following queries. +Indices from the example can be used by ClickHouse to reduce the amount of data to read from disk in following queries. ```sql SELECT count() FROM table WHERE s < 'z' SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234 @@ -263,7 +263,7 @@ SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234 #### Available Types of Indices -* `minmax` Stores extremes of specified expression (if expression is `tuple`, then it stores extremes for each element of `tuple`), uses stored info for skipping blocks of data like primary key. +* `minmax` Stores extremes of specified expression (if the expression is `tuple`, then it stores extremes for each element of `tuple`), uses stored info for skipping blocks of data like primary key. ```sql INDEX sample_index (u64 * length(s)) TYPE minmax GRANULARITY 4 From 84c7307ab37056c3ec8441eff22a0bdee68ad664 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Thu, 24 Jan 2019 18:16:19 +0300 Subject: [PATCH 125/586] fix --- docs/en/query_language/alter.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/query_language/alter.md b/docs/en/query_language/alter.md index b454fa06969..0cd5573e17c 100644 --- a/docs/en/query_language/alter.md +++ b/docs/en/query_language/alter.md @@ -93,8 +93,8 @@ are available: * `ALTER DROP INDEX name` - Removes index description from tables metadata and deletes index files from disk. -These commands are lightweight in sense that they only change metadata or remove files. -Also they are replicated (syncing indices metadata through ZooKeeper). +These commands are lightweight in a sense that they only change metadata or remove files. +Also, they are replicated (syncing indices metadata through ZooKeeper). 
### Manipulations With Partitions and Parts From fbba93de7bc7b1c40179659f68241c7a2d6937f5 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Thu, 24 Jan 2019 20:24:41 +0300 Subject: [PATCH 126/586] Use our fork of cppkafka With some fixes --- .gitmodules | 2 +- contrib/cppkafka | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index 24211b6707e..3ae0e15c620 100644 --- a/.gitmodules +++ b/.gitmodules @@ -63,4 +63,4 @@ url = https://github.com/ClickHouse-Extras/libgsasl.git [submodule "contrib/cppkafka"] path = contrib/cppkafka - url = https://github.com/mfontanini/cppkafka.git + url = https://github.com/ClickHouse-Extras/cppkafka.git diff --git a/contrib/cppkafka b/contrib/cppkafka index efa4e95a184..860c90e92ee 160000 --- a/contrib/cppkafka +++ b/contrib/cppkafka @@ -1 +1 @@ -Subproject commit efa4e95a18458fac333f45a13c4ed55d6d3e3a00 +Subproject commit 860c90e92eee6690aa74a2ca7b7c5c6930dffecd From 4e413f4c2d693c657fe40907bded0bcf7e3c74ca Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 25 Jan 2019 14:03:02 +0300 Subject: [PATCH 127/586] Move classes to separate files --- dbms/programs/performance-test/CMakeLists.txt | 8 +- dbms/programs/performance-test/JSONString.cpp | 63 +++ dbms/programs/performance-test/JSONString.h | 39 ++ .../performance-test/PerformanceTest.cpp | 452 +----------------- .../performance-test/StopConditionsSet.cpp | 63 +++ .../performance-test/StopConditionsSet.h | 40 ++ dbms/programs/performance-test/TestStats.cpp | 175 +++++++ dbms/programs/performance-test/TestStats.h | 83 ++++ .../performance-test/TestStopConditions.cpp | 26 + .../performance-test/TestStopConditions.h | 53 ++ 10 files changed, 562 insertions(+), 440 deletions(-) create mode 100644 dbms/programs/performance-test/JSONString.cpp create mode 100644 dbms/programs/performance-test/JSONString.h create mode 100644 dbms/programs/performance-test/StopConditionsSet.cpp create mode 100644 dbms/programs/performance-test/StopConditionsSet.h create mode 100644 dbms/programs/performance-test/TestStats.cpp create mode 100644 dbms/programs/performance-test/TestStats.h create mode 100644 dbms/programs/performance-test/TestStopConditions.cpp create mode 100644 dbms/programs/performance-test/TestStopConditions.h diff --git a/dbms/programs/performance-test/CMakeLists.txt b/dbms/programs/performance-test/CMakeLists.txt index f1a08172009..591a7180691 100644 --- a/dbms/programs/performance-test/CMakeLists.txt +++ b/dbms/programs/performance-test/CMakeLists.txt @@ -1,4 +1,10 @@ -add_library (clickhouse-performance-test-lib ${LINK_MODE} PerformanceTest.cpp) +add_library (clickhouse-performance-test-lib ${LINK_MODE} + JSONString.cpp + StopConditionsSet.cpp + TestStopConditions.cpp + TestStats.cpp + PerformanceTest.cpp +) target_link_libraries (clickhouse-performance-test-lib PRIVATE dbms clickhouse_common_io clickhouse_common_config ${Boost_PROGRAM_OPTIONS_LIBRARY}) target_include_directories (clickhouse-performance-test-lib SYSTEM PRIVATE ${PCG_RANDOM_INCLUDE_DIR}) diff --git a/dbms/programs/performance-test/JSONString.cpp b/dbms/programs/performance-test/JSONString.cpp new file mode 100644 index 00000000000..abea80caf66 --- /dev/null +++ b/dbms/programs/performance-test/JSONString.cpp @@ -0,0 +1,63 @@ +#include "JSONString.h" + +#include +namespace DB +{ + +namespace +{ +String pad(size_t padding) +{ + return String(padding * 4, ' '); +} + +const std::regex NEW_LINE{"\n"}; +} + +void JSONString::set(const String key, String value, bool wrap) +{ + if (value.empty()) + value = 
"null"; + + bool reserved = (value[0] == '[' || value[0] == '{' || value == "null"); + if (!reserved && wrap) + value = '"' + std::regex_replace(value, NEW_LINE, "\\n") + '"'; + + content[key] = value; +} + +void JSONString::set(const String key, const std::vector & run_infos) +{ + String value = "[\n"; + + for (size_t i = 0; i < run_infos.size(); ++i) + { + value += pad(padding + 1) + run_infos[i].asString(padding + 2); + if (i != run_infos.size() - 1) + value += ','; + + value += "\n"; + } + + value += pad(padding) + ']'; + content[key] = value; +} + +String JSONString::asString(size_t cur_padding) const +{ + String repr = "{"; + + for (auto it = content.begin(); it != content.end(); ++it) + { + if (it != content.begin()) + repr += ','; + /// construct "key": "value" string with padding + repr += "\n" + pad(cur_padding) + '"' + it->first + '"' + ": " + it->second; + } + + repr += "\n" + pad(cur_padding - 1) + '}'; + return repr; +} + + +} diff --git a/dbms/programs/performance-test/JSONString.h b/dbms/programs/performance-test/JSONString.h new file mode 100644 index 00000000000..ee83be5e9a6 --- /dev/null +++ b/dbms/programs/performance-test/JSONString.h @@ -0,0 +1,39 @@ +#pragma once +#include + +#include +#include +#include +#include + +namespace DB +{ + +/// NOTE The code is totally wrong. +class JSONString +{ +private: + std::map content; + size_t padding; + +public: + explicit JSONString(size_t padding_ = 1) : padding(padding_) {} + + void set(const String key, String value, bool wrap = true); + + template + std::enable_if_t> set(const String key, T value) + { + set(key, std::to_string(value), /*wrap= */ false); + } + + void set(const String key, const std::vector & run_infos); + + String asString() const + { + return asString(padding); + } + + String asString(size_t cur_padding) const; +}; +} diff --git a/dbms/programs/performance-test/PerformanceTest.cpp b/dbms/programs/performance-test/PerformanceTest.cpp index e91365aeade..d5bfcc85c60 100644 --- a/dbms/programs/performance-test/PerformanceTest.cpp +++ b/dbms/programs/performance-test/PerformanceTest.cpp @@ -7,6 +7,7 @@ #include #include #include + #include #include #include @@ -34,6 +35,11 @@ #include #include +#include "JSONString.h" +#include "StopConditionsSet.h" +#include "TestStopConditions.h" +#include "TestStats.h" + #ifndef __clang__ #pragma GCC optimize("-fno-var-tracking-assignments") #endif @@ -45,9 +51,7 @@ */ namespace fs = boost::filesystem; using String = std::string; -const String FOUR_SPACES = " "; const std::regex QUOTE_REGEX{"\""}; -const std::regex NEW_LINE{"\n"}; namespace DB { @@ -59,439 +63,9 @@ namespace ErrorCodes extern const int FILE_DOESNT_EXIST; } -static String pad(size_t padding) -{ - return String(padding * 4, ' '); -} - - -/// NOTE The code is totally wrong. 
-class JSONString -{ -private: - std::map content; - size_t padding; - -public: - explicit JSONString(size_t padding_ = 1) : padding(padding_) {} - - void set(const String key, String value, bool wrap = true) - { - if (value.empty()) - value = "null"; - - bool reserved = (value[0] == '[' || value[0] == '{' || value == "null"); - if (!reserved && wrap) - value = '"' + std::regex_replace(value, NEW_LINE, "\\n") + '"'; - - content[key] = value; - } - - template - std::enable_if_t> set(const String key, T value) - { - set(key, std::to_string(value), /*wrap= */ false); - } - - void set(const String key, const std::vector & run_infos) - { - String value = "[\n"; - - for (size_t i = 0; i < run_infos.size(); ++i) - { - value += pad(padding + 1) + run_infos[i].asString(padding + 2); - if (i != run_infos.size() - 1) - value += ','; - - value += "\n"; - } - - value += pad(padding) + ']'; - content[key] = value; - } - - String asString() const - { - return asString(padding); - } - - String asString(size_t cur_padding) const - { - String repr = "{"; - - for (auto it = content.begin(); it != content.end(); ++it) - { - if (it != content.begin()) - repr += ','; - /// construct "key": "value" string with padding - repr += "\n" + pad(cur_padding) + '"' + it->first + '"' + ": " + it->second; - } - - repr += "\n" + pad(cur_padding - 1) + '}'; - return repr; - } -}; - using ConfigurationPtr = Poco::AutoPtr; -/// A set of supported stop conditions. -struct StopConditionsSet -{ - void loadFromConfig(const ConfigurationPtr & stop_conditions_view) - { - using Keys = std::vector; - Keys keys; - stop_conditions_view->keys(keys); - - for (const String & key : keys) - { - if (key == "total_time_ms") - total_time_ms.value = stop_conditions_view->getUInt64(key); - else if (key == "rows_read") - rows_read.value = stop_conditions_view->getUInt64(key); - else if (key == "bytes_read_uncompressed") - bytes_read_uncompressed.value = stop_conditions_view->getUInt64(key); - else if (key == "iterations") - iterations.value = stop_conditions_view->getUInt64(key); - else if (key == "min_time_not_changing_for_ms") - min_time_not_changing_for_ms.value = stop_conditions_view->getUInt64(key); - else if (key == "max_speed_not_changing_for_ms") - max_speed_not_changing_for_ms.value = stop_conditions_view->getUInt64(key); - else if (key == "average_speed_not_changing_for_ms") - average_speed_not_changing_for_ms.value = stop_conditions_view->getUInt64(key); - else - throw DB::Exception("Met unkown stop condition: " + key, DB::ErrorCodes::LOGICAL_ERROR); - - ++initialized_count; - } - } - - void reset() - { - total_time_ms.fulfilled = false; - rows_read.fulfilled = false; - bytes_read_uncompressed.fulfilled = false; - iterations.fulfilled = false; - min_time_not_changing_for_ms.fulfilled = false; - max_speed_not_changing_for_ms.fulfilled = false; - average_speed_not_changing_for_ms.fulfilled = false; - - fulfilled_count = 0; - } - - /// Note: only conditions with UInt64 minimal thresholds are supported. - /// I.e. condition is fulfilled when value is exceeded. 
- struct StopCondition - { - UInt64 value = 0; - bool fulfilled = false; - }; - - void report(UInt64 value, StopCondition & condition) - { - if (condition.value && !condition.fulfilled && value >= condition.value) - { - condition.fulfilled = true; - ++fulfilled_count; - } - } - - StopCondition total_time_ms; - StopCondition rows_read; - StopCondition bytes_read_uncompressed; - StopCondition iterations; - StopCondition min_time_not_changing_for_ms; - StopCondition max_speed_not_changing_for_ms; - StopCondition average_speed_not_changing_for_ms; - - size_t initialized_count = 0; - size_t fulfilled_count = 0; -}; - -/// Stop conditions for a test run. The running test will be terminated in either of two conditions: -/// 1. All conditions marked 'all_of' are fulfilled -/// or -/// 2. Any condition marked 'any_of' is fulfilled -class TestStopConditions -{ -public: - void loadFromConfig(ConfigurationPtr & stop_conditions_config) - { - if (stop_conditions_config->has("all_of")) - { - ConfigurationPtr config_all_of(stop_conditions_config->createView("all_of")); - conditions_all_of.loadFromConfig(config_all_of); - } - if (stop_conditions_config->has("any_of")) - { - ConfigurationPtr config_any_of(stop_conditions_config->createView("any_of")); - conditions_any_of.loadFromConfig(config_any_of); - } - } - - bool empty() const - { - return !conditions_all_of.initialized_count && !conditions_any_of.initialized_count; - } - -#define DEFINE_REPORT_FUNC(FUNC_NAME, CONDITION) \ - void FUNC_NAME(UInt64 value) \ - { \ - conditions_all_of.report(value, conditions_all_of.CONDITION); \ - conditions_any_of.report(value, conditions_any_of.CONDITION); \ - } - - DEFINE_REPORT_FUNC(reportTotalTime, total_time_ms) - DEFINE_REPORT_FUNC(reportRowsRead, rows_read) - DEFINE_REPORT_FUNC(reportBytesReadUncompressed, bytes_read_uncompressed) - DEFINE_REPORT_FUNC(reportIterations, iterations) - DEFINE_REPORT_FUNC(reportMinTimeNotChangingFor, min_time_not_changing_for_ms) - DEFINE_REPORT_FUNC(reportMaxSpeedNotChangingFor, max_speed_not_changing_for_ms) - DEFINE_REPORT_FUNC(reportAverageSpeedNotChangingFor, average_speed_not_changing_for_ms) - -#undef REPORT - - bool areFulfilled() const - { - return (conditions_all_of.initialized_count && conditions_all_of.fulfilled_count >= conditions_all_of.initialized_count) - || (conditions_any_of.initialized_count && conditions_any_of.fulfilled_count); - } - - void reset() - { - conditions_all_of.reset(); - conditions_any_of.reset(); - } - -private: - StopConditionsSet conditions_all_of; - StopConditionsSet conditions_any_of; -}; - -struct Stats -{ - Stopwatch watch; - Stopwatch watch_per_query; - Stopwatch min_time_watch; - Stopwatch max_rows_speed_watch; - Stopwatch max_bytes_speed_watch; - Stopwatch avg_rows_speed_watch; - Stopwatch avg_bytes_speed_watch; - - bool last_query_was_cancelled = false; - - size_t queries = 0; - - size_t total_rows_read = 0; - size_t total_bytes_read = 0; - - size_t last_query_rows_read = 0; - size_t last_query_bytes_read = 0; - - using Sampler = ReservoirSampler; - Sampler sampler{1 << 16}; - - /// min_time in ms - UInt64 min_time = std::numeric_limits::max(); - double total_time = 0; - - double max_rows_speed = 0; - double max_bytes_speed = 0; - - double avg_rows_speed_value = 0; - double avg_rows_speed_first = 0; - static double avg_rows_speed_precision; - - double avg_bytes_speed_value = 0; - double avg_bytes_speed_first = 0; - static double avg_bytes_speed_precision; - - size_t number_of_rows_speed_info_batches = 0; - size_t 
number_of_bytes_speed_info_batches = 0; - - bool ready = false; // check if a query wasn't interrupted by SIGINT - String exception; - - String getStatisticByName(const String & statistic_name) - { - if (statistic_name == "min_time") - { - return std::to_string(min_time) + "ms"; - } - if (statistic_name == "quantiles") - { - String result = "\n"; - - for (double percent = 10; percent <= 90; percent += 10) - { - result += FOUR_SPACES + std::to_string((percent / 100)); - result += ": " + std::to_string(sampler.quantileInterpolated(percent / 100.0)); - result += "\n"; - } - result += FOUR_SPACES + "0.95: " + std::to_string(sampler.quantileInterpolated(95 / 100.0)) + "\n"; - result += FOUR_SPACES + "0.99: " + std::to_string(sampler.quantileInterpolated(99 / 100.0)) + "\n"; - result += FOUR_SPACES + "0.999: " + std::to_string(sampler.quantileInterpolated(99.9 / 100.)) + "\n"; - result += FOUR_SPACES + "0.9999: " + std::to_string(sampler.quantileInterpolated(99.99 / 100.)); - - return result; - } - if (statistic_name == "total_time") - { - return std::to_string(total_time) + "s"; - } - if (statistic_name == "queries_per_second") - { - return std::to_string(queries / total_time); - } - if (statistic_name == "rows_per_second") - { - return std::to_string(total_rows_read / total_time); - } - if (statistic_name == "bytes_per_second") - { - return std::to_string(total_bytes_read / total_time); - } - - if (statistic_name == "max_rows_per_second") - { - return std::to_string(max_rows_speed); - } - if (statistic_name == "max_bytes_per_second") - { - return std::to_string(max_bytes_speed); - } - if (statistic_name == "avg_rows_per_second") - { - return std::to_string(avg_rows_speed_value); - } - if (statistic_name == "avg_bytes_per_second") - { - return std::to_string(avg_bytes_speed_value); - } - - return ""; - } - - void update_min_time(const UInt64 min_time_candidate) - { - if (min_time_candidate < min_time) - { - min_time = min_time_candidate; - min_time_watch.restart(); - } - } - - void update_average_speed(const double new_speed_info, - Stopwatch & avg_speed_watch, - size_t & number_of_info_batches, - double precision, - double & avg_speed_first, - double & avg_speed_value) - { - avg_speed_value = ((avg_speed_value * number_of_info_batches) + new_speed_info); - ++number_of_info_batches; - avg_speed_value /= number_of_info_batches; - - if (avg_speed_first == 0) - { - avg_speed_first = avg_speed_value; - } - - if (std::abs(avg_speed_value - avg_speed_first) >= precision) - { - avg_speed_first = avg_speed_value; - avg_speed_watch.restart(); - } - } - - void update_max_speed(const size_t max_speed_candidate, Stopwatch & max_speed_watch, double & max_speed) - { - if (max_speed_candidate > max_speed) - { - max_speed = max_speed_candidate; - max_speed_watch.restart(); - } - } - - void add(size_t rows_read_inc, size_t bytes_read_inc) - { - total_rows_read += rows_read_inc; - total_bytes_read += bytes_read_inc; - last_query_rows_read += rows_read_inc; - last_query_bytes_read += bytes_read_inc; - - double new_rows_speed = last_query_rows_read / watch_per_query.elapsedSeconds(); - double new_bytes_speed = last_query_bytes_read / watch_per_query.elapsedSeconds(); - - /// Update rows speed - update_max_speed(new_rows_speed, max_rows_speed_watch, max_rows_speed); - update_average_speed(new_rows_speed, - avg_rows_speed_watch, - number_of_rows_speed_info_batches, - avg_rows_speed_precision, - avg_rows_speed_first, - avg_rows_speed_value); - /// Update bytes speed - update_max_speed(new_bytes_speed, 
max_bytes_speed_watch, max_bytes_speed); - update_average_speed(new_bytes_speed, - avg_bytes_speed_watch, - number_of_bytes_speed_info_batches, - avg_bytes_speed_precision, - avg_bytes_speed_first, - avg_bytes_speed_value); - } - - void updateQueryInfo() - { - ++queries; - sampler.insert(watch_per_query.elapsedSeconds()); - update_min_time(watch_per_query.elapsed() / (1000 * 1000)); /// ns to ms - } - - void setTotalTime() - { - total_time = watch.elapsedSeconds(); - } - - void clear() - { - watch.restart(); - watch_per_query.restart(); - min_time_watch.restart(); - max_rows_speed_watch.restart(); - max_bytes_speed_watch.restart(); - avg_rows_speed_watch.restart(); - avg_bytes_speed_watch.restart(); - - last_query_was_cancelled = false; - - sampler.clear(); - - queries = 0; - total_rows_read = 0; - total_bytes_read = 0; - last_query_rows_read = 0; - last_query_bytes_read = 0; - - min_time = std::numeric_limits::max(); - total_time = 0; - max_rows_speed = 0; - max_bytes_speed = 0; - avg_rows_speed_value = 0; - avg_bytes_speed_value = 0; - avg_rows_speed_first = 0; - avg_bytes_speed_first = 0; - avg_rows_speed_precision = 0.001; - avg_bytes_speed_precision = 0.001; - number_of_rows_speed_info_batches = 0; - number_of_bytes_speed_info_batches = 0; - } -}; - -double Stats::avg_rows_speed_precision = 0.001; -double Stats::avg_bytes_speed_precision = 0.001; - class PerformanceTest : public Poco::Util::Application { public: @@ -618,7 +192,7 @@ private: }; size_t times_to_run = 1; - std::vector statistics_by_run; + std::vector statistics_by_run; /// Removes configurations that has a given value. If leave is true, the logic is reversed. void removeConfigurationsIf( @@ -876,12 +450,12 @@ private: if (std::find(config_settings.begin(), config_settings.end(), "average_rows_speed_precision") != config_settings.end()) { - Stats::avg_rows_speed_precision = test_config->getDouble("settings.average_rows_speed_precision"); + TestStats::avg_rows_speed_precision = test_config->getDouble("settings.average_rows_speed_precision"); } if (std::find(config_settings.begin(), config_settings.end(), "average_bytes_speed_precision") != config_settings.end()) { - Stats::avg_bytes_speed_precision = test_config->getDouble("settings.average_bytes_speed_precision"); + TestStats::avg_bytes_speed_precision = test_config->getDouble("settings.average_bytes_speed_precision"); } } @@ -1062,7 +636,7 @@ private: for (const auto & [query, run_index] : queries_with_indexes) { TestStopConditions & stop_conditions = stop_conditions_by_run[run_index]; - Stats & statistics = statistics_by_run[run_index]; + TestStats & statistics = statistics_by_run[run_index]; statistics.clear(); try @@ -1093,7 +667,7 @@ private: } } - void execute(const Query & query, Stats & statistics, TestStopConditions & stop_conditions) + void execute(const Query & query, TestStats & statistics, TestStopConditions & stop_conditions) { statistics.watch_per_query.restart(); statistics.last_query_was_cancelled = false; @@ -1117,7 +691,7 @@ private: } void checkFulfilledConditionsAndUpdate( - const Progress & progress, RemoteBlockInputStream & stream, Stats & statistics, TestStopConditions & stop_conditions) + const Progress & progress, RemoteBlockInputStream & stream, TestStats & statistics, TestStopConditions & stop_conditions) { statistics.add(progress.rows, progress.bytes); @@ -1256,7 +830,7 @@ public: { for (size_t number_of_launch = 0; number_of_launch < times_to_run; ++number_of_launch) { - Stats & statistics = statistics_by_run[number_of_launch * queries.size() 
+ query_index];
+                TestStats & statistics = statistics_by_run[number_of_launch * queries.size() + query_index];
 
                 if (!statistics.ready)
                     continue;
 
diff --git a/dbms/programs/performance-test/StopConditionsSet.cpp b/dbms/programs/performance-test/StopConditionsSet.cpp
new file mode 100644
index 00000000000..624c5b48a29
--- /dev/null
+++ b/dbms/programs/performance-test/StopConditionsSet.cpp
@@ -0,0 +1,63 @@
+#include "StopConditionsSet.h"
+#include
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+extern const int LOGICAL_ERROR;
+}
+
+void StopConditionsSet::loadFromConfig(const ConfigurationPtr & stop_conditions_view)
+{
+    std::vector keys;
+    stop_conditions_view->keys(keys);
+
+    for (const String & key : keys)
+    {
+        if (key == "total_time_ms")
+            total_time_ms.value = stop_conditions_view->getUInt64(key);
+        else if (key == "rows_read")
+            rows_read.value = stop_conditions_view->getUInt64(key);
+        else if (key == "bytes_read_uncompressed")
+            bytes_read_uncompressed.value = stop_conditions_view->getUInt64(key);
+        else if (key == "iterations")
+            iterations.value = stop_conditions_view->getUInt64(key);
+        else if (key == "min_time_not_changing_for_ms")
+            min_time_not_changing_for_ms.value = stop_conditions_view->getUInt64(key);
+        else if (key == "max_speed_not_changing_for_ms")
+            max_speed_not_changing_for_ms.value = stop_conditions_view->getUInt64(key);
+        else if (key == "average_speed_not_changing_for_ms")
+            average_speed_not_changing_for_ms.value = stop_conditions_view->getUInt64(key);
+        else
+            throw DB::Exception("Met unknown stop condition: " + key, DB::ErrorCodes::LOGICAL_ERROR);
+        ++initialized_count;
+    }
+}
+
+void StopConditionsSet::reset()
+{
+    total_time_ms.fulfilled = false;
+    rows_read.fulfilled = false;
+    bytes_read_uncompressed.fulfilled = false;
+    iterations.fulfilled = false;
+    min_time_not_changing_for_ms.fulfilled = false;
+    max_speed_not_changing_for_ms.fulfilled = false;
+    average_speed_not_changing_for_ms.fulfilled = false;
+
+    fulfilled_count = 0;
+}
+
+void StopConditionsSet::report(UInt64 value, StopConditionsSet::StopCondition & condition)
+{
+    if (condition.value && !condition.fulfilled && value >= condition.value)
+    {
+        condition.fulfilled = true;
+        ++fulfilled_count;
+    }
+}
+
+
+
+}
diff --git a/dbms/programs/performance-test/StopConditionsSet.h b/dbms/programs/performance-test/StopConditionsSet.h
new file mode 100644
index 00000000000..e83a4251bd0
--- /dev/null
+++ b/dbms/programs/performance-test/StopConditionsSet.h
@@ -0,0 +1,40 @@
+#pragma once
+
+#include
+#include
+#include
+
+namespace DB
+{
+
+using ConfigurationPtr = Poco::AutoPtr;
+
+/// A set of supported stop conditions.
+struct StopConditionsSet
+{
+    void loadFromConfig(const ConfigurationPtr & stop_conditions_view);
+    void reset();
+
+    /// Note: only conditions with UInt64 minimal thresholds are supported.
+    /// I.e. condition is fulfilled when value is exceeded.
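+    /// A rough usage sketch (illustrative only; "all_of" is the view name used by TestStopConditions):
+    ///
+    ///     StopConditionsSet set;
+    ///     set.loadFromConfig(stop_conditions_config->createView("all_of"));
+    ///     set.report(elapsed_ms, set.total_time_ms); /// fulfilled once elapsed_ms >= set.total_time_ms.value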
+ struct StopCondition + { + UInt64 value = 0; + bool fulfilled = false; + }; + + void report(UInt64 value, StopCondition & condition); + + StopCondition total_time_ms; + StopCondition rows_read; + StopCondition bytes_read_uncompressed; + StopCondition iterations; + StopCondition min_time_not_changing_for_ms; + StopCondition max_speed_not_changing_for_ms; + StopCondition average_speed_not_changing_for_ms; + + size_t initialized_count = 0; + size_t fulfilled_count = 0; +}; + +} diff --git a/dbms/programs/performance-test/TestStats.cpp b/dbms/programs/performance-test/TestStats.cpp new file mode 100644 index 00000000000..163aefdc98d --- /dev/null +++ b/dbms/programs/performance-test/TestStats.cpp @@ -0,0 +1,175 @@ +#include "TestStats.h" +namespace DB +{ + +namespace +{ +const String FOUR_SPACES = " "; +} + +String TestStats::getStatisticByName(const String & statistic_name) +{ + if (statistic_name == "min_time") + return std::to_string(min_time) + "ms"; + + if (statistic_name == "quantiles") + { + String result = "\n"; + + for (double percent = 10; percent <= 90; percent += 10) + { + result += FOUR_SPACES + std::to_string((percent / 100)); + result += ": " + std::to_string(sampler.quantileInterpolated(percent / 100.0)); + result += "\n"; + } + result += FOUR_SPACES + "0.95: " + std::to_string(sampler.quantileInterpolated(95 / 100.0)) + "\n"; + result += FOUR_SPACES + "0.99: " + std::to_string(sampler.quantileInterpolated(99 / 100.0)) + "\n"; + result += FOUR_SPACES + "0.999: " + std::to_string(sampler.quantileInterpolated(99.9 / 100.)) + "\n"; + result += FOUR_SPACES + "0.9999: " + std::to_string(sampler.quantileInterpolated(99.99 / 100.)); + + return result; + } + if (statistic_name == "total_time") + return std::to_string(total_time) + "s"; + + if (statistic_name == "queries_per_second") + return std::to_string(queries / total_time); + + if (statistic_name == "rows_per_second") + return std::to_string(total_rows_read / total_time); + + if (statistic_name == "bytes_per_second") + return std::to_string(total_bytes_read / total_time); + + if (statistic_name == "max_rows_per_second") + return std::to_string(max_rows_speed); + + if (statistic_name == "max_bytes_per_second") + return std::to_string(max_bytes_speed); + + if (statistic_name == "avg_rows_per_second") + return std::to_string(avg_rows_speed_value); + + if (statistic_name == "avg_bytes_per_second") + return std::to_string(avg_bytes_speed_value); + + return ""; +} + + +void TestStats::update_min_time(UInt64 min_time_candidate) +{ + if (min_time_candidate < min_time) + { + min_time = min_time_candidate; + min_time_watch.restart(); + } +} + +void TestStats::update_max_speed( + size_t max_speed_candidate, + Stopwatch & max_speed_watch, + double & max_speed) +{ + if (max_speed_candidate > max_speed) + { + max_speed = max_speed_candidate; + max_speed_watch.restart(); + } +} + + +void TestStats::update_average_speed( + double new_speed_info, + Stopwatch & avg_speed_watch, + size_t & number_of_info_batches, + double precision, + double & avg_speed_first, + double & avg_speed_value) +{ + avg_speed_value = ((avg_speed_value * number_of_info_batches) + new_speed_info); + ++number_of_info_batches; + avg_speed_value /= number_of_info_batches; + + if (avg_speed_first == 0) + { + avg_speed_first = avg_speed_value; + } + + if (std::abs(avg_speed_value - avg_speed_first) >= precision) + { + avg_speed_first = avg_speed_value; + avg_speed_watch.restart(); + } +} + +void TestStats::add(size_t rows_read_inc, size_t bytes_read_inc) +{ + total_rows_read += 
rows_read_inc; + total_bytes_read += bytes_read_inc; + last_query_rows_read += rows_read_inc; + last_query_bytes_read += bytes_read_inc; + + double new_rows_speed = last_query_rows_read / watch_per_query.elapsedSeconds(); + double new_bytes_speed = last_query_bytes_read / watch_per_query.elapsedSeconds(); + + /// Update rows speed + update_max_speed(new_rows_speed, max_rows_speed_watch, max_rows_speed); + update_average_speed(new_rows_speed, + avg_rows_speed_watch, + number_of_rows_speed_info_batches, + avg_rows_speed_precision, + avg_rows_speed_first, + avg_rows_speed_value); + /// Update bytes speed + update_max_speed(new_bytes_speed, max_bytes_speed_watch, max_bytes_speed); + update_average_speed(new_bytes_speed, + avg_bytes_speed_watch, + number_of_bytes_speed_info_batches, + avg_bytes_speed_precision, + avg_bytes_speed_first, + avg_bytes_speed_value); +} + +void TestStats::updateQueryInfo() +{ + ++queries; + sampler.insert(watch_per_query.elapsedSeconds()); + update_min_time(watch_per_query.elapsed() / (1000 * 1000)); /// ns to ms +} + +void TestStats::clear() +{ + watch.restart(); + watch_per_query.restart(); + min_time_watch.restart(); + max_rows_speed_watch.restart(); + max_bytes_speed_watch.restart(); + avg_rows_speed_watch.restart(); + avg_bytes_speed_watch.restart(); + + last_query_was_cancelled = false; + + sampler.clear(); + + queries = 0; + total_rows_read = 0; + total_bytes_read = 0; + last_query_rows_read = 0; + last_query_bytes_read = 0; + + min_time = std::numeric_limits::max(); + total_time = 0; + max_rows_speed = 0; + max_bytes_speed = 0; + avg_rows_speed_value = 0; + avg_bytes_speed_value = 0; + avg_rows_speed_first = 0; + avg_bytes_speed_first = 0; + avg_rows_speed_precision = 0.001; + avg_bytes_speed_precision = 0.001; + number_of_rows_speed_info_batches = 0; + number_of_bytes_speed_info_batches = 0; +} + +} diff --git a/dbms/programs/performance-test/TestStats.h b/dbms/programs/performance-test/TestStats.h new file mode 100644 index 00000000000..41a8efc3beb --- /dev/null +++ b/dbms/programs/performance-test/TestStats.h @@ -0,0 +1,83 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ +struct TestStats +{ + Stopwatch watch; + Stopwatch watch_per_query; + Stopwatch min_time_watch; + Stopwatch max_rows_speed_watch; + Stopwatch max_bytes_speed_watch; + Stopwatch avg_rows_speed_watch; + Stopwatch avg_bytes_speed_watch; + + bool last_query_was_cancelled = false; + + size_t queries = 0; + + size_t total_rows_read = 0; + size_t total_bytes_read = 0; + + size_t last_query_rows_read = 0; + size_t last_query_bytes_read = 0; + + using Sampler = ReservoirSampler; + Sampler sampler{1 << 16}; + + /// min_time in ms + UInt64 min_time = std::numeric_limits::max(); + double total_time = 0; + + double max_rows_speed = 0; + double max_bytes_speed = 0; + + double avg_rows_speed_value = 0; + double avg_rows_speed_first = 0; + static inline double avg_rows_speed_precision = 0.001; + + double avg_bytes_speed_value = 0; + double avg_bytes_speed_first = 0; + static inline double avg_bytes_speed_precision = 0.001; + + size_t number_of_rows_speed_info_batches = 0; + size_t number_of_bytes_speed_info_batches = 0; + + bool ready = false; // check if a query wasn't interrupted by SIGINT + String exception; + + String getStatisticByName(const String & statistic_name); + + void update_min_time(UInt64 min_time_candidate); + + void update_average_speed( + double new_speed_info, + Stopwatch & avg_speed_watch, + size_t & number_of_info_batches, + double precision, + double & 
avg_speed_first, + double & avg_speed_value); + + void update_max_speed( + size_t max_speed_candidate, + Stopwatch & max_speed_watch, + double & max_speed); + + void add(size_t rows_read_inc, size_t bytes_read_inc); + + void updateQueryInfo(); + + void setTotalTime() + { + total_time = watch.elapsedSeconds(); + } + + void clear(); +}; + +} diff --git a/dbms/programs/performance-test/TestStopConditions.cpp b/dbms/programs/performance-test/TestStopConditions.cpp new file mode 100644 index 00000000000..bc608e4001a --- /dev/null +++ b/dbms/programs/performance-test/TestStopConditions.cpp @@ -0,0 +1,26 @@ +#include "TestStopConditions.h" + +namespace DB +{ + +void TestStopConditions::loadFromConfig(ConfigurationPtr & stop_conditions_config) +{ + if (stop_conditions_config->has("all_of")) + { + ConfigurationPtr config_all_of(stop_conditions_config->createView("all_of")); + conditions_all_of.loadFromConfig(config_all_of); + } + if (stop_conditions_config->has("any_of")) + { + ConfigurationPtr config_any_of(stop_conditions_config->createView("any_of")); + conditions_any_of.loadFromConfig(config_any_of); + } +} + +bool TestStopConditions::areFulfilled() const +{ + return (conditions_all_of.initialized_count && conditions_all_of.fulfilled_count >= conditions_all_of.initialized_count) + || (conditions_any_of.initialized_count && conditions_any_of.fulfilled_count); +} + +} diff --git a/dbms/programs/performance-test/TestStopConditions.h b/dbms/programs/performance-test/TestStopConditions.h new file mode 100644 index 00000000000..91f1baa1ced --- /dev/null +++ b/dbms/programs/performance-test/TestStopConditions.h @@ -0,0 +1,53 @@ +#pragma once +#include "StopConditionsSet.h" +#include + +namespace DB +{ +/// Stop conditions for a test run. The running test will be terminated in either of two conditions: +/// 1. All conditions marked 'all_of' are fulfilled +/// or +/// 2. Any condition marked 'any_of' is fulfilled + +using ConfigurationPtr = Poco::AutoPtr; + +class TestStopConditions +{ +public: + void loadFromConfig(ConfigurationPtr & stop_conditions_config); + inline bool empty() const + { + return !conditions_all_of.initialized_count && !conditions_any_of.initialized_count; + } + +#define DEFINE_REPORT_FUNC(FUNC_NAME, CONDITION) \ + void FUNC_NAME(UInt64 value) \ + { \ + conditions_all_of.report(value, conditions_all_of.CONDITION); \ + conditions_any_of.report(value, conditions_any_of.CONDITION); \ + } + + DEFINE_REPORT_FUNC(reportTotalTime, total_time_ms) + DEFINE_REPORT_FUNC(reportRowsRead, rows_read) + DEFINE_REPORT_FUNC(reportBytesReadUncompressed, bytes_read_uncompressed) + DEFINE_REPORT_FUNC(reportIterations, iterations) + DEFINE_REPORT_FUNC(reportMinTimeNotChangingFor, min_time_not_changing_for_ms) + DEFINE_REPORT_FUNC(reportMaxSpeedNotChangingFor, max_speed_not_changing_for_ms) + DEFINE_REPORT_FUNC(reportAverageSpeedNotChangingFor, average_speed_not_changing_for_ms) + +#undef REPORT + + bool areFulfilled() const; + + void reset() + { + conditions_all_of.reset(); + conditions_any_of.reset(); + } + +private: + StopConditionsSet conditions_all_of; + StopConditionsSet conditions_any_of; +}; + +} From 681266abd08174cfd3724f4ab9e6602679fa260f Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Fri, 25 Jan 2019 15:48:59 +0300 Subject: [PATCH 128/586] Poll messages in batches of `max_block_size` size. 
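The consumer buffer now keeps a whole batch of messages and serves them one
by one; offsets of the previous batch are committed implicitly right before
the next poll. A rough sketch of the new read path (illustrative only,
condensed from ReadBufferFromKafkaConsumer::nextImpl below):

    if (current == messages.end())
    {
        commit(); // commit offsets of the batch that was just processed
        messages = consumer->poll_batch(batch_size, std::chrono::milliseconds(READ_POLL_MS));
        current = messages.begin();
    }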
--- dbms/src/DataStreams/IBlockInputStream.h | 2 +- .../Storages/Kafka/KafkaBlockInputStream.cpp | 18 ++----- .../Storages/Kafka/KafkaBlockInputStream.h | 7 ++- .../Kafka/ReadBufferFromKafkaConsumer.cpp | 47 ++++++++++++------- .../Kafka/ReadBufferFromKafkaConsumer.h | 28 ++++------- dbms/src/Storages/Kafka/StorageKafka.cpp | 3 ++ 6 files changed, 52 insertions(+), 53 deletions(-) diff --git a/dbms/src/DataStreams/IBlockInputStream.h b/dbms/src/DataStreams/IBlockInputStream.h index 6d2ddbfdf2c..6c908e86e40 100644 --- a/dbms/src/DataStreams/IBlockInputStream.h +++ b/dbms/src/DataStreams/IBlockInputStream.h @@ -258,7 +258,7 @@ protected: Block extremes; - void addChild(BlockInputStreamPtr & child) + void addChild(const BlockInputStreamPtr & child) { std::unique_lock lock(children_mutex); children.push_back(child); diff --git a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp index 2896dfc998f..5668e0d167c 100644 --- a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp +++ b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp @@ -42,7 +42,6 @@ KafkaBlockInputStream::~KafkaBlockInputStream() // Return consumer for another reader storage.pushConsumer(consumer); - consumer = nullptr; } String KafkaBlockInputStream::getName() const @@ -52,13 +51,10 @@ String KafkaBlockInputStream::getName() const Block KafkaBlockInputStream::readImpl() { - if (isCancelledOrThrowIfKilled() || !hasClaimed()) + if (!hasClaimed()) return {}; - if (!reader) - throw Exception("Logical error: reader is not initialized", ErrorCodes::LOGICAL_ERROR); - - return reader->read(); + return children.back()->read(); } Block KafkaBlockInputStream::getHeader() const @@ -76,22 +72,18 @@ void KafkaBlockInputStream::readPrefixImpl() if (consumer == nullptr) throw Exception("Failed to claim consumer: ", ErrorCodes::TIMEOUT_EXCEEDED); - read_buf = std::make_unique(new ReadBufferFromKafkaConsumer(consumer, storage.log), storage.row_delimiter); - reader = FormatFactory::instance().getInput(storage.format_name, *read_buf, storage.getSampleBlock(), context, max_block_size); + buffer = std::make_unique(new ReadBufferFromKafkaConsumer(consumer, storage.log, max_block_size), storage.row_delimiter); + addChild(FormatFactory::instance().getInput(storage.format_name, *buffer, storage.getSampleBlock(), context, max_block_size)); } // Start reading data finalized = false; - reader->readPrefix(); } void KafkaBlockInputStream::readSuffixImpl() { if (hasClaimed()) - { - reader->readSuffix(); - read_buf->subBufferAs()->commit(); // Store offsets read in this stream - } + buffer->subBufferAs()->commit(); // Mark as successfully finished finalized = true; diff --git a/dbms/src/Storages/Kafka/KafkaBlockInputStream.h b/dbms/src/Storages/Kafka/KafkaBlockInputStream.h index 6c38620e9c5..9414c7c5939 100644 --- a/dbms/src/Storages/Kafka/KafkaBlockInputStream.h +++ b/dbms/src/Storages/Kafka/KafkaBlockInputStream.h @@ -23,12 +23,11 @@ public: private: StorageKafka & storage; - ConsumerPtr consumer; Context context; size_t max_block_size; - Block sample_block; - std::unique_ptr read_buf; - BlockInputStreamPtr reader; + + ConsumerPtr consumer; + std::unique_ptr buffer; bool finalized = false; // Return true if consumer has been claimed by the stream diff --git a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp index c4baff72a85..948662a9f93 100644 --- a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp +++ 
b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp @@ -4,35 +4,48 @@ namespace DB { namespace { - /// How long to wait for a single message (applies to each individual message) - const auto READ_POLL_MS = 500; + const auto READ_POLL_MS = 500; /// How long to wait for a batch of messages. } // namespace +void ReadBufferFromKafkaConsumer::commit() +{ + if (messages.empty() || current == messages.begin()) + return; + + auto & previous = *std::prev(current); + LOG_TRACE(log, "Committing message with offset " << previous.get_offset()); + consumer->async_commit(previous); +} + +/// Do commit messages implicitly after we processed the previous batch. bool ReadBufferFromKafkaConsumer::nextImpl() { - // FIXME: we can speed up feed if we do poll in advance - message = consumer->poll(std::chrono::milliseconds(READ_POLL_MS)); - if (!message) + if (current == messages.end()) + { + commit(); + messages = consumer->poll_batch(batch_size, std::chrono::milliseconds(READ_POLL_MS)); + current = messages.begin(); + + LOG_TRACE(log, "Polled batch of " << messages.size() << " messages"); + } + + if (messages.empty() || current == messages.end()) return false; - if (message.is_eof()) + if (auto err = current->get_error()) { - // Reached EOF while reading current batch, skip it. - LOG_TRACE(log, "EOF reached for partition " << message.get_partition() << " offset " << message.get_offset()); - return nextImpl(); - } - else if (auto err = message.get_error()) - { - // TODO: should throw exception + ++current; + + // TODO: should throw exception instead LOG_ERROR(log, "Consumer error: " << err); return false; } - ++read_messages; - // XXX: very fishy place with const casting. - auto new_position = reinterpret_cast(const_cast(message.get_payload().get_data())); - BufferBase::set(new_position, message.get_payload().get_size(), 0); + auto new_position = reinterpret_cast(const_cast(current->get_payload().get_data())); + BufferBase::set(new_position, current->get_payload().get_size(), 0); + + ++current; return true; } diff --git a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h index a00b2a0cc3d..8a3ebcbc2ef 100644 --- a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h +++ b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h @@ -12,31 +12,23 @@ using ConsumerPtr = std::shared_ptr; class ReadBufferFromKafkaConsumer : public ReadBuffer { public: - ReadBufferFromKafkaConsumer(ConsumerPtr consumer_, Poco::Logger * log_) - : ReadBuffer(nullptr, 0), consumer(consumer_), log(log_) + ReadBufferFromKafkaConsumer(ConsumerPtr consumer_, Poco::Logger * log_, size_t max_batch_size) + : ReadBuffer(nullptr, 0), consumer(consumer_), log(log_), batch_size(max_batch_size), current(messages.begin()) { } - /// Commit messages read with this consumer - auto commit() - { - if (read_messages) - { - LOG_TRACE(log, "Committing " << read_messages << " messages"); - consumer->async_commit(); - } - - auto result = read_messages; - read_messages = 0; - - return result; - } + // Commit all processed messages. 
+    void commit();
 
 private:
+    using Messages = std::vector;
+
     ConsumerPtr consumer;
-    cppkafka::Message message;
     Poco::Logger * log;
-    size_t read_messages = 0;
+    const size_t batch_size = 1;
+
+    Messages messages;
+    Messages::const_iterator current;
 
     bool nextImpl() override;
 };
diff --git a/dbms/src/Storages/Kafka/StorageKafka.cpp b/dbms/src/Storages/Kafka/StorageKafka.cpp
index d6b58c6720e..eb7bc04260e 100644
--- a/dbms/src/Storages/Kafka/StorageKafka.cpp
+++ b/dbms/src/Storages/Kafka/StorageKafka.cpp
@@ -180,6 +180,9 @@ cppkafka::Configuration StorageKafka::createConsumerConfiguration()
     // We manually commit offsets after a stream successfully finished
     conf.set("enable.auto.commit", "false");
 
+    // Ignore EOF messages
+    conf.set("enable.partition.eof", "false");
+
     // for debug logs inside rdkafka
     // conf.set("debug", "consumer,cgrp,topic,fetch");
 
From d9195cda99fae43857a0e5bde0958b044dbc9d02 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C3=A9o=20Ercolanelli?=
Date: Fri, 25 Jan 2019 14:06:21 +0100
Subject: [PATCH 129/586] implement `getWidenDataType()` on IDataType

This essentially helps implement the `getReturnType` of aggregate
functions prone to overflow (such as `sumMap`).

---
 dbms/src/Common/ErrorCodes.cpp          | 1 +
 dbms/src/DataTypes/DataTypesDecimal.cpp | 8 ++++++++
 dbms/src/DataTypes/DataTypesDecimal.h   | 2 ++
 dbms/src/DataTypes/DataTypesNumber.h    | 7 +++++++
 dbms/src/DataTypes/IDataType.cpp        | 5 +++++
 dbms/src/DataTypes/IDataType.h          | 9 +++++++++
 6 files changed, 32 insertions(+)

diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp
index f06a88c96da..d8749c1b1e7 100644
--- a/dbms/src/Common/ErrorCodes.cpp
+++ b/dbms/src/Common/ErrorCodes.cpp
@@ -412,6 +412,7 @@ namespace ErrorCodes
     extern const int NO_DATA_FOR_REQUIRED_PROTOBUF_FIELD = 435;
     extern const int CANNOT_CONVERT_TO_PROTOBUF_TYPE = 436;
     extern const int PROTOBUF_FIELD_NOT_REPEATED = 437;
+    extern const int DATA_TYPE_CANNOT_BE_WIDEN = 438;
 
     extern const int KEEPER_EXCEPTION = 999;
     extern const int POCO_EXCEPTION = 1000;
diff --git a/dbms/src/DataTypes/DataTypesDecimal.cpp b/dbms/src/DataTypes/DataTypesDecimal.cpp
index 439a98928ea..f8f3084b254 100644
--- a/dbms/src/DataTypes/DataTypesDecimal.cpp
+++ b/dbms/src/DataTypes/DataTypesDecimal.cpp
@@ -148,6 +148,14 @@ Field DataTypeDecimal::getDefault() const
 }
 
 
+template
+DataTypePtr DataTypeDecimal::getWidenDataType() const
+{
+    using WidenDataType = DataTypeDecimal;
+    return std::make_shared(WidenDataType::maxPrecision(), scale);
+}
+
+
 template
 MutableColumnPtr DataTypeDecimal::createColumn() const
 {
diff --git a/dbms/src/DataTypes/DataTypesDecimal.h b/dbms/src/DataTypes/DataTypesDecimal.h
index 3f93f5aaae1..125f14cbf0a 100644
--- a/dbms/src/DataTypes/DataTypesDecimal.h
+++ b/dbms/src/DataTypes/DataTypesDecimal.h
@@ -103,6 +103,8 @@ public:
     void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf) const override;
 
     Field getDefault() const override;
+    bool canBeWiden() const override { return true; }
+    DataTypePtr getWidenDataType() const override;
 
     MutableColumnPtr createColumn() const override;
 
     bool equals(const IDataType & rhs) const override;
diff --git a/dbms/src/DataTypes/DataTypesNumber.h b/dbms/src/DataTypes/DataTypesNumber.h
index 6ce1cbc0d24..0794af41f36 100644
--- a/dbms/src/DataTypes/DataTypesNumber.h
+++ b/dbms/src/DataTypes/DataTypesNumber.h
@@ -17,6 +17,13 @@ class DataTypeNumber final : public DataTypeNumberBase
     bool canBeUsedInBitOperations() const override { return true; }
     bool canBeUsedInBooleanContext() const override { return true; }
     bool canBeInsideNullable() const override { return true; }
+
+    bool canBeWiden() const override { return true; }
+    DataTypePtr getWidenDataType() const override
+    {
+        using WidenDataType = DataTypeNumber<NearestFieldType<T>>;
+        return std::make_shared();
+    }
 };
 
 using DataTypeUInt8 = DataTypeNumber;
diff --git a/dbms/src/DataTypes/IDataType.cpp b/dbms/src/DataTypes/IDataType.cpp
index e8c6a3cc63d..4ebe1eb3e48 100644
--- a/dbms/src/DataTypes/IDataType.cpp
+++ b/dbms/src/DataTypes/IDataType.cpp
@@ -19,6 +19,7 @@ namespace ErrorCodes
 {
     extern const int MULTIPLE_STREAMS_REQUIRED;
     extern const int LOGICAL_ERROR;
+    extern const int DATA_TYPE_CANNOT_BE_WIDEN;
 }
 
@@ -51,6 +52,10 @@ ColumnPtr IDataType::createColumnConstWithDefaultValue(size_t size) const
     return createColumnConst(size, getDefault());
 }
 
+DataTypePtr IDataType::getWidenDataType() const
+{
+    throw Exception("Data type " + getName() + " can't be widen.", ErrorCodes::DATA_TYPE_CANNOT_BE_WIDEN);
+}
 
 void IDataType::serializeBinaryBulk(const IColumn &, WriteBuffer &, size_t, size_t) const
 {
diff --git a/dbms/src/DataTypes/IDataType.h b/dbms/src/DataTypes/IDataType.h
index fdac4e454bc..3c20b258d09 100644
--- a/dbms/src/DataTypes/IDataType.h
+++ b/dbms/src/DataTypes/IDataType.h
@@ -273,6 +273,15 @@ public:
      */
     virtual Field getDefault() const = 0;
 
+    /** The data type can be widen in order to try to avoid overflows.
+      * Widenable data types are typically Number or Decimal data types.
+      */
+    virtual bool canBeWiden() const { return false; }
+
+    /** Return the widen data type of the current data type. Throw an exception if `canBeWiden() == false`.
+      */
+    virtual DataTypePtr getWidenDataType() const;
+
     /** Directly insert default value into a column. Default implementation use method IColumn::insertDefault.
       * This should be overriden if data type default value differs from column default value (example: Enum data types).
       */
From 0d6094a3eae8c0268fa66064bf92088dfe2db322 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C3=A9o=20Ercolanelli?=
Date: Fri, 25 Jan 2019 14:08:16 +0100
Subject: [PATCH 130/586] sumMap: return types less prone to overflows

It used to be that sumMap would return the same type as the values
columns. If columns of Array(UInt8) were given, that would very easily
cause overflow. It now uses `getWidenDataType` (and ultimately
`NearestFieldType`) in order to define the result type.
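To make the overflow concrete, here is a minimal self-contained sketch of
the widening rule described above. The simplified alias below is an
assumption standing in for the real `NearestFieldType` mapping (which also
covers floats and decimals), and `WidenedType`/`main` are hypothetical
names used only for illustration:

    #include <cstdint>
    #include <iostream>
    #include <type_traits>

    // Assumed simplification of NearestFieldType: widen any narrow integer
    // to the widest integer type of the same signedness.
    template <typename T>
    using WidenedType = std::conditional_t<std::is_signed_v<T>, int64_t, uint64_t>;

    int main()
    {
        // The case from the test added below: UInt8 counts 255 and 2.
        uint8_t a = 255, b = 2;
        uint8_t narrow_sum = a + b;                                  // wraps to 1
        WidenedType<uint8_t> wide_sum = WidenedType<uint8_t>(a) + b; // 257

        std::cout << int(narrow_sum) << ' ' << wide_sum << '\n';     // prints "1 257"
    }

The widened sum 257 is exactly the ([1],[257]) row added to
00502_sum_map.reference below.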
---
 .../src/AggregateFunctions/AggregateFunctionSumMap.h | 12 +++++++++++-
 .../queries/0_stateless/00502_sum_map.reference      |  1 +
 dbms/tests/queries/0_stateless/00502_sum_map.sql     |  9 +++++++++
 3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h b/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h
index 1e5f3e38cd2..8c7c24faed5 100644
--- a/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h
+++ b/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h
@@ -72,7 +72,7 @@ public:
         types.emplace_back(std::make_shared(keys_type));
 
         for (const auto & value_type : values_types)
-            types.emplace_back(std::make_shared(value_type));
+            types.emplace_back(std::make_shared(widenDataType(value_type)));
 
         return std::make_shared(types);
     }
@@ -260,6 +260,16 @@ public:
     const char * getHeaderFilePath() const override { return __FILE__; }
 
     bool keepKey(const T & key) const { return static_cast(*this).keepKey(key); }
+
+private:
+    static DataTypePtr widenDataType(const DataTypePtr & data_type)
+    {
+        if (!data_type->canBeWiden())
+            throw Exception{"Values to be summed are expected to be Numeric, Float or Decimal.",
+                            ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
+
+        return data_type->getWidenDataType();
+    }
 };
 
diff --git a/dbms/tests/queries/0_stateless/00502_sum_map.reference b/dbms/tests/queries/0_stateless/00502_sum_map.reference
index 7bb325be814..a8d9fe95af3 100644
--- a/dbms/tests/queries/0_stateless/00502_sum_map.reference
+++ b/dbms/tests/queries/0_stateless/00502_sum_map.reference
@@ -10,6 +10,7 @@
 2000-01-01 00:01:00 [4,5,6,7,8] [10,10,20,10,10]
 ([1],[10])
 ([1,4,8],[10,20,10])
+([1],[257])
 ([1],[1])
 ([1],[1])
 (['a'],[1])
diff --git a/dbms/tests/queries/0_stateless/00502_sum_map.sql b/dbms/tests/queries/0_stateless/00502_sum_map.sql
index 9cf941dd908..24eab44d3d0 100644
--- a/dbms/tests/queries/0_stateless/00502_sum_map.sql
+++ b/dbms/tests/queries/0_stateless/00502_sum_map.sql
@@ -17,6 +17,15 @@ SELECT sumMapFiltered([1, 4, 8])(statusMap.status, statusMap.requests) FROM test
 
 DROP TABLE test.sum_map;
 
+DROP TABLE IF EXISTS test.sum_map_overflow;
+CREATE TABLE test.sum_map_overflow(events Array(UInt8), counts Array(UInt8)) ENGINE = Log;
+
+INSERT INTO test.sum_map_overflow VALUES ([1], [255]), ([1], [2]);
+
+SELECT sumMap(events, counts) FROM test.sum_map_overflow;
+
+DROP TABLE test.sum_map_overflow;
+
 select sumMap(val, cnt) from ( SELECT [ CAST(1, 'UInt64') ] as val, [1] as cnt );
 select sumMap(val, cnt) from ( SELECT [ CAST(1, 'Float64') ] as val, [1] as cnt );
 select sumMap(val, cnt) from ( SELECT [ CAST('a', 'Enum16(\'a\'=1)') ] as val, [1] as cnt );
From 468f8b4cfb6ce248accefa81d41fb1c14c8f5a4a Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Fri, 25 Jan 2019 17:08:01 +0300
Subject: [PATCH 131/586] Addition to prev. revision #4150

---
 dbms/src/Interpreters/ThreadStatusExt.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dbms/src/Interpreters/ThreadStatusExt.cpp b/dbms/src/Interpreters/ThreadStatusExt.cpp
index 20a58de98f8..987365cb3c4 100644
--- a/dbms/src/Interpreters/ThreadStatusExt.cpp
+++ b/dbms/src/Interpreters/ThreadStatusExt.cpp
@@ -87,8 +87,8 @@ void ThreadStatus::attachQuery(const ThreadGroupStatusPtr & thread_group_, bool
         if (!global_context)
             global_context = thread_group->global_context;
 
-        if (!thread_group->thread_statuses.emplace(thread_number, this).second)
-            throw Exception("Thread " + std::to_string(thread_number) + " is attached twice", ErrorCodes::LOGICAL_ERROR);
+        /// NOTE: A thread may be attached multiple times if it is reused from a thread pool.
+        thread_group->thread_statuses.emplace(thread_number, this);
     }
 
     initPerformanceCounters();
From 57104f81da213e95afb4623e5d416b0f8e1f3bb6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C3=A9o=20Ercolanelli?=
Date: Fri, 25 Jan 2019 15:16:23 +0100
Subject: [PATCH 132/586] rename `widen` to `promote` for IDataType

---
 dbms/src/AggregateFunctions/AggregateFunctionSumMap.h |  8 ++++----
 dbms/src/Common/ErrorCodes.cpp                        |  2 +-
 dbms/src/DataTypes/DataTypesDecimal.cpp               |  6 +++---
 dbms/src/DataTypes/DataTypesDecimal.h                 |  4 ++--
 dbms/src/DataTypes/DataTypesNumber.h                  |  8 ++++----
 dbms/src/DataTypes/IDataType.cpp                      |  6 +++---
 dbms/src/DataTypes/IDataType.h                        | 10 +++++-----
 7 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h b/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h
index 8c7c24faed5..a1be9fa2a86 100644
--- a/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h
+++ b/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h
@@ -72,7 +72,7 @@ public:
         types.emplace_back(std::make_shared(keys_type));
 
         for (const auto & value_type : values_types)
-            types.emplace_back(std::make_shared(widenDataType(value_type)));
+            types.emplace_back(std::make_shared(promoteNumericType(value_type)));
 
         return std::make_shared(types);
     }
@@ -262,13 +262,13 @@ public:
     bool keepKey(const T & key) const { return static_cast(*this).keepKey(key); }
 
 private:
-    static DataTypePtr widenDataType(const DataTypePtr & data_type)
+    static DataTypePtr promoteNumericType(const DataTypePtr & data_type)
     {
-        if (!data_type->canBeWiden())
+        if (!data_type->canBePromoted())
             throw Exception{"Values to be summed are expected to be Numeric, Float or Decimal.",
                             ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
 
-        return data_type->getWidenDataType();
+        return data_type->promoteNumericType();
     }
 };
 
diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp
index d8749c1b1e7..574360d6979 100644
--- a/dbms/src/Common/ErrorCodes.cpp
+++ b/dbms/src/Common/ErrorCodes.cpp
@@ -412,7 +412,7 @@ namespace ErrorCodes
     extern const int NO_DATA_FOR_REQUIRED_PROTOBUF_FIELD = 435;
     extern const int CANNOT_CONVERT_TO_PROTOBUF_TYPE = 436;
     extern const int PROTOBUF_FIELD_NOT_REPEATED = 437;
-    extern const int DATA_TYPE_CANNOT_BE_WIDEN = 438;
+    extern const int DATA_TYPE_CANNOT_BE_PROMOTED = 438;
 
     extern const int KEEPER_EXCEPTION = 999;
     extern const int POCO_EXCEPTION = 1000;
diff --git a/dbms/src/DataTypes/DataTypesDecimal.cpp b/dbms/src/DataTypes/DataTypesDecimal.cpp
index f8f3084b254..e6fe1bbd7f2 100644
--- a/dbms/src/DataTypes/DataTypesDecimal.cpp
+++ b/dbms/src/DataTypes/DataTypesDecimal.cpp
@@ -149,10 +149,10 @@ Field DataTypeDecimal::getDefault() const
 
 
 template
-DataTypePtr DataTypeDecimal::getWidenDataType() const
+DataTypePtr DataTypeDecimal::promoteNumericType() const
 {
-    using WidenDataType = DataTypeDecimal;
-    return std::make_shared(WidenDataType::maxPrecision(), scale);
+    using PromotedType = DataTypeDecimal;
+    return std::make_shared(PromotedType::maxPrecision(), scale);
 }
 
diff --git a/dbms/src/DataTypes/DataTypesDecimal.h b/dbms/src/DataTypes/DataTypesDecimal.h
index 125f14cbf0a..c287742928a 100644
--- a/dbms/src/DataTypes/DataTypesDecimal.h
+++ b/dbms/src/DataTypes/DataTypesDecimal.h
@@ -103,8 +103,8 @@ public:
     void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf) const override;
 
     Field getDefault() const override;
-    bool canBeWiden() const override { return true; }
-    DataTypePtr getWidenDataType() const override;
+    bool canBePromoted() const override { return true; }
+    DataTypePtr promoteNumericType() const override;
 
     MutableColumnPtr createColumn() const override;
 
     bool equals(const IDataType & rhs) const override;
diff --git a/dbms/src/DataTypes/DataTypesNumber.h b/dbms/src/DataTypes/DataTypesNumber.h
index 0794af41f36..f54e014dfc5 100644
--- a/dbms/src/DataTypes/DataTypesNumber.h
+++ b/dbms/src/DataTypes/DataTypesNumber.h
@@ -18,11 +18,11 @@ class DataTypeNumber final : public DataTypeNumberBase
     bool canBeUsedInBooleanContext() const override { return true; }
     bool canBeInsideNullable() const override { return true; }
 
-    bool canBeWiden() const override { return true; }
-    DataTypePtr getWidenDataType() const override
+    bool canBePromoted() const override { return true; }
+    DataTypePtr promoteNumericType() const override
     {
-        using WidenDataType = DataTypeNumber<NearestFieldType<T>>;
-        return std::make_shared();
+        using PromotedType = DataTypeNumber<NearestFieldType<T>>;
+        return std::make_shared();
     }
 };
 
diff --git a/dbms/src/DataTypes/IDataType.cpp b/dbms/src/DataTypes/IDataType.cpp
index 4ebe1eb3e48..a8eec08a7c8 100644
--- a/dbms/src/DataTypes/IDataType.cpp
+++ b/dbms/src/DataTypes/IDataType.cpp
@@ -19,7 +19,7 @@ namespace ErrorCodes
 {
     extern const int MULTIPLE_STREAMS_REQUIRED;
     extern const int LOGICAL_ERROR;
-    extern const int DATA_TYPE_CANNOT_BE_WIDEN;
+    extern const int DATA_TYPE_CANNOT_BE_PROMOTED;
 }
 
@@ -52,9 +52,9 @@ ColumnPtr IDataType::createColumnConstWithDefaultValue(size_t size) const
     return createColumnConst(size, getDefault());
 }
 
-DataTypePtr IDataType::getWidenDataType() const
+DataTypePtr IDataType::promoteNumericType() const
 {
-    throw Exception("Data type " + getName() + " can't be widen.", ErrorCodes::DATA_TYPE_CANNOT_BE_WIDEN);
+    throw Exception("Data type " + getName() + " can't be promoted.", ErrorCodes::DATA_TYPE_CANNOT_BE_PROMOTED);
 }
 
 void IDataType::serializeBinaryBulk(const IColumn &, WriteBuffer &, size_t, size_t) const
diff --git a/dbms/src/DataTypes/IDataType.h b/dbms/src/DataTypes/IDataType.h
index 3c20b258d09..1fce04639bf 100644
--- a/dbms/src/DataTypes/IDataType.h
+++ b/dbms/src/DataTypes/IDataType.h
@@ -273,14 +273,14 @@ public:
      */
     virtual Field getDefault() const = 0;
 
-    /** The data type can be widen in order to try to avoid overflows.
-      * Widenable data types are typically Number or Decimal data types.
-      */
-    virtual bool canBeWiden() const { return false; }
+    /** The data type can be promoted in order to try to avoid overflows.
+      * Data types which can be promoted are typically Number or Decimal data types.
+      */
+    virtual bool canBePromoted() const { return false; }
 
-    /** Return the widen data type of the current data type. Throw an exception if `canBeWiden() == false`.
-      */
-    virtual DataTypePtr getWidenDataType() const;
+    /** Return the promoted numeric data type of the current data type. Throw an exception if `canBePromoted() == false`.
+      */
+    virtual DataTypePtr promoteNumericType() const; 
 
     /** Directly insert default value into a column. Default implementation use method IColumn::insertDefault.
       * This should be overriden if data type default value differs from column default value (example: Enum data types).
       */
From a78282b8347704d1456f9b1e66383347def5b094 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C3=A9o=20Ercolanelli?=
Date: Fri, 25 Jan 2019 15:42:44 +0100
Subject: [PATCH 133/586] IDataType.h: remove trailing whitespace

---
 dbms/src/DataTypes/IDataType.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/DataTypes/IDataType.h b/dbms/src/DataTypes/IDataType.h
index 1fce04639bf..ae14fe1aa36 100644
--- a/dbms/src/DataTypes/IDataType.h
+++ b/dbms/src/DataTypes/IDataType.h
@@ -280,7 +280,7 @@ public:
 
     /** Return the promoted numeric data type of the current data type. Throw an exception if `canBePromoted() == false`.
       */
-    virtual DataTypePtr promoteNumericType() const; 
+    virtual DataTypePtr promoteNumericType() const;
 
     /** Directly insert default value into a column. Default implementation use method IColumn::insertDefault.
       * This should be overriden if data type default value differs from column default value (example: Enum data types).
       */
From 091efcda9553610818c00088b93e903ed1ed67cf Mon Sep 17 00:00:00 2001
From: chertus
Date: Fri, 25 Jan 2019 18:42:24 +0300
Subject: [PATCH 134/586] add IdentifierSemantic and
 PredicateExpressionsOptimizer refactoring

---
 .../Interpreters/AddDefaultDatabaseVisitor.h  |   3 +-
 .../Interpreters/ArrayJoinedColumnsVisitor.h  |   3 +-
 dbms/src/Interpreters/ColumnNamesContext.cpp  |   3 +-
 .../DatabaseAndTableWithAlias.cpp             |  68 +----
 .../Interpreters/DatabaseAndTableWithAlias.h  |  10 +-
 dbms/src/Interpreters/ExternalTablesVisitor.h |   3 +-
 .../Interpreters/GlobalSubqueriesVisitor.h    |   1 +
 dbms/src/Interpreters/IdentifierSemantic.cpp  | 106 +++++++
 dbms/src/Interpreters/IdentifierSemantic.h    |  35 +++
 .../InJoinSubqueriesPreprocessor.cpp          |   1 +
 .../Interpreters/InterpreterDescribeQuery.cpp |  16 +-
 .../PredicateExpressionsOptimizer.cpp         | 276 ++++++++++--------
 .../PredicateExpressionsOptimizer.h           |  48 ++-
 dbms/src/Interpreters/QueryNormalizer.cpp     |   8 +-
 .../RequiredSourceColumnsVisitor.cpp          |   3 +-
 dbms/src/Interpreters/SyntaxAnalyzer.cpp      |  35 +--
 .../TranslateQualifiedNamesVisitor.cpp        |  45 +--
 .../TranslateQualifiedNamesVisitor.h          |   2 +-
 dbms/src/Parsers/ASTIdentifier.cpp            |  82 ++----
 dbms/src/Parsers/ASTIdentifier.h              |  54 ++--
 .../MergeTree/MergeTreeWhereOptimizer.cpp     |   3 +-
 dbms/src/Storages/StorageView.cpp             |   1 +
 dbms/src/Storages/VirtualColumnUtils.cpp      |   3 +-
 .../TableFunctions/TableFunctionRemote.cpp    |   1 +
 .../00597_push_down_predicate.reference       |   1 +
 .../0_stateless/00597_push_down_predicate.sql |   3 +-
 .../00674_join_on_syntax.reference            |   4 +-
 .../0_stateless/00674_join_on_syntax.sql      |  12 +-
 .../queries/0_stateless/00703_join_crash.sql  |   2 +-
 29 files changed, 433 insertions(+), 399 deletions(-)
 create mode 100644 dbms/src/Interpreters/IdentifierSemantic.cpp
 create mode 100644 dbms/src/Interpreters/IdentifierSemantic.h

diff --git a/dbms/src/Interpreters/AddDefaultDatabaseVisitor.h b/dbms/src/Interpreters/AddDefaultDatabaseVisitor.h
index 2b032ff8a88..ce542a63503 100644
--- a/dbms/src/Interpreters/AddDefaultDatabaseVisitor.h
+++ b/dbms/src/Interpreters/AddDefaultDatabaseVisitor.h
@@ -11,6 +11,7 @@
 #include
 #include
 #include
+#include
 
 namespace DB
 {
@@ -99,7 +100,7 @@ private:
     /// @note It expects that only table (not column) identifiers are visited.
     void visit(const ASTIdentifier & identifier, ASTPtr & ast) const
     {
-        if (identifier.name_parts.empty())
+        if (!identifier.compound())
             ast = createTableIdentifier(database_name, identifier.name);
     }
 
diff --git a/dbms/src/Interpreters/ArrayJoinedColumnsVisitor.h b/dbms/src/Interpreters/ArrayJoinedColumnsVisitor.h
index 0c19ffe7387..6aed7436572 100644
--- a/dbms/src/Interpreters/ArrayJoinedColumnsVisitor.h
+++ b/dbms/src/Interpreters/ArrayJoinedColumnsVisitor.h
@@ -10,6 +10,7 @@
 #include
 #include
 
+#include
 
 #include
 
@@ -95,7 +96,7 @@ private:
         NameToNameMap & array_join_alias_to_name = data.array_join_alias_to_name;
         NameToNameMap & array_join_result_to_source = data.array_join_result_to_source;
 
-        if (!getColumnIdentifierName(node))
+        if (!IdentifierSemantic::getColumnName(node))
             return;
 
         auto splitted = Nested::splitName(node.name);  /// ParsedParams, Key1
diff --git a/dbms/src/Interpreters/ColumnNamesContext.cpp b/dbms/src/Interpreters/ColumnNamesContext.cpp
index 246b5f5306e..3240ca7fea1 100644
--- a/dbms/src/Interpreters/ColumnNamesContext.cpp
+++ b/dbms/src/Interpreters/ColumnNamesContext.cpp
@@ -1,4 +1,5 @@
 #include
+#include
 #include
 
 namespace DB
@@ -31,7 +32,7 @@ bool ColumnNamesContext::addColumnAliasIfAny(const IAST & ast, bool is_public)
 
 void ColumnNamesContext::addColumnIdentifier(const ASTIdentifier & node, bool is_public)
 {
-    if (!getColumnIdentifierName(node))
+    if (!IdentifierSemantic::getColumnName(node))
         return;
 
     required_names.insert(node.name);
diff --git a/dbms/src/Interpreters/DatabaseAndTableWithAlias.cpp b/dbms/src/Interpreters/DatabaseAndTableWithAlias.cpp
index efd29f962d0..c9afb5da722 100644
--- a/dbms/src/Interpreters/DatabaseAndTableWithAlias.cpp
+++ b/dbms/src/Interpreters/DatabaseAndTableWithAlias.cpp
@@ -1,4 +1,5 @@
 #include
+#include
 #include
 #include
 
@@ -11,60 +12,13 @@
 namespace DB
 {
 
-/// Checks that ast is ASTIdentifier and remove num_qualifiers_to_strip components from left.
-/// Example: 'database.table.name' -> (num_qualifiers_to_strip = 2) -> 'name'.
-void stripIdentifier(const DB::ASTPtr & ast, size_t num_qualifiers_to_strip)
-{
-    ASTIdentifier * identifier = typeid_cast(ast.get());
-
-    if (!identifier)
-        throw DB::Exception("ASTIdentifier expected for stripIdentifier", DB::ErrorCodes::LOGICAL_ERROR);
-
-    if (num_qualifiers_to_strip)
-    {
-        identifier->name_parts.erase(identifier->name_parts.begin(), identifier->name_parts.begin() + num_qualifiers_to_strip);
-        DB::String new_name;
-        for (const auto & part : identifier->name_parts)
-        {
-            if (!new_name.empty())
-                new_name += '.';
-            new_name += part;
-        }
-        identifier->name.swap(new_name);
-    }
-}
-
-/// Get the number of components of identifier which are correspond to 'alias.', 'table.' or 'databas.table.' from names.
-size_t getNumComponentsToStripInOrderToTranslateQualifiedName(const ASTIdentifier & identifier,
-                                                              const DatabaseAndTableWithAlias & names)
-{
-    /// database.table.column
-    if (doesIdentifierBelongTo(identifier, names.database, names.table))
-        return 2;
-
-    /// table.column or alias.column.
-    if (doesIdentifierBelongTo(identifier, names.table) ||
-        doesIdentifierBelongTo(identifier, names.alias))
-        return 1;
-
-    return 0;
-}
-
-
 DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTIdentifier & identifier, const String & current_database)
 {
-    database = current_database;
-    table = identifier.name;
     alias = identifier.tryGetAlias();
 
-    if (!identifier.name_parts.empty())
-    {
-        if (identifier.name_parts.size() != 2)
-            throw Exception("Logical error: 2 components expected in table expression '" + identifier.name + "'", ErrorCodes::LOGICAL_ERROR);
-
-        database = identifier.name_parts[0];
-        table = identifier.name_parts[1];
-    }
+    std::tie(database, table) = IdentifierSemantic::extractDatabaseAndTable(identifier);
+    if (database.empty())
+        database = current_database;
 }
 
 DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTPtr & node, const String & current_database)
@@ -108,19 +62,7 @@ String DatabaseAndTableWithAlias::getQualifiedNamePrefix() const
 {
     if (alias.empty() && table.empty())
         return "";
-
-    return (!alias.empty() ? alias : (database + '.' + table)) + '.';
-}
-
-void DatabaseAndTableWithAlias::makeQualifiedName(const ASTPtr & ast) const
-{
-    if (auto identifier = typeid_cast(ast.get()))
-    {
-        String prefix = getQualifiedNamePrefix();
-        identifier->name.insert(identifier->name.begin(), prefix.begin(), prefix.end());
-
-        addIdentifierQualifier(*identifier, database, table, alias);
-    }
+    return (!alias.empty() ? alias : table) + '.';
 }
 
 std::vector getSelectTablesExpression(const ASTSelectQuery & select_query)
diff --git a/dbms/src/Interpreters/DatabaseAndTableWithAlias.h b/dbms/src/Interpreters/DatabaseAndTableWithAlias.h
index 87884e26273..bb4f7ca92ef 100644
--- a/dbms/src/Interpreters/DatabaseAndTableWithAlias.h
+++ b/dbms/src/Interpreters/DatabaseAndTableWithAlias.h
@@ -29,21 +29,13 @@ struct DatabaseAndTableWithAlias
     DatabaseAndTableWithAlias(const ASTIdentifier & identifier, const String & current_database = "");
     DatabaseAndTableWithAlias(const ASTTableExpression & table_expression, const String & current_database);
 
-    /// "alias." or "database.table." if alias is empty
+    /// "alias." or "table." if alias is empty
     String getQualifiedNamePrefix() const;
 
-    /// If ast is ASTIdentifier, prepend getQualifiedNamePrefix() to it's name.
-    void makeQualifiedName(const ASTPtr & ast) const;
-
    /// Check if it satisfies another db_table name. @note operation is not symmetric.
    bool satisfies(const DatabaseAndTableWithAlias & table, bool table_may_be_an_alias);
 };
 
-void stripIdentifier(const DB::ASTPtr & ast, size_t num_qualifiers_to_strip);
-
-size_t getNumComponentsToStripInOrderToTranslateQualifiedName(const ASTIdentifier & identifier,
-                                                              const DatabaseAndTableWithAlias & names);
-
 std::vector getDatabaseAndTables(const ASTSelectQuery & select_query, const String & current_database);
 std::optional getDatabaseAndTable(const ASTSelectQuery & select, size_t table_number);
 
diff --git a/dbms/src/Interpreters/ExternalTablesVisitor.h b/dbms/src/Interpreters/ExternalTablesVisitor.h
index d8b177b1ed3..2f3eecd2828 100644
--- a/dbms/src/Interpreters/ExternalTablesVisitor.h
+++ b/dbms/src/Interpreters/ExternalTablesVisitor.h
@@ -5,6 +5,7 @@
 #include
 #include
 #include
+#include
 
 namespace DB
 {
@@ -33,7 +34,7 @@ public:
 private:
     static std::vector visit(const ASTIdentifier & node, ASTPtr &, Data & data)
     {
-        if (auto opt_name = getTableIdentifierName(node))
+        if (auto opt_name = IdentifierSemantic::getTableName(node))
             if (StoragePtr external_storage = data.context.tryGetExternalTable(*opt_name))
                 data.external_tables[*opt_name] = external_storage;
         return {};
diff --git a/dbms/src/Interpreters/GlobalSubqueriesVisitor.h b/dbms/src/Interpreters/GlobalSubqueriesVisitor.h
index 3e4108464f9..81e45d2abea 100644
--- a/dbms/src/Interpreters/GlobalSubqueriesVisitor.h
+++ b/dbms/src/Interpreters/GlobalSubqueriesVisitor.h
@@ -16,6 +16,7 @@
 #include
 #include
 #include
+#include
 
 namespace DB
 {
diff --git a/dbms/src/Interpreters/IdentifierSemantic.cpp b/dbms/src/Interpreters/IdentifierSemantic.cpp
new file mode 100644
index 00000000000..e6fe2257d20
--- /dev/null
+++ b/dbms/src/Interpreters/IdentifierSemantic.cpp
@@ -0,0 +1,106 @@
+#include
+
+#include
+
+namespace DB
+{
+
+std::optional IdentifierSemantic::getColumnName(const ASTIdentifier & node)
+{
+    if (!node.semantic->special)
+        return node.name;
+    return {};
+}
+
+std::optional IdentifierSemantic::getColumnName(const ASTPtr & ast)
+{
+    if (ast)
+        if (auto id = typeid_cast(ast.get()))
+            if (!id->semantic->special)
+                return id->name;
+    return {};
+}
+
+std::optional IdentifierSemantic::getTableName(const ASTIdentifier & node)
+{
+    if (node.semantic->special)
+        return node.name;
+    return {};
+}
+
+std::optional IdentifierSemantic::getTableName(const ASTPtr & ast)
+{
+    if (ast)
+        if (auto id = typeid_cast(ast.get()))
+            if (id->semantic->special)
+                return id->name;
+    return {};
+}
+
+std::pair IdentifierSemantic::extractDatabaseAndTable(const ASTIdentifier & identifier)
+{
+    if (identifier.name_parts.size() > 2)
+        throw Exception("Logical error: more than two components in table expression", ErrorCodes::LOGICAL_ERROR);
+
+    if (identifier.name_parts.size() == 2)
+        return { identifier.name_parts[0], identifier.name_parts[1] };
+    return { "", identifier.name };
+}
+
+bool IdentifierSemantic::doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & database, const String & table)
+{
+    size_t num_components = identifier.name_parts.size();
+    if (num_components >= 3)
+        return identifier.name_parts[0] == database &&
+               identifier.name_parts[1] == table;
+    return false;
+}
+
+bool IdentifierSemantic::doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & table)
+{
+    size_t num_components = identifier.name_parts.size();
+    if (num_components >= 2)
+        return identifier.name_parts[0] == table;
+    return false;
+}
+
+size_t IdentifierSemantic::canReferColumnToTable(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table)
+{
+    /// database.table.column
+    if (doesIdentifierBelongTo(identifier, db_and_table.database, db_and_table.table))
+        return 2;
+
+    /// table.column or alias.column.
+    if (doesIdentifierBelongTo(identifier, db_and_table.table) ||
+        doesIdentifierBelongTo(identifier, db_and_table.alias))
+        return 1;
+
+    return 0;
+}
+
+/// Strip the first `to_strip` components from a compound identifier.
+/// Example: 'database.table.name' -> (to_strip = 2) -> 'name'.
+void IdentifierSemantic::setColumnShortName(ASTIdentifier & identifier, size_t to_strip)
+{
+    if (!to_strip)
+        return;
+
+    std::vector stripped(identifier.name_parts.begin() + to_strip, identifier.name_parts.end());
+
+    DB::String new_name;
+    for (const auto & part : stripped)
+    {
+        if (!new_name.empty())
+            new_name += '.';
+        new_name += part;
+    }
+    identifier.name.swap(new_name);
+}
+
+void IdentifierSemantic::setColumnQualifiedName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table)
+{
+    String prefix = db_and_table.getQualifiedNamePrefix();
+    identifier.name.insert(identifier.name.begin(), prefix.begin(), prefix.end());
+}
+
+}
diff --git a/dbms/src/Interpreters/IdentifierSemantic.h b/dbms/src/Interpreters/IdentifierSemantic.h
new file mode 100644
index 00000000000..895a51899fe
--- /dev/null
+++ b/dbms/src/Interpreters/IdentifierSemantic.h
@@ -0,0 +1,35 @@
+#pragma once
+
+#include
+#include
+
+namespace DB
+{
+
+struct IdentifierSemanticImpl
+{
+    bool special = false;
+};
+
+/// Static class to manipulate IdentifierSemanticImpl via ASTIdentifier
+struct IdentifierSemantic
+{
+    /// @returns name for column identifiers
+    static std::optional getColumnName(const ASTIdentifier & node);
+    static std::optional getColumnName(const ASTPtr & ast);
+
+    /// @returns name for 'not a column' identifiers
+    static std::optional getTableName(const ASTIdentifier & node);
+    static std::optional getTableName(const ASTPtr & ast);
+    static std::pair extractDatabaseAndTable(const ASTIdentifier & identifier);
+
+    static size_t canReferColumnToTable(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table);
+    static void setColumnShortName(ASTIdentifier & identifier, size_t match);
+    static void setColumnQualifiedName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table);
+
+private:
+    static bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & database, const String & table);
+    static bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & table);
+};
+
+}
diff --git a/dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp b/dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp
index e8e697cfe4f..e0be0d068e0 100644
--- a/dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp
+++ b/dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp
@@ -1,6 +1,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
diff --git a/dbms/src/Interpreters/InterpreterDescribeQuery.cpp b/dbms/src/Interpreters/InterpreterDescribeQuery.cpp
index 7c2812eb428..b80426fcdd9 100644
--- a/dbms/src/Interpreters/InterpreterDescribeQuery.cpp
+++ b/dbms/src/Interpreters/InterpreterDescribeQuery.cpp
@@ -9,6 +9,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -83,20 +84,11 @@ BlockInputStreamPtr InterpreterDescribeQuery::executeImpl()
     }
     else
     {
+        auto identifier = typeid_cast(table_expression->database_and_table_name.get());
+
         String database_name;
         String table_name;
-
-        auto identifier = typeid_cast(table_expression->database_and_table_name.get());
-        if (identifier->name_parts.size() > 2)
-            throw Exception("Logical error: more than two components in table expression", ErrorCodes::LOGICAL_ERROR);
-
-        if (identifier->name_parts.size() > 1)
-        {
-            database_name = identifier->name_parts[0];
-            table_name = identifier->name_parts[1];
-        }
-        else
-            table_name = identifier->name;
+        std::tie(database_name, table_name) = IdentifierSemantic::extractDatabaseAndTable(*identifier);
 
         table = context.getTable(database_name, table_name);
     }
diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp
index 99ea0479009..cd4c33ce558 100644
--- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp
+++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp
@@ -1,12 +1,23 @@
+#include
+
 #include
 #include
 #include
 #include
+#include
 #include
-#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
 #include
 #include
 #include
+#include
+#include
 #include
 #include
 #include
 #include "TranslateQualifiedNamesVisitor.h"
@@ -22,6 +33,59 @@ namespace ErrorCodes
 
 static constexpr auto and_function_name = "and";
 
+
+struct FindIdentifierBestTableData
+{
+    using TypeToVisit = ASTIdentifier;
+
+    const std::vector & tables;
+    std::vector> identifier_table;
+
+    FindIdentifierBestTableData(const std::vector & tables_)
+        : tables(tables_)
+    {}
+
+    void visit(ASTIdentifier & identifier, ASTPtr &)
+    {
+        const DatabaseAndTableWithAlias * best_table = nullptr;
+
+        if (!identifier.compound())
+        {
+            if (!tables.empty())
+                best_table = &tables[0];
+        }
+        else
+        {
+            size_t best_match = 0;
+            for (const DatabaseAndTableWithAlias & table : tables)
+            {
+                if (size_t match = IdentifierSemantic::canReferColumnToTable(identifier, table))
+                    if (match > best_match)
+                    {
+                        best_match = match;
+                        best_table = &table;
+                    }
+            }
+        }
+
+        identifier_table.emplace_back(&identifier, best_table);
+    }
+};
+
+using FindIdentifierBestTableMatcher = OneTypeMatcher;
+using FindIdentifierBestTableVisitor = InDepthNodeVisitor;
+
+
+static bool allowPushDown(const ASTSelectQuery * subquery)
+{
+    return subquery &&
+           !subquery->final() &&
+           !subquery->limit_by_expression_list &&
+           !subquery->limit_length &&
+           !subquery->with_expression_list;
+}
+
+
 PredicateExpressionsOptimizer::PredicateExpressionsOptimizer(
     ASTSelectQuery * ast_select_, ExtractedSettings && settings_, const Context & context_)
     : ast_select(ast_select_), settings(settings_), context(context_)
@@ -36,47 +100,50 @@ bool PredicateExpressionsOptimizer::optimize()
     if (!ast_select->where_expression && !ast_select->prewhere_expression)
         return false;
 
-    SubqueriesProjectionColumns all_subquery_projection_columns;
-    getAllSubqueryProjectionColumns(all_subquery_projection_columns);
+    SubqueriesProjectionColumns all_subquery_projection_columns = getAllSubqueryProjectionColumns();
 
     bool is_rewrite_subqueries = false;
     if (!all_subquery_projection_columns.empty())
    {
-        is_rewrite_subqueries |= optimizeImpl(ast_select->where_expression, all_subquery_projection_columns, false);
-        is_rewrite_subqueries |= optimizeImpl(ast_select->prewhere_expression, all_subquery_projection_columns, true);
+        is_rewrite_subqueries |= optimizeImpl(ast_select->where_expression, all_subquery_projection_columns, OptimizeKind::PUSH_TO_WHERE);
+        is_rewrite_subqueries |= optimizeImpl(ast_select->prewhere_expression, all_subquery_projection_columns, OptimizeKind::PUSH_TO_PREWHERE);
     }
 
     return is_rewrite_subqueries;
 }
 
 bool PredicateExpressionsOptimizer::optimizeImpl(
-    ASTPtr & outer_expression, SubqueriesProjectionColumns & subqueries_projection_columns, bool is_prewhere)
+    ASTPtr & outer_expression, SubqueriesProjectionColumns & subqueries_projection_columns, OptimizeKind expression_kind)
 {
     /// split predicate with `and`
-    PredicateExpressions outer_predicate_expressions = splitConjunctionPredicate(outer_expression);
+    std::vector outer_predicate_expressions = splitConjunctionPredicate(outer_expression);
 
     std::vector database_and_table_with_aliases =
         getDatabaseAndTables(*ast_select, context.getCurrentDatabase());
 
     bool is_rewrite_subquery = false;
-    for (const auto & outer_predicate : outer_predicate_expressions)
+    for (auto & outer_predicate : outer_predicate_expressions)
     {
         if (isArrayJoinFunction(outer_predicate))
             continue;
 
-        IdentifiersWithQualifiedNameSet outer_predicate_dependencies;
-        getDependenciesAndQualifiedOfExpression(outer_predicate, outer_predicate_dependencies, database_and_table_with_aliases);
+        auto outer_predicate_dependencies = getDependenciesAndQualifiers(outer_predicate, database_and_table_with_aliases);
 
         /// TODO: remove origin expression
-        for (const auto & subquery_projection_columns : subqueries_projection_columns)
+        for (const auto & [subquery, projection_columns] : subqueries_projection_columns)
         {
-            auto subquery = static_cast(subquery_projection_columns.first);
-            const ProjectionsWithAliases projection_columns = subquery_projection_columns.second;
-
             OptimizeKind optimize_kind = OptimizeKind::NONE;
-            if (!cannotPushDownOuterPredicate(projection_columns, subquery, outer_predicate_dependencies, is_prewhere, optimize_kind))
+            if (allowPushDown(subquery) && canPushDownOuterPredicate(projection_columns, outer_predicate_dependencies, optimize_kind))
             {
-                ASTPtr inner_predicate;
-                cloneOuterPredicateForInnerPredicate(outer_predicate, projection_columns, database_and_table_with_aliases, inner_predicate);
+                if (optimize_kind == OptimizeKind::NONE)
+                    optimize_kind = expression_kind;
+
+                ASTPtr inner_predicate = outer_predicate->clone();
+                cleanExpressionAlias(inner_predicate); /// clears the alias name contained in the outer predicate
+
+                std::vector inner_predicate_dependencies =
+                    getDependenciesAndQualifiers(inner_predicate, database_and_table_with_aliases);
+
+                setNewAliasesForInnerPredicate(projection_columns, inner_predicate_dependencies);
 
                 switch (optimize_kind)
                 {
@@ -91,9 +158,9 @@ bool PredicateExpressionsOptimizer::optimizeImpl(
     return is_rewrite_subquery;
 }
 
-PredicateExpressions PredicateExpressionsOptimizer::splitConjunctionPredicate(ASTPtr & predicate_expression)
+std::vector PredicateExpressionsOptimizer::splitConjunctionPredicate(ASTPtr & predicate_expression)
 {
-    PredicateExpressions predicate_expressions;
+    std::vector predicate_expressions;
 
     if (predicate_expression)
     {
@@ -127,77 +194,79 @@ PredicateExpressions PredicateExpressionsOptimizer::splitConjunctionPredicate(AS
     return predicate_expressions;
 }
 
-void PredicateExpressionsOptimizer::getDependenciesAndQualifiedOfExpression(const ASTPtr & expression,
-                                                                            IdentifiersWithQualifiedNameSet & dependencies_and_qualified,
-                                                                            std::vector & tables_with_aliases)
+std::vector
+PredicateExpressionsOptimizer::getDependenciesAndQualifiers(ASTPtr & expression, std::vector & tables)
 {
-    if (const auto identifier = typeid_cast(expression.get()))
+    FindIdentifierBestTableVisitor::Data find_data(tables);
+    FindIdentifierBestTableVisitor(find_data).visit(expression);
+
+    std::vector dependencies;
+
+    for (const auto & [identifier, table] : find_data.identifier_table)
     {
         String table_alias;
-        if (!identifier->name_parts.empty())
-        {
-            if (!tables_with_aliases.empty())
-                table_alias = tables_with_aliases[0].getQualifiedNamePrefix();
-        }
-        else
-        {
-            size_t best_table_pos = 0;
-            size_t max_num_qualifiers_to_strip = 0;
+        if (table)
+            table_alias = table->getQualifiedNamePrefix();
 
-            /// translate qualifiers for dependent columns
-            for (size_t table_pos = 0; table_pos < tables_with_aliases.size(); ++table_pos)
-            {
-                const auto & table = tables_with_aliases[table_pos];
-                auto num_qualifiers_to_strip = getNumComponentsToStripInOrderToTranslateQualifiedName(*identifier, table);
-
-                if (num_qualifiers_to_strip > max_num_qualifiers_to_strip)
-                {
-                    max_num_qualifiers_to_strip = num_qualifiers_to_strip;
-                    best_table_pos = table_pos;
-                }
-            }
-
-            table_alias = tables_with_aliases[best_table_pos].getQualifiedNamePrefix();
-        }
-
-        String qualified_name = table_alias + expression->getAliasOrColumnName();
-        dependencies_and_qualified.emplace_back(std::pair(identifier, qualified_name));
-    }
-    else
-    {
-        for (const auto & child : expression->children)
-            getDependenciesAndQualifiedOfExpression(child, dependencies_and_qualified, tables_with_aliases);
+        dependencies.emplace_back(identifier, table_alias);
     }
+
+    return dependencies;
 }
 
-bool PredicateExpressionsOptimizer::cannotPushDownOuterPredicate(
-    const ProjectionsWithAliases & subquery_projection_columns, ASTSelectQuery * subquery,
-    IdentifiersWithQualifiedNameSet & outer_predicate_dependencies, bool & is_prewhere, OptimizeKind & optimize_kind)
+static String qualifiedName(ASTIdentifier * identifier, const String & prefix)
 {
-    if (subquery->final() || subquery->limit_by_expression_list || subquery->limit_length || subquery->with_expression_list)
-        return true;
+    if (identifier->isShort())
+        return prefix + identifier->getAliasOrColumnName();
+    return identifier->getAliasOrColumnName();
+}
 
-    for (auto & predicate_dependency : outer_predicate_dependencies)
+bool PredicateExpressionsOptimizer::canPushDownOuterPredicate(
+    const std::vector & projection_columns,
+    const std::vector & dependencies,
+    OptimizeKind & optimize_kind)
+{
+    for (const auto & [identifier, prefix] : dependencies)
     {
         bool is_found = false;
+        String qualified_name = qualifiedName(identifier, prefix);
 
-        for (auto projection_column : subquery_projection_columns)
+        for (const auto & [ast, alias] : projection_columns)
         {
-            if (projection_column.second == predicate_dependency.second)
+            if (alias == qualified_name)
             {
                 is_found = true;
-                optimize_kind = isAggregateFunction(projection_column.first) ? OptimizeKind::PUSH_TO_HAVING : optimize_kind;
+                if (isAggregateFunction(ast))
+                    optimize_kind = OptimizeKind::PUSH_TO_HAVING;
             }
         }
 
         if (!is_found)
-            return true;
+            return false;
     }
 
-    if (optimize_kind == OptimizeKind::NONE)
-        optimize_kind = is_prewhere ? OptimizeKind::PUSH_TO_PREWHERE : OptimizeKind::PUSH_TO_WHERE;
+    return true;
+}
 
-    return false;
+void PredicateExpressionsOptimizer::setNewAliasesForInnerPredicate(
+    const std::vector & projection_columns,
+    const std::vector & dependencies)
+{
+    for (auto & [identifier, prefix] : dependencies)
+    {
+        String qualified_name = qualifiedName(identifier, prefix);
+
+        for (auto & [ast, alias] : projection_columns)
+        {
+            if (alias == qualified_name)
+            {
+                if (!isIdentifier(ast) && ast->tryGetAlias().empty())
+                    ast->setAlias(ast->getColumnName());
+
+                identifier->resetWithAlias(ast->getAliasOrColumnName());
+            }
+        }
+    }
 }
 
 bool PredicateExpressionsOptimizer::isArrayJoinFunction(const ASTPtr & node)
@@ -215,47 +284,21 @@ bool PredicateExpressionsOptimizer::isArrayJoinFunction(const ASTPtr & node)
     return false;
 }
 
-bool PredicateExpressionsOptimizer::isAggregateFunction(ASTPtr & node)
+bool PredicateExpressionsOptimizer::isAggregateFunction(const ASTPtr & node)
 {
-    if (auto function = typeid_cast(node.get()))
+    if (auto function = typeid_cast(node.get()))
     {
         if (AggregateFunctionFactory::instance().isAggregateFunctionName(function->name))
             return true;
     }
 
-    for (auto & child : node->children)
+    for (const auto & child : node->children)
         if (isAggregateFunction(child))
             return true;
 
     return false;
 }
 
-void PredicateExpressionsOptimizer::cloneOuterPredicateForInnerPredicate(
-    const ASTPtr & outer_predicate, const ProjectionsWithAliases & projection_columns,
-    std::vector & tables, ASTPtr & inner_predicate)
-{
-    inner_predicate = outer_predicate->clone();
-
-    /// clears the alias name contained in the outer predicate
-    cleanExpressionAlias(inner_predicate);
-    IdentifiersWithQualifiedNameSet new_expression_requires;
-    getDependenciesAndQualifiedOfExpression(inner_predicate, new_expression_requires, tables);
-
-    for (auto & require : new_expression_requires)
-    {
-        for (auto projection : projection_columns)
-        {
-            if (require.second == projection.second)
-            {
-                ASTPtr & ast = projection.first;
-                if (!typeid_cast(ast.get()) && ast->tryGetAlias().empty())
-                    ast->setAlias(ast->getColumnName());
-                require.first->name = ast->getAliasOrColumnName();
-            }
-        }
-    }
-}
-
 bool PredicateExpressionsOptimizer::optimizeExpression(const ASTPtr & outer_expression, ASTPtr & subquery_expression, ASTSelectQuery * subquery)
 {
     ASTPtr new_subquery_expression = subquery_expression;
@@ -272,33 +315,32 @@ bool PredicateExpressionsOptimizer::optimizeExpression(const ASTPtr & outer_expr
     return true;
 }
 
-void PredicateExpressionsOptimizer::getAllSubqueryProjectionColumns(SubqueriesProjectionColumns & all_subquery_projection_columns)
+PredicateExpressionsOptimizer::SubqueriesProjectionColumns PredicateExpressionsOptimizer::getAllSubqueryProjectionColumns()
 {
-    const auto tables_expression = getSelectTablesExpression(*ast_select);
+    SubqueriesProjectionColumns projection_columns;
 
-    for (const auto & table_expression : tables_expression)
-    {
+    for (const auto & table_expression : getSelectTablesExpression(*ast_select))
         if (table_expression->subquery)
-        {
-            /// Use qualifiers to translate the columns of subqueries
-            DatabaseAndTableWithAlias database_and_table_with_alias(*table_expression, context.getCurrentDatabase());
-            String qualified_name_prefix = database_and_table_with_alias.getQualifiedNamePrefix();
-            getSubqueryProjectionColumns(all_subquery_projection_columns, qualified_name_prefix,
-                                         static_cast(table_expression->subquery.get())->children[0]);
-        }
-    }
+            getSubqueryProjectionColumns(table_expression->subquery, projection_columns);
+
    return projection_columns;
 }
 
-void PredicateExpressionsOptimizer::getSubqueryProjectionColumns(SubqueriesProjectionColumns & all_subquery_projection_columns,
-                                                                 String & qualified_name_prefix, const ASTPtr & subquery)
+void PredicateExpressionsOptimizer::getSubqueryProjectionColumns(const ASTPtr & subquery, SubqueriesProjectionColumns & projection_columns)
 {
-    ASTs select_with_union_projections;
-    auto select_with_union_query = static_cast(subquery.get());
+    String qualified_name_prefix = subquery->tryGetAlias();
+    if (!qualified_name_prefix.empty())
+        qualified_name_prefix += '.';
+
+    const ASTPtr & subselect = subquery->children[0];
+
+    ASTs select_with_union_projections;
+    auto select_with_union_query = static_cast(subselect.get());
 
-    for (auto & select_without_union_query : select_with_union_query->list_of_selects->children)
+    for (auto & select : select_with_union_query->list_of_selects->children)
     {
-        ProjectionsWithAliases subquery_projections;
-        auto select_projection_columns = getSelectQueryProjectionColumns(select_without_union_query);
+        std::vector subquery_projections;
+        auto select_projection_columns = getSelectQueryProjectionColumns(select);
 
         if (!select_projection_columns.empty())
         {
@@ -309,7 +351,7 @@ void PredicateExpressionsOptimizer::getSubqueryProjectionColumns(SubqueriesProje
                 subquery_projections.emplace_back(std::pair(select_projection_columns[i],
                                                             qualified_name_prefix + select_with_union_projections[i]->getAliasOrColumnName()));
 
-            all_subquery_projection_columns.insert(std::pair(select_without_union_query.get(), subquery_projections));
+            projection_columns.insert(std::pair(static_cast(select.get()), subquery_projections));
         }
     }
 }
diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.h b/dbms/src/Interpreters/PredicateExpressionsOptimizer.h
index 65148e0682a..93e666dde32 100644
--- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.h
+++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.h
@@ -1,26 +1,14 @@
 #pragma once
 
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
 #include
 
 namespace DB
 {
 
-using PredicateExpressions = std::vector;
-using ProjectionWithAlias = std::pair;
-using ProjectionsWithAliases = std::vector;
-using SubqueriesProjectionColumns = std::map;
-using IdentifierWithQualifiedName = std::pair;
-using IdentifiersWithQualifiedNameSet = std::vector;
-
+class ASTIdentifier;
+class ASTSelectQuery;
+class ASTSubquery;
+class Context;
 
 /** This class provides functions for Push-Down predicate expressions
  *
@@ -35,6 +23,10 @@ using IdentifiersWithQualifiedNameSet = std::vector
  */
 class PredicateExpressionsOptimizer
 {
+    using ProjectionWithAlias = std::pair;
+    using SubqueriesProjectionColumns = std::map>;
+    using IdentifierWithQualifier = std::pair;
+
    /// Extracts settings, mostly to show which are used and which are not.
     struct ExtractedSettings
     {
@@ -73,29 +65,29 @@ private:
         PUSH_TO_HAVING,
     };
 
-    bool isAggregateFunction(ASTPtr & node);
+    bool isAggregateFunction(const ASTPtr & node);
 
     bool isArrayJoinFunction(const ASTPtr & node);
 
-    PredicateExpressions splitConjunctionPredicate(ASTPtr & predicate_expression);
+    std::vector splitConjunctionPredicate(ASTPtr & predicate_expression);
 
-    void getDependenciesAndQualifiedOfExpression(const ASTPtr & expression, IdentifiersWithQualifiedNameSet & dependencies_and_qualified,
-                                                 std::vector & tables_with_aliases);
+    std::vector getDependenciesAndQualifiers(ASTPtr & expression,
+                                             std::vector & tables_with_aliases);
 
     bool optimizeExpression(const ASTPtr & outer_expression, ASTPtr & subquery_expression, ASTSelectQuery * subquery);
 
-    bool optimizeImpl(ASTPtr & outer_expression, SubqueriesProjectionColumns & subqueries_projection_columns, bool is_prewhere);
+    bool optimizeImpl(ASTPtr & outer_expression, SubqueriesProjectionColumns & subqueries_projection_columns, OptimizeKind optimize_kind);
 
-    bool cannotPushDownOuterPredicate(const ProjectionsWithAliases & subquery_projection_columns, ASTSelectQuery * subquery,
-                                      IdentifiersWithQualifiedNameSet & outer_predicate_dependencies, bool & is_prewhere, OptimizeKind & optimize_kind);
+    bool canPushDownOuterPredicate(const std::vector & subquery_projection_columns,
+                                   const std::vector & outer_predicate_dependencies,
+                                   OptimizeKind & optimize_kind);
 
-    void cloneOuterPredicateForInnerPredicate(const ASTPtr & outer_predicate, const ProjectionsWithAliases & projection_columns,
-                                              std::vector & tables, ASTPtr & inner_predicate);
+    void setNewAliasesForInnerPredicate(const std::vector & projection_columns,
+                                        const std::vector & inner_predicate_dependencies);
 
-    void getAllSubqueryProjectionColumns(SubqueriesProjectionColumns & all_subquery_projection_columns);
+    SubqueriesProjectionColumns getAllSubqueryProjectionColumns();
 
-    void getSubqueryProjectionColumns(SubqueriesProjectionColumns & all_subquery_projection_columns,
-                                      String & qualified_name_prefix, const ASTPtr & subquery);
+    void getSubqueryProjectionColumns(const ASTPtr & subquery, SubqueriesProjectionColumns & all_subquery_projection_columns);
 
     ASTs getSelectQueryProjectionColumns(ASTPtr & ast);
 
diff --git a/dbms/src/Interpreters/QueryNormalizer.cpp b/dbms/src/Interpreters/QueryNormalizer.cpp
index a5151be7c64..56529ae595c 100644
--- a/dbms/src/Interpreters/QueryNormalizer.cpp
+++ b/dbms/src/Interpreters/QueryNormalizer.cpp
@@ -1,6 +1,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -107,7 +108,7 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data)
     auto & current_asts = data.current_asts;
     String & current_alias = data.current_alias;
 
-    if (!getColumnIdentifierName(node))
+    if (!IdentifierSemantic::getColumnName(node))
         return;
 
    /// If it is an alias, but not a parent alias (for constructs like "SELECT column + 1 AS column").
@@ -124,7 +125,7 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) if (!my_alias.empty() && my_alias != alias_node->getAliasOrColumnName()) { /// Avoid infinite recursion here - auto opt_name = getColumnIdentifierName(alias_node); + auto opt_name = IdentifierSemantic::getColumnName(alias_node); bool is_cycle = opt_name && *opt_name == node.name; if (!is_cycle) @@ -273,8 +274,7 @@ void QueryNormalizer::visitChildren(const ASTPtr & node, Data & data) visit(child, data); } } - else if (!typeid_cast(node.get()) && - !typeid_cast(node.get())) + else if (!typeid_cast(node.get())) { for (auto & child : node->children) { diff --git a/dbms/src/Interpreters/RequiredSourceColumnsVisitor.cpp b/dbms/src/Interpreters/RequiredSourceColumnsVisitor.cpp index 5dc479fee5e..436e636bec8 100644 --- a/dbms/src/Interpreters/RequiredSourceColumnsVisitor.cpp +++ b/dbms/src/Interpreters/RequiredSourceColumnsVisitor.cpp @@ -46,8 +46,7 @@ bool RequiredSourceColumnsMatcher::needChildVisit(ASTPtr & node, const ASTPtr & return false; /// Processed. Do not need children. - if (typeid_cast(node.get()) || - typeid_cast(node.get()) || + if (typeid_cast(node.get()) || typeid_cast(node.get()) || typeid_cast(node.get())) return false; diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index fb6a34d37b9..cc9fe20a69b 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -477,20 +478,18 @@ void collectJoinedColumnsFromJoinOnExpr(AnalyzedJoin & analyzed_join, const ASTS std::function get_table_belonging; get_table_belonging = [&](const ASTPtr & ast) -> TableBelonging { - if (getColumnIdentifierName(ast)) + if (IdentifierSemantic::getColumnName(ast)) { auto * identifier = typeid_cast(ast.get()); - { - auto left_num_components = getNumComponentsToStripInOrderToTranslateQualifiedName(*identifier, left_source_names); - auto right_num_components = getNumComponentsToStripInOrderToTranslateQualifiedName(*identifier, right_source_names); + size_t left_match_degree = IdentifierSemantic::canReferColumnToTable(*identifier, left_source_names); + size_t right_match_degree = IdentifierSemantic::canReferColumnToTable(*identifier, right_source_names); + + if (left_match_degree > right_match_degree) + return {identifier, nullptr}; + if (left_match_degree < right_match_degree) + return {nullptr, identifier}; - /// Assume that component from definite table if num_components is greater than for the other table. 
- if (left_num_components > right_num_components) - return {identifier, nullptr}; - if (left_num_components < right_num_components) - return {nullptr, identifier}; - } return {}; } @@ -516,19 +515,15 @@ void collectJoinedColumnsFromJoinOnExpr(AnalyzedJoin & analyzed_join, const ASTS std::function translate_qualified_names; translate_qualified_names = [&](ASTPtr & ast, const DatabaseAndTableWithAlias & source_names, bool right_table) { - if (getColumnIdentifierName(ast)) + if (IdentifierSemantic::getColumnName(ast)) { - auto * identifier = typeid_cast(ast.get()); + auto * identifier = typeid_cast(ast.get()); - { - auto num_components = getNumComponentsToStripInOrderToTranslateQualifiedName(*identifier, source_names); - stripIdentifier(ast, num_components); + size_t match = IdentifierSemantic::canReferColumnToTable(*identifier, source_names); + IdentifierSemantic::setColumnShortName(*identifier, match); - if (right_table && source_columns.count(ast->getColumnName())) - source_names.makeQualifiedName(ast); - - } - return; + if (right_table && source_columns.count(ast->getColumnName())) + IdentifierSemantic::setColumnQualifiedName(*identifier, source_names); } for (auto & child : ast->children) diff --git a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index f884028dd1c..382c8043f2d 100644 --- a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -28,8 +29,7 @@ bool TranslateQualifiedNamesMatcher::needChildVisit(ASTPtr & node, const ASTPtr return false; /// Processed nodes. Do not go into children. - if (typeid_cast(node.get()) || - typeid_cast(node.get()) || + if (typeid_cast(node.get()) || typeid_cast(node.get())) return false; @@ -50,38 +50,25 @@ std::vector TranslateQualifiedNamesMatcher::visit(ASTPtr & ast, Data & return {}; } -std::vector TranslateQualifiedNamesMatcher::visit(const ASTIdentifier & identifier, ASTPtr & ast, Data & data) +std::vector TranslateQualifiedNamesMatcher::visit(ASTIdentifier & identifier, ASTPtr & ast, Data & data) { - const NameSet & source_columns = data.source_columns; - const std::vector & tables = data.tables; - - if (getColumnIdentifierName(identifier)) + if (IdentifierSemantic::getColumnName(identifier)) { - /// Select first table name with max number of qualifiers which can be stripped. - size_t max_num_qualifiers_to_strip = 0; - size_t best_table_pos = 0; + bool best_table_pos = 0; + size_t best_match = 0; + for (size_t i = 0; i < data.tables.size(); ++i) + if (size_t match = IdentifierSemantic::canReferColumnToTable(identifier, data.tables[i])) + if (match > best_match) + { + best_match = match; + best_table_pos = i; + } - for (size_t table_pos = 0; table_pos < tables.size(); ++table_pos) - { - const auto & table = tables[table_pos]; - auto num_qualifiers_to_strip = getNumComponentsToStripInOrderToTranslateQualifiedName(identifier, table); - - if (num_qualifiers_to_strip > max_num_qualifiers_to_strip) - { - max_num_qualifiers_to_strip = num_qualifiers_to_strip; - best_table_pos = table_pos; - } - } - - if (max_num_qualifiers_to_strip) - stripIdentifier(ast, max_num_qualifiers_to_strip); + IdentifierSemantic::setColumnShortName(identifier, best_match); /// In case if column from the joined table are in source columns, change it's name to qualified. 
- if (best_table_pos && source_columns.count(ast->getColumnName())) - { - const DatabaseAndTableWithAlias & table = tables[best_table_pos]; - table.makeQualifiedName(ast); - } + if (best_table_pos && data.source_columns.count(ast->getColumnName())) + IdentifierSemantic::setColumnQualifiedName(identifier, data.tables[best_table_pos]); } return {}; diff --git a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h index 2b996bd0370..48d41213cb8 100644 --- a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h +++ b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h @@ -30,7 +30,7 @@ public: static bool needChildVisit(ASTPtr & node, const ASTPtr & child); private: - static std::vector visit(const ASTIdentifier & node, ASTPtr & ast, Data &); + static std::vector visit(ASTIdentifier & node, ASTPtr & ast, Data &); static std::vector visit(const ASTQualifiedAsterisk & node, const ASTPtr & ast, Data &); static std::vector visit(ASTTableJoin & node, const ASTPtr & ast, Data &); static std::vector visit(ASTSelectQuery & node, const ASTPtr & ast, Data &); diff --git a/dbms/src/Parsers/ASTIdentifier.cpp b/dbms/src/Parsers/ASTIdentifier.cpp index 81e6bda9741..e5500a89bd0 100644 --- a/dbms/src/Parsers/ASTIdentifier.cpp +++ b/dbms/src/Parsers/ASTIdentifier.cpp @@ -1,12 +1,27 @@ -#include #include +#include #include #include +#include namespace DB { +std::shared_ptr ASTIdentifier::createSpecial(const String & name, std::vector && name_parts) +{ + auto ret = std::make_shared(name, std::move(name_parts)); + ret->semantic->special = true; + return ret; +} + +ASTIdentifier::ASTIdentifier(const String & name_, std::vector && name_parts_) + : name(name_) + , name_parts(name_parts_) + , semantic(std::make_shared()) +{ +} + void ASTIdentifier::formatImplWithoutAlias(const FormatSettings & settings, FormatState &, FormatStateStacked) const { auto format_element = [&](const String & elem_name) @@ -74,74 +89,11 @@ bool getIdentifierName(const ASTPtr & ast, String & name) return false; } -std::optional getColumnIdentifierName(const ASTIdentifier & node) -{ - if (!node.special) - return node.name; - return {}; -} - -std::optional getColumnIdentifierName(const ASTPtr & ast) -{ - if (ast) - if (auto id = typeid_cast(ast.get())) - if (!id->special) - return id->name; - return {}; -} - -std::optional getTableIdentifierName(const ASTIdentifier & node) -{ - if (node.special) - return node.name; - return {}; -} - -std::optional getTableIdentifierName(const ASTPtr & ast) -{ - if (ast) - if (auto id = typeid_cast(ast.get())) - if (id->special) - return id->name; - return {}; -} - void setIdentifierSpecial(ASTPtr & ast) { if (ast) if (ASTIdentifier * id = typeid_cast(ast.get())) - id->setSpecial(); -} - -void addIdentifierQualifier(ASTIdentifier & identifier, const String & database, const String & table, const String & alias) -{ - if (!alias.empty()) - { - identifier.name_parts.emplace_back(alias); - } - else - { - if (!database.empty()) - identifier.name_parts.emplace_back(database); - identifier.name_parts.emplace_back(table); - } -} - -bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & database, const String & table) -{ - size_t num_components = identifier.name_parts.size(); - if (num_components >= 3) - return identifier.name_parts[0] == database && - identifier.name_parts[1] == table; - return false; -} - -bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & table) -{ - size_t num_components = 
identifier.name_parts.size(); - if (num_components >= 2) - return identifier.name_parts[0] == table; - return false; + id->semantic->special = true; } } diff --git a/dbms/src/Parsers/ASTIdentifier.h b/dbms/src/Parsers/ASTIdentifier.h index 3b7550f8d5c..5c287eb9da4 100644 --- a/dbms/src/Parsers/ASTIdentifier.h +++ b/dbms/src/Parsers/ASTIdentifier.h @@ -8,6 +8,11 @@ namespace DB { +struct IdentifierSemantic; +struct IdentifierSemanticImpl; +struct DatabaseAndTableWithAlias; + + /// Identifier (column, table or alias) class ASTIdentifier : public ASTWithAlias { @@ -15,14 +20,8 @@ public: /// The composite identifier will have a concatenated name (of the form a.b.c), /// and individual components will be available inside the name_parts. String name; - std::vector name_parts; - ASTIdentifier(const String & name_, std::vector && name_parts_ = {}) - : name(name_) - , name_parts(name_parts_) - , special(false) - { - } + ASTIdentifier(const String & name_, std::vector && name_parts_ = {}); /** Get the text that identifies this element. */ String getID(char delim) const override { return "Identifier" + (delim + name); } @@ -34,6 +33,15 @@ public: set.insert(name); } + bool compound() const { return !name_parts.empty(); } + bool isShort() const { return name_parts.empty() || name == name_parts.back(); } + + void resetWithAlias(const String & new_name) + { + name = new_name; + name_parts.clear(); + } + protected: void formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; void appendColumnNameImpl(WriteBuffer & ostr) const override; @@ -41,29 +49,21 @@ protected: private: using ASTWithAlias::children; /// ASTIdentifier is child free - bool special; /// TODO: it would be ptr to semantic here + std::vector name_parts; + std::shared_ptr semantic; /// pimpl - static std::shared_ptr createSpecial(const String & name, std::vector && name_parts = {}) - { - auto ret = std::make_shared(name, std::move(name_parts)); - ret->special = true; - return ret; - } + static std::shared_ptr createSpecial(const String & name, std::vector && name_parts = {}); - void setSpecial() { special = true; } - - friend void setIdentifierSpecial(ASTPtr &); - friend std::optional getColumnIdentifierName(const ASTIdentifier & node); - friend std::optional getColumnIdentifierName(const ASTPtr & ast); - friend std::optional getTableIdentifierName(const ASTIdentifier & node); - friend std::optional getTableIdentifierName(const ASTPtr & ast); + friend struct IdentifierSemantic; friend ASTPtr createTableIdentifier(const String & database_name, const String & table_name); + friend void setIdentifierSpecial(ASTPtr & ast); }; /// ASTIdentifier Helpers: hide casts and semantic. 
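
The `special` flag (and whatever semantic state comes next) now lives behind the shared `semantic` pimpl, so the parser header stops leaking analysis details. Reconstructed from the call sites in this patch, the new interface looks roughly like the sketch below; the real header may carry more members:

    struct IdentifierSemanticImpl
    {
        bool special = false;   /// "special" marks non-column identifiers: tables, databases
    };

    /// Static helpers over ASTIdentifier::semantic; the former friend functions route here.
    struct IdentifierSemantic
    {
        /// Name for column identifiers, empty optional for special ones.
        static std::optional<String> getColumnName(const ASTPtr & ast);
        static std::optional<String> getColumnName(const ASTIdentifier & node);

        /// Match degree of `identifier` against a table (0 means "cannot refer").
        static size_t canReferColumnToTable(const ASTIdentifier & identifier,
                                            const DatabaseAndTableWithAlias & db_and_table);

        static void setColumnShortName(ASTIdentifier & identifier, size_t match);
        static void setColumnQualifiedName(ASTIdentifier & identifier,
                                           const DatabaseAndTableWithAlias & db_and_table);
    };
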
ASTPtr createTableIdentifier(const String & database_name, const String & table_name); +void setIdentifierSpecial(ASTPtr & ast); bool isIdentifier(const IAST * const ast); inline bool isIdentifier(const ASTPtr & ast) { return isIdentifier(ast.get()); } @@ -72,17 +72,5 @@ std::optional getIdentifierName(const IAST * const ast); inline std::optional getIdentifierName(const ASTPtr & ast) { return getIdentifierName(ast.get()); } bool getIdentifierName(const ASTPtr & ast, String & name); -/// @returns name for column identifiers -std::optional getColumnIdentifierName(const ASTIdentifier & node); -std::optional getColumnIdentifierName(const ASTPtr & ast); - -/// @returns name for 'not a column' identifiers -std::optional getTableIdentifierName(const ASTIdentifier & node); -std::optional getTableIdentifierName(const ASTPtr & ast); - -void setIdentifierSpecial(ASTPtr & ast); -void addIdentifierQualifier(ASTIdentifier & identifier, const String & database, const String & table, const String & alias); -bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & table_or_alias); -bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & database, const String & table); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 66a9a6f90d2..d52a855e3f3 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -380,7 +381,7 @@ bool MergeTreeWhereOptimizer::cannotBeMoved(const ASTPtr & ptr) const if ("indexHint" == function_ptr->name) return true; } - else if (auto opt_name = getColumnIdentifierName(ptr)) + else if (auto opt_name = IdentifierSemantic::getColumnName(ptr)) { /// disallow moving result of ARRAY JOIN to PREWHERE if (array_joined_names.count(*opt_name) || diff --git a/dbms/src/Storages/StorageView.cpp b/dbms/src/Storages/StorageView.cpp index bb31bd81e53..97c085d16e3 100644 --- a/dbms/src/Storages/StorageView.cpp +++ b/dbms/src/Storages/StorageView.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include diff --git a/dbms/src/Storages/VirtualColumnUtils.cpp b/dbms/src/Storages/VirtualColumnUtils.cpp index 990a587445c..b7ea5e66c37 100644 --- a/dbms/src/Storages/VirtualColumnUtils.cpp +++ b/dbms/src/Storages/VirtualColumnUtils.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -96,7 +97,7 @@ static bool isValidFunction(const ASTPtr & expression, const NameSet & columns) if (!isValidFunction(expression->children[i], columns)) return false; - if (auto opt_name = getColumnIdentifierName(expression)) + if (auto opt_name = IdentifierSemantic::getColumnName(expression)) return columns.count(*opt_name); return true; diff --git a/dbms/src/TableFunctions/TableFunctionRemote.cpp b/dbms/src/TableFunctions/TableFunctionRemote.cpp index a41f1c89c49..0d457a01be3 100644 --- a/dbms/src/TableFunctions/TableFunctionRemote.cpp +++ b/dbms/src/TableFunctions/TableFunctionRemote.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference b/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference index df5aebabc89..bc28d4efb7e 100644 --- a/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference +++ b/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference @@ -13,6 +13,7 @@ 3 
3 2000-01-01 1 test string 1 1 3 3 +-------Force push down------- 2000-01-01 1 test string 1 1 2000-01-01 1 test string 1 1 2000-01-01 1 test string 1 1 diff --git a/dbms/tests/queries/0_stateless/00597_push_down_predicate.sql b/dbms/tests/queries/0_stateless/00597_push_down_predicate.sql index 2f1af7fa1fa..0180fcdeb1e 100644 --- a/dbms/tests/queries/0_stateless/00597_push_down_predicate.sql +++ b/dbms/tests/queries/0_stateless/00597_push_down_predicate.sql @@ -33,6 +33,7 @@ SELECT * FROM (SELECT toUInt64(b), sum(id) AS b FROM test.test) WHERE `toUInt64( SELECT date, id, name, value FROM (SELECT date, name, value, min(id) AS id FROM test.test GROUP BY date, name, value) WHERE id = 1; SELECT * FROM (SELECT toUInt64(table_alias.b) AS a, sum(id) AS b FROM test.test AS table_alias) AS outer_table_alias WHERE outer_table_alias.b = 3; +SELECT '-------Force push down-------'; SET force_primary_key = 1; -- Optimize predicate expression with asterisk @@ -65,7 +66,7 @@ SELECT * FROM (SELECT 1 AS id, toDate('2000-01-01') AS date FROM system.numbers SELECT * FROM test.test_view WHERE id = 1; SELECT * FROM test.test_view WHERE id = 2; SELECT id FROM test.test_view WHERE id = 1; -SELECT s.id FROM test.test_view AS s WHERE id = 1; +SELECT s.id FROM test.test_view AS s WHERE s.id = 1; SELECT '-------Push to having expression, need check.-------'; SELECT id FROM (SELECT min(id) AS id FROM test.test) WHERE id = 1; -- { serverError 277 } diff --git a/dbms/tests/queries/0_stateless/00674_join_on_syntax.reference b/dbms/tests/queries/0_stateless/00674_join_on_syntax.reference index ae0944580e5..10c31e56fe8 100644 --- a/dbms/tests/queries/0_stateless/00674_join_on_syntax.reference +++ b/dbms/tests/queries/0_stateless/00674_join_on_syntax.reference @@ -46,8 +46,8 @@ join on complex expression 2 3 2 3 duplicate column names -{"a1":1,"test.tab1_copy.a1":2} -{"a1":1,"test.tab1_copy.a1":2} +{"a1":1,"tab1_copy.a1":2} +{"a1":1,"tab1_copy.a1":2} {"a1":1,"copy.a1":2} {"a1":1,"copy.a1":2} {"a1":1,"copy.a1":2} diff --git a/dbms/tests/queries/0_stateless/00674_join_on_syntax.sql b/dbms/tests/queries/0_stateless/00674_join_on_syntax.sql index 06d930d8c6d..27d83e4684c 100644 --- a/dbms/tests/queries/0_stateless/00674_join_on_syntax.sql +++ b/dbms/tests/queries/0_stateless/00674_join_on_syntax.sql @@ -76,11 +76,11 @@ select a2, b2 from test.tab2 second any left join test.tab3 third on third.a3 + select a2, b2 from test.tab2 second any left join test.tab3 third on third.a3 + test.tab3.b3 = test.tab2.a2 + second.b2; select 'duplicate column names'; -select a1, tab1_copy.a1 from test.tab1 any left join test.tab1_copy on tab1.b1 + 3 = b1 + 2 FORMAT JSONEachRow; -select a1, test.tab1_copy.a1 from test.tab1 any left join test.tab1_copy on tab1.b1 + 3 = b1 + 2 FORMAT JSONEachRow; -select a1, copy.a1 from test.tab1 any left join test.tab1_copy copy on tab1.b1 + 3 = b1 + 2 FORMAT JSONEachRow; -select a1, tab1_copy.a1 from test.tab1 any left join test.tab1_copy copy on tab1.b1 + 3 = b1 + 2 FORMAT JSONEachRow; -select a1, test.tab1_copy.a1 from test.tab1 any left join test.tab1_copy copy on tab1.b1 + 3 = b1 + 2 FORMAT JSONEachRow; +select a1, tab1_copy.a1 from test.tab1 any left join test.tab1_copy on tab1.b1 + 3 = tab1_copy.b1 + 2 FORMAT JSONEachRow; +select a1, test.tab1_copy.a1 from test.tab1 any left join test.tab1_copy on tab1.b1 + 3 = tab1_copy.b1 + 2 FORMAT JSONEachRow; +select a1, copy.a1 from test.tab1 any left join test.tab1_copy copy on tab1.b1 + 3 = tab1_copy.b1 + 2 FORMAT JSONEachRow; +select a1, tab1_copy.a1 from test.tab1 
any left join test.tab1_copy copy on tab1.b1 + 3 = tab1_copy.b1 + 2 FORMAT JSONEachRow; +select a1, test.tab1_copy.a1 from test.tab1 any left join test.tab1_copy copy on tab1.b1 + 3 = tab1_copy.b1 + 2 FORMAT JSONEachRow; select 'subquery'; select a1 from test.tab1 any left join (select * from test.tab2) on b1 = a2; @@ -104,4 +104,4 @@ select a1, a2, b1, b2 from test.tab1 first any left join (select * from test.tab select a1, a2, b1, b2 from test.tab1 first any left join (select *, a2 as z from test.tab2) second on first.b1 = second.z; select a1, a2, b1, b2 from test.tab1 first any left join (select *, a2 + 1 as z from test.tab2) second on first.b1 + 1 = second.z; select tab1.a1, a2, test.tab1.b1, second.b2 from test.tab1 first any left join (select * from test.tab2) second on first.b1 = second.a2; -select a1, s.a1 from test.tab1 any left join (select * from test.tab1_copy) s on tab1.b1 + 3 = b1 + 2 FORMAT JSONEachRow; +select a1, s.a1 from test.tab1 any left join (select * from test.tab1_copy) s on tab1.b1 + 3 = s.b1 + 2 FORMAT JSONEachRow; diff --git a/dbms/tests/queries/0_stateless/00703_join_crash.sql b/dbms/tests/queries/0_stateless/00703_join_crash.sql index 32c0668b3bf..29b86e055fb 100644 --- a/dbms/tests/queries/0_stateless/00703_join_crash.sql +++ b/dbms/tests/queries/0_stateless/00703_join_crash.sql @@ -7,7 +7,7 @@ create table test.tab1_copy (a1 Int32, b1 Int32) engine = MergeTree order by a1; insert into test.tab1 values (1, 2); insert into test.tab1_copy values (2, 3); -select tab1.a1, tab1_copy.a1, tab1.b1 from test.tab1 any left join test.tab1_copy on tab1.b1 + 3 = b1 + 2; +select tab1.a1, tab1_copy.a1, tab1.b1 from test.tab1 any left join test.tab1_copy on tab1.b1 + 3 = tab1_copy.b1 + 2; drop table test.tab1; drop table test.tab1_copy; From 9539467b59ea344971a0bbe2d9ac7d4f108c94ae Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 25 Jan 2019 18:48:53 +0300 Subject: [PATCH 135/586] hotfix --- dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 382c8043f2d..016d176caba 100644 --- a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -54,7 +54,7 @@ std::vector TranslateQualifiedNamesMatcher::visit(ASTIdentifier & iden { if (IdentifierSemantic::getColumnName(identifier)) { - bool best_table_pos = 0; + size_t best_table_pos = 0; size_t best_match = 0; for (size_t i = 0; i < data.tables.size(); ++i) if (size_t match = IdentifierSemantic::canReferColumnToTable(identifier, data.tables[i])) From 0d4b7ff82eac705b182906c66bc41ef81b80b406 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 25 Jan 2019 21:35:16 +0300 Subject: [PATCH 136/586] Refactoring in performance test (may be build, but doesn't work) --- dbms/programs/performance-test/CMakeLists.txt | 6 + .../performance-test/ConfigPreprocessor.cpp | 85 ++ .../performance-test/ConfigPreprocessor.h | 50 + .../performance-test/PerformanceTest.cpp | 1201 ++--------------- .../performance-test/PerformanceTest.h | 49 + .../performance-test/PerformanceTestInfo.cpp | 271 ++++ .../performance-test/PerformanceTestInfo.h | 52 + .../performance-test/PerformanceTestSuite.cpp | 400 ++++++ .../performance-test/ReportBuilder.cpp | 190 +++ .../programs/performance-test/ReportBuilder.h | 30 + dbms/programs/performance-test/TestStats.cpp | 1 + dbms/programs/performance-test/TestStats.h | 2 + 
.../performance-test/applySubstitutions.cpp | 82 ++ .../performance-test/applySubstitutions.h | 18 + .../performance-test/executeQuery.cpp | 72 + dbms/programs/performance-test/executeQuery.h | 16 + 16 files changed, 1465 insertions(+), 1060 deletions(-) create mode 100644 dbms/programs/performance-test/ConfigPreprocessor.cpp create mode 100644 dbms/programs/performance-test/ConfigPreprocessor.h create mode 100644 dbms/programs/performance-test/PerformanceTest.h create mode 100644 dbms/programs/performance-test/PerformanceTestInfo.cpp create mode 100644 dbms/programs/performance-test/PerformanceTestInfo.h create mode 100644 dbms/programs/performance-test/PerformanceTestSuite.cpp create mode 100644 dbms/programs/performance-test/ReportBuilder.cpp create mode 100644 dbms/programs/performance-test/ReportBuilder.h create mode 100644 dbms/programs/performance-test/applySubstitutions.cpp create mode 100644 dbms/programs/performance-test/applySubstitutions.h create mode 100644 dbms/programs/performance-test/executeQuery.cpp create mode 100644 dbms/programs/performance-test/executeQuery.h diff --git a/dbms/programs/performance-test/CMakeLists.txt b/dbms/programs/performance-test/CMakeLists.txt index 591a7180691..9c1e5e98423 100644 --- a/dbms/programs/performance-test/CMakeLists.txt +++ b/dbms/programs/performance-test/CMakeLists.txt @@ -3,7 +3,13 @@ add_library (clickhouse-performance-test-lib ${LINK_MODE} StopConditionsSet.cpp TestStopConditions.cpp TestStats.cpp + ConfigPreprocessor.cpp PerformanceTest.cpp + PerformanceTestInfo.cpp + executeQuery.cpp + applySubstitutions.cpp + ReportBuilder.cpp + PerformanceTestSuite.cpp ) target_link_libraries (clickhouse-performance-test-lib PRIVATE dbms clickhouse_common_io clickhouse_common_config ${Boost_PROGRAM_OPTIONS_LIBRARY}) target_include_directories (clickhouse-performance-test-lib SYSTEM PRIVATE ${PCG_RANDOM_INCLUDE_DIR}) diff --git a/dbms/programs/performance-test/ConfigPreprocessor.cpp b/dbms/programs/performance-test/ConfigPreprocessor.cpp new file mode 100644 index 00000000000..f03f6d7940f --- /dev/null +++ b/dbms/programs/performance-test/ConfigPreprocessor.cpp @@ -0,0 +1,85 @@ +#include "ConfigPreprocessor.h" +#include +#include +namespace DB +{ +std::vector ConfigPreprocessor::processConfig( + const Strings & tests_tags, + const Strings & tests_names, + const Strings & tests_names_regexp, + const Strings & skip_tags, + const Strings & skip_names, + const Strings & skip_names_regexp) const +{ + + std::vector result; + for (const auto & path : paths) + result.emplace_back(new XMLConfiguration(path)); + /// Leave tests: + removeConfigurationsIf(result, FilterType::Tag, tests_tags, true); + removeConfigurationsIf(result, FilterType::Name, tests_names, true); + removeConfigurationsIf(result, FilterType::Name_regexp, tests_names_regexp, true); + + /// Skip tests + removeConfigurationsIf(result, FilterType::Tag, skip_tags, false); + removeConfigurationsIf(result, FilterType::Name, skip_names, false); + removeConfigurationsIf(result, FilterType::Name_regexp, skip_names_regexp, false); + return result; +} + +void ConfigPreprocessor::removeConfigurationsIf( + std::vector & configs, + ConfigPreprocessor::FilterType filter_type, + const Strings & values, + bool leave) const +{ + auto checker = [&filter_type, &values, &leave] (XMLConfigurationPtr & config) + { + if (values.size() == 0) + return false; + + bool remove_or_not = false; + + if (filter_type == FilterType::Tag) + { + std::vector tags_keys; + config->keys("tags", tags_keys); + + Strings 
tags(tags_keys.size()); + for (size_t i = 0; i != tags_keys.size(); ++i) + tags[i] = config->getString("tags.tag[" + std::to_string(i) + "]"); + + for (const String & config_tag : tags) + { + if (std::find(values.begin(), values.end(), config_tag) != values.end()) + remove_or_not = true; + } + } + + if (filter_type == FilterType::Name) + { + remove_or_not = (std::find(values.begin(), values.end(), config->getString("name", "")) != values.end()); + } + + if (filter_type == FilterType::Name_regexp) + { + String config_name = config->getString("name", ""); + auto regex_checker = [&config_name](const String & name_regexp) + { + std::regex pattern(name_regexp); + return std::regex_search(config_name, pattern); + }; + + remove_or_not = config->has("name") ? (std::find_if(values.begin(), values.end(), regex_checker) != values.end()) : false; + } + + if (leave) + remove_or_not = !remove_or_not; + return remove_or_not; + }; + + auto new_end = std::remove_if(configs.begin(), configs.end(), checker); + configs.erase(new_end, configs.end()); +} + +} diff --git a/dbms/programs/performance-test/ConfigPreprocessor.h b/dbms/programs/performance-test/ConfigPreprocessor.h new file mode 100644 index 00000000000..49c85032b93 --- /dev/null +++ b/dbms/programs/performance-test/ConfigPreprocessor.h @@ -0,0 +1,50 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +using XMLConfiguration = Poco::Util::XMLConfiguration; +using XMLConfigurationPtr = Poco::AutoPtr; +using XMLDocumentPtr = Poco::AutoPtr; +using Strings = std::vector; + +class ConfigPreprocessor +{ +public: + ConfigPreprocessor(const std::vector & paths_) + : paths(paths_) + {} + + std::vector processConfig( + const Strings & tests_tags, + const Strings & tests_names, + const Strings & tests_names_regexp, + const Strings & skip_tags, + const Strings & skip_names, + const Strings & skip_names_regexp) const; + +private: + + enum class FilterType + { + Tag, + Name, + Name_regexp + }; + + /// Removes configurations that has a given value. + /// If leave is true, the logic is reversed. + void removeConfigurationsIf( + std::vector & configs, + FilterType filter_type, + const Strings & values, + bool leave = false) const; + + const std::vector paths; +}; +} diff --git a/dbms/programs/performance-test/PerformanceTest.cpp b/dbms/programs/performance-test/PerformanceTest.cpp index d5bfcc85c60..88b9617013c 100644 --- a/dbms/programs/performance-test/PerformanceTest.cpp +++ b/dbms/programs/performance-test/PerformanceTest.cpp @@ -1,1097 +1,178 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "PerformanceTest.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include #include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "JSONString.h" -#include "StopConditionsSet.h" -#include "TestStopConditions.h" -#include "TestStats.h" - -#ifndef __clang__ -#pragma GCC optimize("-fno-var-tracking-assignments") -#endif - - -/** Tests launcher for ClickHouse. - * The tool walks through given or default folder in order to find files with - * tests' descriptions and launches it. 
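
ConfigPreprocessor takes over the test filtering that used to sit inside the application class: the three `tests_*` arguments keep matching configurations (leave = true), then the three `skip_*` arguments drop matching ones. A usage sketch with illustrative file names and tags:

    ConfigPreprocessor preprocessor({"queries_simple.xml", "queries_joins.xml"});
    std::vector<XMLConfigurationPtr> configs = preprocessor.processConfig(
        {"long"},       /// tests_tags: keep only tests tagged "long"
        {},             /// tests_names
        {},             /// tests_names_regexp
        {"unstable"},   /// skip_tags: then drop tests tagged "unstable"
        {},             /// skip_names
        {});            /// skip_names_regexp
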
- */ -namespace fs = boost::filesystem; -using String = std::string; -const std::regex QUOTE_REGEX{"\""}; +#include +#include +#include "executeQuery.h" namespace DB { + namespace ErrorCodes { - extern const int NOT_IMPLEMENTED; - extern const int LOGICAL_ERROR; - extern const int BAD_ARGUMENTS; - extern const int FILE_DOESNT_EXIST; +extern const int NOT_IMPLEMENTED; +extern const int LOGICAL_ERROR; +extern const int BAD_ARGUMENTS; +extern const int FILE_DOESNT_EXIST; +} + +namespace fs = boost::filesystem; + +PerformanceTest::PerformanceTest( + const XMLConfigurationPtr & config_, + Connection & connection_, + InterruptListener & interrupt_listener_, + const PerformanceTestInfo & test_info_) + : config(config_) + , connection(connection_) + , interrupt_listener(interrupt_listener_) + , test_info(test_info_) +{ +} + +bool PerformanceTest::checkPreconditions() const +{ + if (!config->has("preconditions")) + return true; + + std::vector preconditions; + config->keys("preconditions", preconditions); + size_t table_precondition_index = 0; + + for (const String & precondition : preconditions) + { + if (precondition == "flush_disk_cache") + { + if (system( + "(>&2 echo 'Flushing disk cache...') && (sudo sh -c 'echo 3 > /proc/sys/vm/drop_caches') && (>&2 echo 'Flushed.')")) + { + std::cerr << "Failed to flush disk cache" << std::endl; + return false; + } + } + + if (precondition == "ram_size") + { + size_t ram_size_needed = config->getUInt64("preconditions.ram_size"); + size_t actual_ram = getMemoryAmount(); + if (!actual_ram) + throw DB::Exception("ram_size precondition not available on this platform", DB::ErrorCodes::NOT_IMPLEMENTED); + + if (ram_size_needed > actual_ram) + { + std::cerr << "Not enough RAM: need = " << ram_size_needed << ", present = " << actual_ram << std::endl; + return false; + } + } + + if (precondition == "table_exists") + { + String precondition_key = "preconditions.table_exists[" + std::to_string(table_precondition_index++) + "]"; + String table_to_check = config->getString(precondition_key); + String query = "EXISTS TABLE " + table_to_check + ";"; + + size_t exist = 0; + + connection.sendQuery(query, "", QueryProcessingStage::Complete, &test_info.settings, nullptr, false); + + while (true) + { + Connection::Packet packet = connection.receivePacket(); + + if (packet.type == Protocol::Server::Data) + { + for (const ColumnWithTypeAndName & column : packet.block) + { + if (column.name == "result" && column.column->size() > 0) + { + exist = column.column->get64(0); + if (exist) + break; + } + } + } + + if (packet.type == Protocol::Server::Exception + || packet.type == Protocol::Server::EndOfStream) + break; + } + + if (!exist) + { + std::cerr << "Table " << table_to_check << " doesn't exist" << std::endl; + return false; + } + } + } + + return true; } -using ConfigurationPtr = Poco::AutoPtr; -class PerformanceTest : public Poco::Util::Application +std::vector PerformanceTest::execute() { -public: - using Strings = std::vector; - - PerformanceTest(const String & host_, - const UInt16 port_, - const bool secure_, - const String & default_database_, - const String & user_, - const String & password_, - const bool lite_output_, - const String & profiles_file_, - Strings && input_files_, - Strings && tests_tags_, - Strings && skip_tags_, - Strings && tests_names_, - Strings && skip_names_, - Strings && tests_names_regexp_, - Strings && skip_names_regexp_, - const ConnectionTimeouts & timeouts) - : connection(host_, port_, default_database_, user_, password_, timeouts, 
"performance-test", Protocol::Compression::Enable, secure_ ? Protocol::Secure::Enable : Protocol::Secure::Disable), - gotSIGINT(false), - lite_output(lite_output_), - profiles_file(profiles_file_), - input_files(input_files_), - tests_tags(std::move(tests_tags_)), - skip_tags(std::move(skip_tags_)), - tests_names(std::move(tests_names_)), - skip_names(std::move(skip_names_)), - tests_names_regexp(std::move(tests_names_regexp_)), - skip_names_regexp(std::move(skip_names_regexp_)) - { - if (input_files.size() < 1) - { - throw DB::Exception("No tests were specified", DB::ErrorCodes::BAD_ARGUMENTS); - } - } - - void initialize(Poco::Util::Application & self [[maybe_unused]]) - { - std::string home_path; - const char * home_path_cstr = getenv("HOME"); - if (home_path_cstr) - home_path = home_path_cstr; - configReadClient(Poco::Util::Application::instance().config(), home_path); - } - - int main(const std::vector < std::string > & /* args */) - { - std::string name; - UInt64 version_major; - UInt64 version_minor; - UInt64 version_patch; - UInt64 version_revision; - connection.getServerVersion(name, version_major, version_minor, version_patch, version_revision); - - std::stringstream ss; - ss << version_major << "." << version_minor << "." << version_patch; - server_version = ss.str(); - - processTestsConfigurations(input_files); - - return 0; - } - -private: - String test_name; - - using Query = String; - using Queries = std::vector; - using QueriesWithIndexes = std::vector>; - Queries queries; - - Connection connection; - std::string server_version; - - using Keys = std::vector; - - Settings settings; - Context global_context = Context::createGlobal(); - - InterruptListener interrupt_listener; - - using XMLConfiguration = Poco::Util::XMLConfiguration; - using XMLConfigurationPtr = Poco::AutoPtr; - - using Paths = std::vector; - using StringToVector = std::map>; - using StringToMap = std::map; - StringToMap substitutions; - - using StringKeyValue = std::map; - std::vector substitutions_maps; - - bool gotSIGINT; - std::vector stop_conditions_by_run; - String main_metric; - bool lite_output; - String profiles_file; - - Strings input_files; - std::vector tests_configurations; - - Strings tests_tags; - Strings skip_tags; - Strings tests_names; - Strings skip_names; - Strings tests_names_regexp; - Strings skip_names_regexp; - - enum class ExecutionType - { - Loop, - Once - }; - ExecutionType exec_type; - - enum class FilterType - { - Tag, - Name, - Name_regexp - }; - - size_t times_to_run = 1; std::vector statistics_by_run; - - /// Removes configurations that has a given value. If leave is true, the logic is reversed. 
- void removeConfigurationsIf( - std::vector & configs, FilterType filter_type, const Strings & values, bool leave = false) + statistics_by_run.resize(test_info.times_to_run * test_info.queries.size()); + for (size_t number_of_launch = 0; number_of_launch < test_info.times_to_run; ++number_of_launch) { - auto checker = [&filter_type, &values, &leave](XMLConfigurationPtr & config) + QueriesWithIndexes queries_with_indexes; + + for (size_t query_index = 0; query_index < test_info.queries.size(); ++query_index) { - if (values.size() == 0) - return false; + size_t statistic_index = number_of_launch * test_info.queries.size() + query_index; + test_info.stop_conditions_by_run[statistic_index].reset(); - bool remove_or_not = false; - - if (filter_type == FilterType::Tag) - { - Keys tags_keys; - config->keys("tags", tags_keys); - - Strings tags(tags_keys.size()); - for (size_t i = 0; i != tags_keys.size(); ++i) - tags[i] = config->getString("tags.tag[" + std::to_string(i) + "]"); - - for (const String & config_tag : tags) - { - if (std::find(values.begin(), values.end(), config_tag) != values.end()) - remove_or_not = true; - } - } - - if (filter_type == FilterType::Name) - { - remove_or_not = (std::find(values.begin(), values.end(), config->getString("name", "")) != values.end()); - } - - if (filter_type == FilterType::Name_regexp) - { - String config_name = config->getString("name", ""); - auto regex_checker = [&config_name](const String & name_regexp) - { - std::regex pattern(name_regexp); - return std::regex_search(config_name, pattern); - }; - - remove_or_not = config->has("name") ? (std::find_if(values.begin(), values.end(), regex_checker) != values.end()) : false; - } - - if (leave) - remove_or_not = !remove_or_not; - return remove_or_not; - }; - - auto new_end = std::remove_if(configs.begin(), configs.end(), checker); - configs.erase(new_end, configs.end()); - } - - /// Filter tests by tags, names, regexp matching, etc. - void filterConfigurations() - { - /// Leave tests: - removeConfigurationsIf(tests_configurations, FilterType::Tag, tests_tags, true); - removeConfigurationsIf(tests_configurations, FilterType::Name, tests_names, true); - removeConfigurationsIf(tests_configurations, FilterType::Name_regexp, tests_names_regexp, true); - - - /// Skip tests - removeConfigurationsIf(tests_configurations, FilterType::Tag, skip_tags, false); - removeConfigurationsIf(tests_configurations, FilterType::Name, skip_names, false); - removeConfigurationsIf(tests_configurations, FilterType::Name_regexp, skip_names_regexp, false); - } - - /// Checks specified preconditions per test (process cache, table existence, etc.) 
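
For reference, the three preconditions this method understands, written out as they would appear in a test's XML; the element names come from the config keys read below, while the sample values are illustrative:

    /// <preconditions>
    ///     <flush_disk_cache/>                          drop the OS page cache before the run
    ///     <ram_size>17179869184</ram_size>             require at least this much RAM, in bytes
    ///     <table_exists>default.hits</table_exists>    run EXISTS TABLE before the test
    /// </preconditions>
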
- bool checkPreconditions(const XMLConfigurationPtr & config) - { - if (!config->has("preconditions")) - return true; - - Keys preconditions; - config->keys("preconditions", preconditions); - size_t table_precondition_index = 0; - - for (const String & precondition : preconditions) - { - if (precondition == "flush_disk_cache") - { - if (system( - "(>&2 echo 'Flushing disk cache...') && (sudo sh -c 'echo 3 > /proc/sys/vm/drop_caches') && (>&2 echo 'Flushed.')")) - { - std::cerr << "Failed to flush disk cache" << std::endl; - return false; - } - } - - if (precondition == "ram_size") - { - size_t ram_size_needed = config->getUInt64("preconditions.ram_size"); - size_t actual_ram = getMemoryAmount(); - if (!actual_ram) - throw DB::Exception("ram_size precondition not available on this platform", DB::ErrorCodes::NOT_IMPLEMENTED); - - if (ram_size_needed > actual_ram) - { - std::cerr << "Not enough RAM: need = " << ram_size_needed << ", present = " << actual_ram << std::endl; - return false; - } - } - - if (precondition == "table_exists") - { - String precondition_key = "preconditions.table_exists[" + std::to_string(table_precondition_index++) + "]"; - String table_to_check = config->getString(precondition_key); - String query = "EXISTS TABLE " + table_to_check + ";"; - - size_t exist = 0; - - connection.sendQuery(query, "", QueryProcessingStage::Complete, &settings, nullptr, false); - - while (true) - { - Connection::Packet packet = connection.receivePacket(); - - if (packet.type == Protocol::Server::Data) - { - for (const ColumnWithTypeAndName & column : packet.block) - { - if (column.name == "result" && column.column->size() > 0) - { - exist = column.column->get64(0); - if (exist) - break; - } - } - } - - if (packet.type == Protocol::Server::Exception || packet.type == Protocol::Server::EndOfStream) - break; - } - - if (!exist) - { - std::cerr << "Table " << table_to_check << " doesn't exist" << std::endl; - return false; - } - } - } - - return true; - } - - void processTestsConfigurations(const Paths & paths) - { - tests_configurations.resize(paths.size()); - - for (size_t i = 0; i != paths.size(); ++i) - { - const String path = paths[i]; - tests_configurations[i] = XMLConfigurationPtr(new XMLConfiguration(path)); - } - - filterConfigurations(); - - if (tests_configurations.size()) - { - Strings outputs; - - for (auto & test_config : tests_configurations) - { - if (!checkPreconditions(test_config)) - { - std::cerr << "Preconditions are not fulfilled for test '" + test_config->getString("name", "") + "' "; - continue; - } - - String output = runTest(test_config); - if (lite_output) - std::cout << output; - else - outputs.push_back(output); - } - - if (!lite_output && outputs.size()) - { - std::cout << "[" << std::endl; - - for (size_t i = 0; i != outputs.size(); ++i) - { - std::cout << outputs[i]; - if (i != outputs.size() - 1) - std::cout << ","; - - std::cout << std::endl; - } - - std::cout << "]" << std::endl; - } - } - } - - void extractSettings( - const XMLConfigurationPtr & config, const String & key, const Strings & settings_list, std::map & settings_to_apply) - { - for (const String & setup : settings_list) - { - if (setup == "profile") - continue; - - String value = config->getString(key + "." 
+ setup); - if (value.empty()) - value = "true"; - - settings_to_apply[setup] = value; - } - } - - String runTest(XMLConfigurationPtr & test_config) - { - queries.clear(); - - test_name = test_config->getString("name"); - std::cerr << "Running: " << test_name << "\n"; - - if (test_config->has("settings")) - { - std::map settings_to_apply; - Keys config_settings; - test_config->keys("settings", config_settings); - - /// Preprocess configuration file - if (std::find(config_settings.begin(), config_settings.end(), "profile") != config_settings.end()) - { - if (!profiles_file.empty()) - { - String profile_name = test_config->getString("settings.profile"); - XMLConfigurationPtr profiles_config(new XMLConfiguration(profiles_file)); - - Keys profile_settings; - profiles_config->keys("profiles." + profile_name, profile_settings); - - extractSettings(profiles_config, "profiles." + profile_name, profile_settings, settings_to_apply); - } - } - - extractSettings(test_config, "settings", config_settings, settings_to_apply); - - /// This macro goes through all settings in the Settings.h - /// and, if found any settings in test's xml configuration - /// with the same name, sets its value to settings - std::map::iterator it; -#define EXTRACT_SETTING(TYPE, NAME, DEFAULT, DESCRIPTION) \ - it = settings_to_apply.find(#NAME); \ - if (it != settings_to_apply.end()) \ - settings.set(#NAME, settings_to_apply[#NAME]); - - APPLY_FOR_SETTINGS(EXTRACT_SETTING) - -#undef EXTRACT_SETTING - - if (std::find(config_settings.begin(), config_settings.end(), "average_rows_speed_precision") != config_settings.end()) - { - TestStats::avg_rows_speed_precision = test_config->getDouble("settings.average_rows_speed_precision"); - } - - if (std::find(config_settings.begin(), config_settings.end(), "average_bytes_speed_precision") != config_settings.end()) - { - TestStats::avg_bytes_speed_precision = test_config->getDouble("settings.average_bytes_speed_precision"); - } - } - - if (!test_config->has("query") && !test_config->has("query_file")) - { - throw DB::Exception("Missing query fields in test's config: " + test_name, DB::ErrorCodes::BAD_ARGUMENTS); - } - - if (test_config->has("query") && test_config->has("query_file")) - { - throw DB::Exception("Found both query and query_file fields. 
Choose only one", DB::ErrorCodes::BAD_ARGUMENTS); - } - - if (test_config->has("query")) - { - queries = DB::getMultipleValuesFromConfig(*test_config, "", "query"); - } - - if (test_config->has("query_file")) - { - const String filename = test_config->getString("query_file"); - if (filename.empty()) - throw DB::Exception("Empty file name", DB::ErrorCodes::BAD_ARGUMENTS); - - bool tsv = fs::path(filename).extension().string() == ".tsv"; - - ReadBufferFromFile query_file(filename); - Query query; - - if (tsv) - { - while (!query_file.eof()) - { - readEscapedString(query, query_file); - assertChar('\n', query_file); - queries.push_back(query); - } - } - else - { - readStringUntilEOF(query, query_file); - queries.push_back(query); - } - } - - if (queries.empty()) - { - throw DB::Exception("Did not find any query to execute: " + test_name, DB::ErrorCodes::BAD_ARGUMENTS); - } - - if (test_config->has("substitutions")) - { - /// Make "subconfig" of inner xml block - ConfigurationPtr substitutions_view(test_config->createView("substitutions")); - constructSubstitutions(substitutions_view, substitutions[test_name]); - - auto queries_pre_format = queries; - queries.clear(); - for (const auto & query : queries_pre_format) - { - auto formatted = formatQueries(query, substitutions[test_name]); - queries.insert(queries.end(), formatted.begin(), formatted.end()); - } - } - - if (!test_config->has("type")) - { - throw DB::Exception("Missing type property in config: " + test_name, DB::ErrorCodes::BAD_ARGUMENTS); - } - - String config_exec_type = test_config->getString("type"); - if (config_exec_type == "loop") - exec_type = ExecutionType::Loop; - else if (config_exec_type == "once") - exec_type = ExecutionType::Once; - else - throw DB::Exception("Unknown type " + config_exec_type + " in :" + test_name, DB::ErrorCodes::BAD_ARGUMENTS); - - times_to_run = test_config->getUInt("times_to_run", 1); - - stop_conditions_by_run.clear(); - TestStopConditions stop_conditions_template; - if (test_config->has("stop_conditions")) - { - ConfigurationPtr stop_conditions_config(test_config->createView("stop_conditions")); - stop_conditions_template.loadFromConfig(stop_conditions_config); - } - - if (stop_conditions_template.empty()) - throw DB::Exception("No termination conditions were found in config", DB::ErrorCodes::BAD_ARGUMENTS); - - for (size_t i = 0; i < times_to_run * queries.size(); ++i) - stop_conditions_by_run.push_back(stop_conditions_template); - - - ConfigurationPtr metrics_view(test_config->createView("metrics")); - Keys metrics; - metrics_view->keys(metrics); - - main_metric.clear(); - if (test_config->has("main_metric")) - { - Keys main_metrics; - test_config->keys("main_metric", main_metrics); - if (main_metrics.size()) - main_metric = main_metrics[0]; - } - - if (!main_metric.empty()) - { - if (std::find(metrics.begin(), metrics.end(), main_metric) == metrics.end()) - metrics.push_back(main_metric); - } - else - { - if (metrics.empty()) - throw DB::Exception("You shoud specify at least one metric", DB::ErrorCodes::BAD_ARGUMENTS); - main_metric = metrics[0]; - if (lite_output) - throw DB::Exception("Specify main_metric for lite output", DB::ErrorCodes::BAD_ARGUMENTS); - } - - if (metrics.size() > 0) - checkMetricsInput(metrics); - - statistics_by_run.resize(times_to_run * queries.size()); - for (size_t number_of_launch = 0; number_of_launch < times_to_run; ++number_of_launch) - { - QueriesWithIndexes queries_with_indexes; - - for (size_t query_index = 0; query_index < queries.size(); ++query_index) - { - 
size_t statistic_index = number_of_launch * queries.size() + query_index; - stop_conditions_by_run[statistic_index].reset(); - - queries_with_indexes.push_back({queries[query_index], statistic_index}); - } - - if (interrupt_listener.check()) - gotSIGINT = true; - - if (gotSIGINT) - break; - - runQueries(queries_with_indexes); - } - - if (lite_output) - return minOutput(); - else - return constructTotalInfo(metrics); - } - - void checkMetricsInput(const Strings & metrics) const - { - std::vector loop_metrics - = {"min_time", "quantiles", "total_time", "queries_per_second", "rows_per_second", "bytes_per_second"}; - - std::vector non_loop_metrics - = {"max_rows_per_second", "max_bytes_per_second", "avg_rows_per_second", "avg_bytes_per_second"}; - - if (exec_type == ExecutionType::Loop) - { - for (const String & metric : metrics) - if (std::find(non_loop_metrics.begin(), non_loop_metrics.end(), metric) != non_loop_metrics.end()) - throw DB::Exception("Wrong type of metric for loop execution type (" + metric + ")", DB::ErrorCodes::BAD_ARGUMENTS); - } - else - { - for (const String & metric : metrics) - if (std::find(loop_metrics.begin(), loop_metrics.end(), metric) != loop_metrics.end()) - throw DB::Exception("Wrong type of metric for non-loop execution type (" + metric + ")", DB::ErrorCodes::BAD_ARGUMENTS); - } - } - - void runQueries(const QueriesWithIndexes & queries_with_indexes) - { - for (const auto & [query, run_index] : queries_with_indexes) - { - TestStopConditions & stop_conditions = stop_conditions_by_run[run_index]; - TestStats & statistics = statistics_by_run[run_index]; - - statistics.clear(); - try - { - execute(query, statistics, stop_conditions); - - if (exec_type == ExecutionType::Loop) - { - for (size_t iteration = 1; !gotSIGINT; ++iteration) - { - stop_conditions.reportIterations(iteration); - if (stop_conditions.areFulfilled()) - break; - - execute(query, statistics, stop_conditions); - } - } - } - catch (const DB::Exception & e) - { - statistics.exception = e.what() + String(", ") + e.displayText(); - } - - if (!gotSIGINT) - { - statistics.ready = true; - } - } - } - - void execute(const Query & query, TestStats & statistics, TestStopConditions & stop_conditions) - { - statistics.watch_per_query.restart(); - statistics.last_query_was_cancelled = false; - statistics.last_query_rows_read = 0; - statistics.last_query_bytes_read = 0; - - RemoteBlockInputStream stream(connection, query, {}, global_context, &settings); - - stream.setProgressCallback( - [&](const Progress & value) { this->checkFulfilledConditionsAndUpdate(value, stream, statistics, stop_conditions); }); - - stream.readPrefix(); - while (Block block = stream.read()) - ; - stream.readSuffix(); - - if (!statistics.last_query_was_cancelled) - statistics.updateQueryInfo(); - - statistics.setTotalTime(); - } - - void checkFulfilledConditionsAndUpdate( - const Progress & progress, RemoteBlockInputStream & stream, TestStats & statistics, TestStopConditions & stop_conditions) - { - statistics.add(progress.rows, progress.bytes); - - stop_conditions.reportRowsRead(statistics.total_rows_read); - stop_conditions.reportBytesReadUncompressed(statistics.total_bytes_read); - stop_conditions.reportTotalTime(statistics.watch.elapsed() / (1000 * 1000)); - stop_conditions.reportMinTimeNotChangingFor(statistics.min_time_watch.elapsed() / (1000 * 1000)); - stop_conditions.reportMaxSpeedNotChangingFor(statistics.max_rows_speed_watch.elapsed() / (1000 * 1000)); - 
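
The substitution expansion removed a little further below (its logic presumably moves to the new applySubstitutions.cpp, whose body falls outside this excerpt) substitutes one `{name}` mask per recursion level and branches over that mask's values, so the final query list is the cross product of all substitutions. With an illustrative template:

    /// query: "SELECT {func}(x) FROM {table}"
    /// func  = {"min", "max"},  table = {"t1", "t2"}
    /// expands to:
    ///   SELECT min(x) FROM t1
    ///   SELECT min(x) FROM t2
    ///   SELECT max(x) FROM t1
    ///   SELECT max(x) FROM t2
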
stop_conditions.reportAverageSpeedNotChangingFor(statistics.avg_rows_speed_watch.elapsed() / (1000 * 1000)); - - if (stop_conditions.areFulfilled()) - { - statistics.last_query_was_cancelled = true; - stream.cancel(false); + queries_with_indexes.push_back({test_info.queries[query_index], statistic_index}); } if (interrupt_listener.check()) - { - gotSIGINT = true; - statistics.last_query_was_cancelled = true; - stream.cancel(false); - } + break; + + runQueries(queries_with_indexes, statistics_by_run); } - - void constructSubstitutions(ConfigurationPtr & substitutions_view, StringToVector & out_substitutions) - { - Keys xml_substitutions; - substitutions_view->keys(xml_substitutions); - - for (size_t i = 0; i != xml_substitutions.size(); ++i) - { - const ConfigurationPtr xml_substitution(substitutions_view->createView("substitution[" + std::to_string(i) + "]")); - - /// Property values for substitution will be stored in a vector - /// accessible by property name - std::vector xml_values; - xml_substitution->keys("values", xml_values); - - String name = xml_substitution->getString("name"); - - for (size_t j = 0; j != xml_values.size(); ++j) - { - out_substitutions[name].push_back(xml_substitution->getString("values.value[" + std::to_string(j) + "]")); - } - } - } - - std::vector formatQueries(const String & query, StringToVector substitutions_to_generate) - { - std::vector queries_res; - runThroughAllOptionsAndPush(substitutions_to_generate.begin(), substitutions_to_generate.end(), query, queries_res); - return queries_res; - } - - /// Recursive method which goes through all substitution blocks in xml - /// and replaces property {names} by their values - void runThroughAllOptionsAndPush(StringToVector::iterator substitutions_left, - StringToVector::iterator substitutions_right, - const String & template_query, - std::vector & out_queries) - { - if (substitutions_left == substitutions_right) - { - out_queries.push_back(template_query); /// completely substituted query - return; - } - - String substitution_mask = "{" + substitutions_left->first + "}"; - - if (template_query.find(substitution_mask) == String::npos) /// nothing to substitute here - { - runThroughAllOptionsAndPush(std::next(substitutions_left), substitutions_right, template_query, out_queries); - return; - } - - for (const String & value : substitutions_left->second) - { - /// Copy query string for each unique permutation - Query query = template_query; - size_t substr_pos = 0; - - while (substr_pos != String::npos) - { - substr_pos = query.find(substitution_mask); - - if (substr_pos != String::npos) - query.replace(substr_pos, substitution_mask.length(), value); - } - - runThroughAllOptionsAndPush(std::next(substitutions_left), substitutions_right, query, out_queries); - } - } - -public: - String constructTotalInfo(Strings metrics) - { - JSONString json_output; - - json_output.set("hostname", getFQDNOrHostName()); - json_output.set("num_cores", getNumberOfPhysicalCPUCores()); - json_output.set("num_threads", std::thread::hardware_concurrency()); - json_output.set("ram", getMemoryAmount()); - json_output.set("server_version", server_version); - json_output.set("time", DateLUT::instance().timeToString(time(nullptr))); - json_output.set("test_name", test_name); - json_output.set("main_metric", main_metric); - - if (substitutions[test_name].size()) - { - JSONString json_parameters(2); /// here, 2 is the size of \t padding - - for (auto it = substitutions[test_name].begin(); it != substitutions[test_name].end(); ++it) - { - String 
parameter = it->first; - std::vector values = it->second; - - String array_string = "["; - for (size_t i = 0; i != values.size(); ++i) - { - array_string += '"' + std::regex_replace(values[i], QUOTE_REGEX, "\\\"") + '"'; - if (i != values.size() - 1) - { - array_string += ", "; - } - } - array_string += ']'; - - json_parameters.set(parameter, array_string); - } - - json_output.set("parameters", json_parameters.asString()); - } - - std::vector run_infos; - for (size_t query_index = 0; query_index < queries.size(); ++query_index) - { - for (size_t number_of_launch = 0; number_of_launch < times_to_run; ++number_of_launch) - { - TestStats & statistics = statistics_by_run[number_of_launch * queries.size() + query_index]; - - if (!statistics.ready) - continue; - - JSONString runJSON; - - runJSON.set("query", std::regex_replace(queries[query_index], QUOTE_REGEX, "\\\"")); - if (!statistics.exception.empty()) - runJSON.set("exception", statistics.exception); - - if (substitutions_maps.size()) - { - JSONString parameters(4); - - for (auto it = substitutions_maps[query_index].begin(); it != substitutions_maps[query_index].end(); ++it) - { - parameters.set(it->first, it->second); - } - - runJSON.set("parameters", parameters.asString()); - } - - - if (exec_type == ExecutionType::Loop) - { - /// in seconds - if (std::find(metrics.begin(), metrics.end(), "min_time") != metrics.end()) - runJSON.set("min_time", statistics.min_time / double(1000)); - - if (std::find(metrics.begin(), metrics.end(), "quantiles") != metrics.end()) - { - JSONString quantiles(4); /// here, 4 is the size of \t padding - for (double percent = 10; percent <= 90; percent += 10) - { - String quantile_key = std::to_string(percent / 100.0); - while (quantile_key.back() == '0') - quantile_key.pop_back(); - - quantiles.set(quantile_key, statistics.sampler.quantileInterpolated(percent / 100.0)); - } - quantiles.set("0.95", statistics.sampler.quantileInterpolated(95 / 100.0)); - quantiles.set("0.99", statistics.sampler.quantileInterpolated(99 / 100.0)); - quantiles.set("0.999", statistics.sampler.quantileInterpolated(99.9 / 100.0)); - quantiles.set("0.9999", statistics.sampler.quantileInterpolated(99.99 / 100.0)); - - runJSON.set("quantiles", quantiles.asString()); - } - - if (std::find(metrics.begin(), metrics.end(), "total_time") != metrics.end()) - runJSON.set("total_time", statistics.total_time); - - if (std::find(metrics.begin(), metrics.end(), "queries_per_second") != metrics.end()) - runJSON.set("queries_per_second", double(statistics.queries) / statistics.total_time); - - if (std::find(metrics.begin(), metrics.end(), "rows_per_second") != metrics.end()) - runJSON.set("rows_per_second", double(statistics.total_rows_read) / statistics.total_time); - - if (std::find(metrics.begin(), metrics.end(), "bytes_per_second") != metrics.end()) - runJSON.set("bytes_per_second", double(statistics.total_bytes_read) / statistics.total_time); - } - else - { - if (std::find(metrics.begin(), metrics.end(), "max_rows_per_second") != metrics.end()) - runJSON.set("max_rows_per_second", statistics.max_rows_speed); - - if (std::find(metrics.begin(), metrics.end(), "max_bytes_per_second") != metrics.end()) - runJSON.set("max_bytes_per_second", statistics.max_bytes_speed); - - if (std::find(metrics.begin(), metrics.end(), "avg_rows_per_second") != metrics.end()) - runJSON.set("avg_rows_per_second", statistics.avg_rows_speed_value); - - if (std::find(metrics.begin(), metrics.end(), "avg_bytes_per_second") != metrics.end()) - 
runJSON.set("avg_bytes_per_second", statistics.avg_bytes_speed_value); - } - - run_infos.push_back(runJSON); - } - } - - json_output.set("runs", run_infos); - - return json_output.asString(); - } - - String minOutput() - { - String output; - - for (size_t query_index = 0; query_index < queries.size(); ++query_index) - { - for (size_t number_of_launch = 0; number_of_launch < times_to_run; ++number_of_launch) - { - if (queries.size() > 1) - { - output += "query \"" + queries[query_index] + "\", "; - } - - if (substitutions_maps.size()) - { - for (auto it = substitutions_maps[query_index].begin(); it != substitutions_maps[query_index].end(); ++it) - { - output += it->first + " = " + it->second + ", "; - } - } - - output += "run " + std::to_string(number_of_launch + 1) + ": "; - output += main_metric + " = "; - output += statistics_by_run[number_of_launch * queries.size() + query_index].getStatisticByName(main_metric); - output += "\n"; - } - } - - return output; - } -}; + return statistics_by_run; } -static void getFilesFromDir(const fs::path & dir, std::vector & input_files, const bool recursive = false) + +void PerformanceTest::runQueries( + const QueriesWithIndexes & queries_with_indexes, + std::vector & statistics_by_run) { - if (dir.extension().string() == ".xml") - std::cerr << "Warning: '" + dir.string() + "' is a directory, but has .xml extension" << std::endl; - - fs::directory_iterator end; - for (fs::directory_iterator it(dir); it != end; ++it) + for (const auto & [query, run_index] : queries_with_indexes) { - const fs::path file = (*it); - if (recursive && fs::is_directory(file)) - getFilesFromDir(file, input_files, recursive); - else if (!fs::is_directory(file) && file.extension().string() == ".xml") - input_files.push_back(file.string()); - } -} + TestStopConditions & stop_conditions = test_info.stop_conditions_by_run[run_index]; + TestStats & statistics = statistics_by_run[run_index]; - -int mainEntryClickHousePerformanceTest(int argc, char ** argv) -try -{ - using boost::program_options::value; - using Strings = std::vector; - - boost::program_options::options_description desc("Allowed options"); - desc.add_options() - ("help", "produce help message") - ("lite", "use lite version of output") - ("profiles-file", value()->default_value(""), "Specify a file with global profiles") - ("host,h", value()->default_value("localhost"), "") - ("port", value()->default_value(9000), "") - ("secure,s", "Use TLS connection") - ("database", value()->default_value("default"), "") - ("user", value()->default_value("default"), "") - ("password", value()->default_value(""), "") - ("tags", value()->multitoken(), "Run only tests with tag") - ("skip-tags", value()->multitoken(), "Do not run tests with tag") - ("names", value()->multitoken(), "Run tests with specific name") - ("skip-names", value()->multitoken(), "Do not run tests with name") - ("names-regexp", value()->multitoken(), "Run tests with names matching regexp") - ("skip-names-regexp", value()->multitoken(), "Do not run tests with names matching regexp") - ("recursive,r", "Recurse in directories to find all xml's"); - - /// These options will not be displayed in --help - boost::program_options::options_description hidden("Hidden options"); - hidden.add_options() - ("input-files", value>(), ""); - - /// But they will be legit, though. 
And they must be given without name - boost::program_options::positional_options_description positional; - positional.add("input-files", -1); - - boost::program_options::options_description cmdline_options; - cmdline_options.add(desc).add(hidden); - - boost::program_options::variables_map options; - boost::program_options::store( - boost::program_options::command_line_parser(argc, argv).options(cmdline_options).positional(positional).run(), options); - boost::program_options::notify(options); - - if (options.count("help")) - { - std::cout << "Usage: " << argv[0] << " [options] [test_file ...] [tests_folder]\n"; - std::cout << desc << "\n"; - return 0; - } - - Strings input_files; - bool recursive = options.count("recursive"); - - if (!options.count("input-files")) - { - std::cerr << "Trying to find test scenario files in the current folder..."; - fs::path curr_dir("."); - - getFilesFromDir(curr_dir, input_files, recursive); - - if (input_files.empty()) + statistics.clear(); + try { - std::cerr << std::endl; - throw DB::Exception("Did not find any xml files", DB::ErrorCodes::BAD_ARGUMENTS); - } - else - std::cerr << " found " << input_files.size() << " files." << std::endl; - } - else - { - input_files = options["input-files"].as(); - Strings collected_files; + executeQuery(connection, query, statistics, stop_conditions, interrupt_listener); - for (const String & filename : input_files) - { - fs::path file(filename); - - if (!fs::exists(file)) - throw DB::Exception("File '" + filename + "' does not exist", DB::ErrorCodes::FILE_DOESNT_EXIST); - - if (fs::is_directory(file)) + if (test_info.exec_type == ExecutionType::Loop) { - getFilesFromDir(file, collected_files, recursive); - } - else - { - if (file.extension().string() != ".xml") - throw DB::Exception("File '" + filename + "' does not have .xml extension", DB::ErrorCodes::BAD_ARGUMENTS); - collected_files.push_back(filename); + for (size_t iteration = 1; !statistics.got_SIGINT; ++iteration) + { + stop_conditions.reportIterations(iteration); + if (stop_conditions.areFulfilled()) + break; + + executeQuery(connection, query, statistics, stop_conditions, interrupt_listener); + } } } + catch (const DB::Exception & e) + { + statistics.exception = e.what() + String(", ") + e.displayText(); + } - input_files = std::move(collected_files); + if (!statistics.got_SIGINT) + statistics.ready = true; } - - Strings tests_tags = options.count("tags") ? options["tags"].as() : Strings({}); - Strings skip_tags = options.count("skip-tags") ? options["skip-tags"].as() : Strings({}); - Strings tests_names = options.count("names") ? options["names"].as() : Strings({}); - Strings skip_names = options.count("skip-names") ? options["skip-names"].as() : Strings({}); - Strings tests_names_regexp = options.count("names-regexp") ? options["names-regexp"].as() : Strings({}); - Strings skip_names_regexp = options.count("skip-names-regexp") ? 
options["skip-names-regexp"].as() : Strings({}); - - auto timeouts = DB::ConnectionTimeouts::getTCPTimeoutsWithoutFailover(DB::Settings()); - - DB::UseSSL use_ssl; - - DB::PerformanceTest performance_test( - options["host"].as(), - options["port"].as(), - options.count("secure"), - options["database"].as(), - options["user"].as(), - options["password"].as(), - options.count("lite") > 0, - options["profiles-file"].as(), - std::move(input_files), - std::move(tests_tags), - std::move(skip_tags), - std::move(tests_names), - std::move(skip_names), - std::move(tests_names_regexp), - std::move(skip_names_regexp), - timeouts); - return performance_test.run(); } -catch (...) -{ - std::cout << DB::getCurrentExceptionMessage(/*with stacktrace = */ true) << std::endl; - int code = DB::getCurrentExceptionCode(); - return code ? code : 1; + + } diff --git a/dbms/programs/performance-test/PerformanceTest.h b/dbms/programs/performance-test/PerformanceTest.h new file mode 100644 index 00000000000..cebddacfc56 --- /dev/null +++ b/dbms/programs/performance-test/PerformanceTest.h @@ -0,0 +1,49 @@ +#pragma once + +#include +#include +#include +#include "PerformanceTestInfo.h" + + +namespace DB +{ + +using XMLConfiguration = Poco::Util::XMLConfiguration; +using XMLConfigurationPtr = Poco::AutoPtr; +using QueriesWithIndexes = std::vector>; + + +class PerformanceTest +{ +public: + + PerformanceTest( + const XMLConfigurationPtr & config_, + Connection & connection_, + InterruptListener & interrupt_listener_, + const PerformanceTestInfo & test_info_); + + bool checkPreconditions() const; + std::vector execute(); + + const PerformanceTestInfo & getTestInfo() const + { + return test_info; + } + +private: + void runQueries( + const QueriesWithIndexes & queries_with_indexes, + std::vector & statistics_by_run); + + +private: + XMLConfigurationPtr config; + Connection & connection; + InterruptListener & interrupt_listener; + + PerformanceTestInfo test_info; + +}; +} diff --git a/dbms/programs/performance-test/PerformanceTestInfo.cpp b/dbms/programs/performance-test/PerformanceTestInfo.cpp new file mode 100644 index 00000000000..c7a45921eb2 --- /dev/null +++ b/dbms/programs/performance-test/PerformanceTestInfo.cpp @@ -0,0 +1,271 @@ +#include "PerformanceTestInfo.h" +#include +#include +#include +#include +#include +#include "applySubstitutions.h" + +namespace DB +{ +namespace ErrorCodes +{ +extern const int NOT_IMPLEMENTED; +extern const int LOGICAL_ERROR; +extern const int BAD_ARGUMENTS; +extern const int FILE_DOESNT_EXIST; +} + +namespace +{ + +void extractSettings( + const XMLConfigurationPtr & config, + const String & key, + const Strings & settings_list, + std::map & settings_to_apply) +{ + for (const String & setup : settings_list) + { + if (setup == "profile") + continue; + + String value = config->getString(key + "." 
+ setup); + if (value.empty()) + value = "true"; + + settings_to_apply[setup] = value; + } +} + +void checkMetricsInput(const std::vector & metrics, ExecutionType exec_type) +{ + std::vector loop_metrics = { + "min_time", "quantiles", "total_time", + "queries_per_second", "rows_per_second", + "bytes_per_second"}; + + std::vector non_loop_metrics = { + "max_rows_per_second", "max_bytes_per_second", + "avg_rows_per_second", "avg_bytes_per_second"}; + + if (exec_type == ExecutionType::Loop) + { + for (const std::string & metric : metrics) + { + auto non_loop_pos = + std::find(non_loop_metrics.begin(), non_loop_metrics.end(), metric); + + if (non_loop_pos != non_loop_metrics.end()) + throw Exception("Wrong type of metric for loop execution type (" + metric + ")", + ErrorCodes::BAD_ARGUMENTS); + } + } + else + { + for (const std::string & metric : metrics) + { + auto loop_pos = std::find(loop_metrics.begin(), loop_metrics.end(), metric); + if (loop_pos != loop_metrics.end()) + throw Exception( + "Wrong type of metric for non-loop execution type (" + metric + ")", + ErrorCodes::BAD_ARGUMENTS); + } + } +} + +} + + +namespace fs = boost::filesystem; + +PerformanceTestInfo::PerformanceTestInfo( + XMLConfigurationPtr config, + const std::string & profiles_file_) + : profiles_file(profiles_file_) +{ + applySettings(config); + extractQueries(config); + processSubstitutions(config); + getExecutionType(config); + getStopConditions(config); + getMetrics(config); +} + +void PerformanceTestInfo::applySettings(XMLConfigurationPtr config) +{ + if (config->has("settings")) + { + std::map settings_to_apply; + std::vector config_settings; + config->keys("settings", config_settings); + + auto settings_contain = [&config_settings] (const std::string & setting) + { + auto position = std::find(config_settings.begin(), config_settings.end(), setting); + return position != config_settings.end(); + + }; + /// Preprocess configuration file + if (settings_contain("profile")) + { + if (!profiles_file.empty()) + { + String profile_name = config->getString("settings.profile"); + XMLConfigurationPtr profiles_config(new XMLConfiguration(profiles_file)); + + std::vector profile_settings; + profiles_config->keys("profiles." + profile_name, profile_settings); + + extractSettings(profiles_config, "profiles." 
+ profile_name, profile_settings, settings_to_apply);
+            }
+        }
+
+        extractSettings(config, "settings", config_settings, settings_to_apply);
+
+        /// This macro goes through all settings in Settings.h and, if a setting
+        /// with the same name is found in the test's xml configuration,
+        /// copies its value into `settings`.
+        std::map::iterator it;
+#define EXTRACT_SETTING(TYPE, NAME, DEFAULT, DESCRIPTION) \
+        it = settings_to_apply.find(#NAME);               \
+        if (it != settings_to_apply.end())                \
+            settings.set(#NAME, settings_to_apply[#NAME]);
+
+        APPLY_FOR_SETTINGS(EXTRACT_SETTING)
+
+#undef EXTRACT_SETTING
+
+        if (settings_contain("average_rows_speed_precision"))
+            TestStats::avg_rows_speed_precision =
+                config->getDouble("settings.average_rows_speed_precision");
+
+        if (settings_contain("average_bytes_speed_precision"))
+            TestStats::avg_bytes_speed_precision =
+                config->getDouble("settings.average_bytes_speed_precision");
+    }
+}
+
+void PerformanceTestInfo::extractQueries(XMLConfigurationPtr config)
+{
+    if (config->has("query"))
+        queries = getMultipleValuesFromConfig(*config, "", "query");
+
+    if (config->has("query_file"))
+    {
+        const String filename = config->getString("query_file");
+        if (filename.empty())
+            throw Exception("Empty file name", ErrorCodes::BAD_ARGUMENTS);
+
+        bool tsv = fs::path(filename).extension().string() == ".tsv";
+
+        ReadBufferFromFile query_file(filename);
+        std::string query;
+
+        if (tsv)
+        {
+            while (!query_file.eof())
+            {
+                readEscapedString(query, query_file);
+                assertChar('\n', query_file);
+                queries.push_back(query);
+            }
+        }
+        else
+        {
+            readStringUntilEOF(query, query_file);
+            queries.push_back(query);
+        }
+    }
+
+    if (queries.empty())
+        throw Exception("Did not find any query to execute: " + test_name,
+            ErrorCodes::BAD_ARGUMENTS);
+}
+
+void PerformanceTestInfo::processSubstitutions(XMLConfigurationPtr config)
+{
+    if (config->has("substitutions"))
+    {
+        /// Make a "subconfig" of the inner xml block
+        ConfigurationPtr substitutions_view(config->createView("substitutions"));
+        constructSubstitutions(substitutions_view, substitutions);
+
+        auto queries_pre_format = queries;
+        queries.clear();
+        for (const auto & query : queries_pre_format)
+        {
+            auto formatted = formatQueries(query, substitutions);
+            queries.insert(queries.end(), formatted.begin(), formatted.end());
+        }
+    }
+}
+
+void PerformanceTestInfo::getExecutionType(XMLConfigurationPtr config)
+{
+    if (!config->has("type"))
+        throw Exception("Missing type property in config: " + test_name,
+            ErrorCodes::BAD_ARGUMENTS);
+
+    String config_exec_type = config->getString("type");
+    if (config_exec_type == "loop")
+        exec_type = ExecutionType::Loop;
+    else if (config_exec_type == "once")
+        exec_type = ExecutionType::Once;
+    else
+        throw Exception("Unknown type " + config_exec_type + " in: " + test_name,
+            ErrorCodes::BAD_ARGUMENTS);
+}
+
+
+void PerformanceTestInfo::getStopConditions(XMLConfigurationPtr config)
+{
+    TestStopConditions stop_conditions_template;
+    if (config->has("stop_conditions"))
+    {
+        ConfigurationPtr stop_conditions_config(config->createView("stop_conditions"));
+        stop_conditions_template.loadFromConfig(stop_conditions_config);
+    }
+
+    if (stop_conditions_template.empty())
+        throw Exception("No termination conditions were found in config",
+            ErrorCodes::BAD_ARGUMENTS);
+
+    /// times_to_run must be read before it is used to size stop_conditions_by_run.
+    times_to_run = config->getUInt("times_to_run", 1);
+
+    for (size_t i = 0; i < times_to_run * queries.size(); ++i)
+        stop_conditions_by_run.push_back(stop_conditions_template);
+}
+
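+
+/// A hypothetical test description, sketching the xml shape this parser
+/// expects. The element names come from the accessors above; the values
+/// (and the <test> root tag) are invented for illustration only:
+///
+///   <test>
+///       <type>loop</type>
+///       <times_to_run>2</times_to_run>
+///       <query>SELECT count() FROM {table}</query>
+///       <substitutions>
+///           <substitution>
+///               <name>table</name>
+///               <values>
+///                   <value>hits_10m</value>
+///                   <value>hits_100m</value>
+///               </values>
+///           </substitution>
+///       </substitutions>
+///       <stop_conditions>...</stop_conditions>
+///       <main_metric><min_time/></main_metric>
+///   </test>
+
+void PerformanceTestInfo::getMetrics(XMLConfigurationPtr config)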
+{
+    ConfigurationPtr metrics_view(config->createView("metrics"));
+    metrics_view->keys(metrics);
+
+    if (config->has("main_metric"))
+    {
+        std::vector<std::string> main_metrics;
+        config->keys("main_metric", main_metrics);
+        if (main_metrics.size())
+            main_metric = main_metrics[0];
+    }
+
+    if (!main_metric.empty())
+    {
+        if (std::find(metrics.begin(), metrics.end(), main_metric) == metrics.end())
+            metrics.push_back(main_metric);
+    }
+    else
+    {
+        if (metrics.empty())
+            throw Exception("You should specify at least one metric",
+                ErrorCodes::BAD_ARGUMENTS);
+        main_metric = metrics[0];
+    }
+
+    if (metrics.size() > 0)
+        checkMetricsInput(metrics, exec_type);
+}
+
+}
diff --git a/dbms/programs/performance-test/PerformanceTestInfo.h b/dbms/programs/performance-test/PerformanceTestInfo.h
new file mode 100644
index 00000000000..c788a4f989a
--- /dev/null
+++ b/dbms/programs/performance-test/PerformanceTestInfo.h
@@ -0,0 +1,52 @@
+#pragma once
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "StopConditionsSet.h"
+#include "TestStopConditions.h"
+#include "TestStats.h"
+
+namespace DB
+{
+enum class ExecutionType
+{
+    Loop,
+    Once
+};
+
+using XMLConfiguration = Poco::Util::XMLConfiguration;
+using XMLConfigurationPtr = Poco::AutoPtr<XMLConfiguration>;
+using StringToVector = std::map<std::string, std::vector<std::string>>;
+
+class PerformanceTestInfo
+{
+public:
+    PerformanceTestInfo(XMLConfigurationPtr config, const std::string & profiles_file_);
+
+    std::string test_name;
+    std::string main_metric;
+
+    std::vector<std::string> queries;
+    std::vector<std::string> metrics;
+
+    Settings settings;
+    ExecutionType exec_type;
+    StringToVector substitutions;
+    size_t times_to_run;
+    std::string profiles_file;
+    std::vector<TestStopConditions> stop_conditions_by_run;
+
+private:
+    void applySettings(XMLConfigurationPtr config);
+    void extractQueries(XMLConfigurationPtr config);
+    void processSubstitutions(XMLConfigurationPtr config);
+    void getExecutionType(XMLConfigurationPtr config);
+    void getStopConditions(XMLConfigurationPtr config);
+    void getMetrics(XMLConfigurationPtr config);
+};
+
+}
diff --git a/dbms/programs/performance-test/PerformanceTestSuite.cpp b/dbms/programs/performance-test/PerformanceTestSuite.cpp
new file mode 100644
index 00000000000..29cb91afac5
--- /dev/null
+++ b/dbms/programs/performance-test/PerformanceTestSuite.cpp
@@ -0,0 +1,400 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "JSONString.h"
+#include "StopConditionsSet.h"
+#include "TestStopConditions.h"
+#include "TestStats.h"
+#include "ConfigPreprocessor.h"
+#include "PerformanceTest.h"
+#include "ReportBuilder.h"
+
+#ifndef __clang__
+#pragma GCC optimize("-fno-var-tracking-assignments")
+#endif
+
+
+/** Tests launcher for ClickHouse.
+  * The tool walks through the given folder (or the default one) looking for
+  * files with test descriptions and launches them.
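+  *
+  * A typical invocation might look like this (the binary name and paths are
+  * illustrative; the flags are defined in the option parser below):
+  *
+  *     clickhouse-performance-test --host localhost --port 9000 --recursive tests/performance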
+ */ +namespace fs = boost::filesystem; +using String = std::string; + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; + extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; + extern const int FILE_DOESNT_EXIST; +} + + +using ConfigurationPtr = Poco::AutoPtr; + +class PerformanceTestSuite : public Poco::Util::Application +{ +public: + using Strings = std::vector; + + PerformanceTestSuite(const String & host_, + const UInt16 port_, + const bool secure_, + const String & default_database_, + const String & user_, + const String & password_, + const bool lite_output_, + const String & profiles_file_, + Strings && input_files_, + Strings && tests_tags_, + Strings && skip_tags_, + Strings && tests_names_, + Strings && skip_names_, + Strings && tests_names_regexp_, + Strings && skip_names_regexp_, + const ConnectionTimeouts & timeouts) + : connection(host_, port_, default_database_, user_, password_, timeouts, "performance-test", Protocol::Compression::Enable, secure_ ? Protocol::Secure::Enable : Protocol::Secure::Disable), + lite_output(lite_output_), + profiles_file(profiles_file_), + input_files(input_files_), + tests_tags(std::move(tests_tags_)), + skip_tags(std::move(skip_tags_)), + tests_names(std::move(tests_names_)), + skip_names(std::move(skip_names_)), + tests_names_regexp(std::move(tests_names_regexp_)), + skip_names_regexp(std::move(skip_names_regexp_)) + { + if (input_files.size() < 1) + { + throw DB::Exception("No tests were specified", DB::ErrorCodes::BAD_ARGUMENTS); + } + } + + void initialize(Poco::Util::Application & self [[maybe_unused]]) + { + std::string home_path; + const char * home_path_cstr = getenv("HOME"); + if (home_path_cstr) + home_path = home_path_cstr; + configReadClient(Poco::Util::Application::instance().config(), home_path); + } + + int main(const std::vector < std::string > & /* args */) + { + std::string name; + UInt64 version_major; + UInt64 version_minor; + UInt64 version_patch; + UInt64 version_revision; + connection.getServerVersion(name, version_major, version_minor, version_patch, version_revision); + + std::stringstream ss; + ss << version_major << "." << version_minor << "." 
<< version_patch; + server_version = ss.str(); + + report_builder = std::make_shared(server_version); + + processTestsConfigurations(input_files); + + return 0; + } + +private: + std::string test_name; + + const Strings & tests_tags; + const Strings & tests_names; + const Strings & tests_names_regexp; + const Strings & skip_tags; + const Strings & skip_names; + const Strings & skip_names_regexp; + + std::shared_ptr report_builder; + using Query = String; + using Queries = std::vector; + using QueriesWithIndexes = std::vector>; + Queries queries; + + Connection connection; + std::string server_version; + + using Keys = std::vector; + + InterruptListener interrupt_listener; + + using XMLConfiguration = Poco::Util::XMLConfiguration; + using XMLConfigurationPtr = Poco::AutoPtr; + + using Paths = std::vector; + using StringToVector = std::map>; + using StringToMap = std::map; + StringToMap substitutions; + + + std::vector stop_conditions_by_run; + String main_metric; + bool lite_output; + String profiles_file; + + Strings input_files; + std::vector tests_configurations; + + + enum class ExecutionType + { + Loop, + Once + }; + ExecutionType exec_type; + + + size_t times_to_run = 1; + std::vector statistics_by_run; + + void processTestsConfigurations(const Paths & paths) + { + ConfigPreprocessor config_prep(paths); + tests_configurations = config_prep.processConfig( + tests_tags, + tests_names, + tests_names_regexp, + skip_tags, + skip_names, + skip_names_regexp); + + if (tests_configurations.size()) + { + Strings outputs; + + for (auto & test_config : tests_configurations) + { + String output = runTest(test_config); + if (lite_output) + std::cout << output; + else + outputs.push_back(output); + } + + if (!lite_output && outputs.size()) + { + std::cout << "[" << std::endl; + + for (size_t i = 0; i != outputs.size(); ++i) + { + std::cout << outputs[i]; + if (i != outputs.size() - 1) + std::cout << ","; + + std::cout << std::endl; + } + + std::cout << "]" << std::endl; + } + } + } + + String runTest(XMLConfigurationPtr & test_config) + { + //test_name = test_config->getString("name"); + //std::cerr << "Running: " << test_name << "\n"; + + PerformanceTestInfo info(test_config, profiles_file); + PerformanceTest current(test_config, connection, interrupt_listener, info); + current.checkPreconditions(); + + auto result = current.execute(); + + + if (lite_output) + return report_builder->buildCompactReport(info, result); + else + return report_builder->buildFullReport(info, result); + } + +}; +} + +static void getFilesFromDir(const fs::path & dir, std::vector & input_files, const bool recursive = false) +{ + if (dir.extension().string() == ".xml") + std::cerr << "Warning: '" + dir.string() + "' is a directory, but has .xml extension" << std::endl; + + fs::directory_iterator end; + for (fs::directory_iterator it(dir); it != end; ++it) + { + const fs::path file = (*it); + if (recursive && fs::is_directory(file)) + getFilesFromDir(file, input_files, recursive); + else if (!fs::is_directory(file) && file.extension().string() == ".xml") + input_files.push_back(file.string()); + } +} + + +int mainEntryClickHousePerformanceTest(int argc, char ** argv) +try +{ + using boost::program_options::value; + using Strings = std::vector; + + boost::program_options::options_description desc("Allowed options"); + desc.add_options() + ("help", "produce help message") + ("lite", "use lite version of output") + ("profiles-file", value()->default_value(""), "Specify a file with global profiles") + ("host,h", 
value()->default_value("localhost"), "") + ("port", value()->default_value(9000), "") + ("secure,s", "Use TLS connection") + ("database", value()->default_value("default"), "") + ("user", value()->default_value("default"), "") + ("password", value()->default_value(""), "") + ("tags", value()->multitoken(), "Run only tests with tag") + ("skip-tags", value()->multitoken(), "Do not run tests with tag") + ("names", value()->multitoken(), "Run tests with specific name") + ("skip-names", value()->multitoken(), "Do not run tests with name") + ("names-regexp", value()->multitoken(), "Run tests with names matching regexp") + ("skip-names-regexp", value()->multitoken(), "Do not run tests with names matching regexp") + ("recursive,r", "Recurse in directories to find all xml's"); + + /// These options will not be displayed in --help + boost::program_options::options_description hidden("Hidden options"); + hidden.add_options() + ("input-files", value>(), ""); + + /// But they will be legit, though. And they must be given without name + boost::program_options::positional_options_description positional; + positional.add("input-files", -1); + + boost::program_options::options_description cmdline_options; + cmdline_options.add(desc).add(hidden); + + boost::program_options::variables_map options; + boost::program_options::store( + boost::program_options::command_line_parser(argc, argv).options(cmdline_options).positional(positional).run(), options); + boost::program_options::notify(options); + + if (options.count("help")) + { + std::cout << "Usage: " << argv[0] << " [options] [test_file ...] [tests_folder]\n"; + std::cout << desc << "\n"; + return 0; + } + + Strings input_files; + bool recursive = options.count("recursive"); + + if (!options.count("input-files")) + { + std::cerr << "Trying to find test scenario files in the current folder..."; + fs::path curr_dir("."); + + getFilesFromDir(curr_dir, input_files, recursive); + + if (input_files.empty()) + { + std::cerr << std::endl; + throw DB::Exception("Did not find any xml files", DB::ErrorCodes::BAD_ARGUMENTS); + } + else + std::cerr << " found " << input_files.size() << " files." << std::endl; + } + else + { + input_files = options["input-files"].as(); + Strings collected_files; + + for (const String & filename : input_files) + { + fs::path file(filename); + + if (!fs::exists(file)) + throw DB::Exception("File '" + filename + "' does not exist", DB::ErrorCodes::FILE_DOESNT_EXIST); + + if (fs::is_directory(file)) + { + getFilesFromDir(file, collected_files, recursive); + } + else + { + if (file.extension().string() != ".xml") + throw DB::Exception("File '" + filename + "' does not have .xml extension", DB::ErrorCodes::BAD_ARGUMENTS); + collected_files.push_back(filename); + } + } + + input_files = std::move(collected_files); + } + + Strings tests_tags = options.count("tags") ? options["tags"].as() : Strings({}); + Strings skip_tags = options.count("skip-tags") ? options["skip-tags"].as() : Strings({}); + Strings tests_names = options.count("names") ? options["names"].as() : Strings({}); + Strings skip_names = options.count("skip-names") ? options["skip-names"].as() : Strings({}); + Strings tests_names_regexp = options.count("names-regexp") ? options["names-regexp"].as() : Strings({}); + Strings skip_names_regexp = options.count("skip-names-regexp") ? 
options["skip-names-regexp"].as() : Strings({}); + + auto timeouts = DB::ConnectionTimeouts::getTCPTimeoutsWithoutFailover(DB::Settings()); + + DB::UseSSL use_ssl; + + DB::PerformanceTestSuite performance_test( + options["host"].as(), + options["port"].as(), + options.count("secure"), + options["database"].as(), + options["user"].as(), + options["password"].as(), + options.count("lite") > 0, + options["profiles-file"].as(), + std::move(input_files), + std::move(tests_tags), + std::move(skip_tags), + std::move(tests_names), + std::move(skip_names), + std::move(tests_names_regexp), + std::move(skip_names_regexp), + timeouts); + return performance_test.run(); +} +catch (...) +{ + std::cout << DB::getCurrentExceptionMessage(/*with stacktrace = */ true) << std::endl; + int code = DB::getCurrentExceptionCode(); + return code ? code : 1; +} diff --git a/dbms/programs/performance-test/ReportBuilder.cpp b/dbms/programs/performance-test/ReportBuilder.cpp new file mode 100644 index 00000000000..cd381aefa5e --- /dev/null +++ b/dbms/programs/performance-test/ReportBuilder.cpp @@ -0,0 +1,190 @@ +#include "ReportBuilder.h" +#include "JSONString.h" +#include +#include +#include +#include +#include + + +namespace DB +{ +namespace +{ +const std::regex QUOTE_REGEX{"\""}; +} + +ReportBuilder::ReportBuilder(const std::string & server_version_) + : server_version(server_version_) + , hostname(getFQDNOrHostName()) + , num_cores(getNumberOfPhysicalCPUCores()) + , num_threads(std::thread::hardware_concurrency()) + , ram(getMemoryAmount()) +{ +} + +std::string ReportBuilder::getCurrentTime() const +{ + return DateLUT::instance().timeToString(time(nullptr)); +} + +std::string ReportBuilder::buildFullReport( + const PerformanceTestInfo & test_info, + std::vector & stats) const +{ + JSONString json_output; + + json_output.set("hostname", hostname); + json_output.set("num_cores", num_cores); + json_output.set("num_threads", num_threads); + json_output.set("ram", ram); + json_output.set("server_version", server_version); + json_output.set("time", getCurrentTime()); + json_output.set("test_name", test_info.test_name); + json_output.set("main_metric", test_info.main_metric); + + auto has_metric = [&test_info] (const std::string & metric_name) + { + return std::find(test_info.metrics.begin(), + test_info.metrics.end(), metric_name) != test_info.metrics.end(); + }; + + if (test_info.substitutions.size()) + { + JSONString json_parameters(2); /// here, 2 is the size of \t padding + + for (auto it = test_info.substitutions.begin(); it != test_info.substitutions.end(); ++it) + { + String parameter = it->first; + std::vector values = it->second; + + String array_string = "["; + for (size_t i = 0; i != values.size(); ++i) + { + array_string += '"' + std::regex_replace(values[i], QUOTE_REGEX, "\\\"") + '"'; + if (i != values.size() - 1) + { + array_string += ", "; + } + } + array_string += ']'; + + json_parameters.set(parameter, array_string); + } + + json_output.set("parameters", json_parameters.asString()); + } + + std::vector run_infos; + for (size_t query_index = 0; query_index < test_info.queries.size(); ++query_index) + { + for (size_t number_of_launch = 0; number_of_launch < test_info.times_to_run; ++number_of_launch) + { + size_t stat_index = number_of_launch * test_info.queries.size() + query_index; + TestStats & statistics = stats[stat_index]; + + if (!statistics.ready) + continue; + + JSONString runJSON; + + auto query = std::regex_replace(test_info.queries[query_index], QUOTE_REGEX, "\\\""); + runJSON.set("query", 
query); + if (!statistics.exception.empty()) + runJSON.set("exception", statistics.exception); + + if (test_info.exec_type == ExecutionType::Loop) + { + /// in seconds + if (has_metric("min_time")) + runJSON.set("min_time", statistics.min_time / double(1000)); + + if (has_metric("quantiles")) + { + JSONString quantiles(4); /// here, 4 is the size of \t padding + for (double percent = 10; percent <= 90; percent += 10) + { + String quantile_key = std::to_string(percent / 100.0); + while (quantile_key.back() == '0') + quantile_key.pop_back(); + + quantiles.set(quantile_key, + statistics.sampler.quantileInterpolated(percent / 100.0)); + } + quantiles.set("0.95", + statistics.sampler.quantileInterpolated(95 / 100.0)); + quantiles.set("0.99", + statistics.sampler.quantileInterpolated(99 / 100.0)); + quantiles.set("0.999", + statistics.sampler.quantileInterpolated(99.9 / 100.0)); + quantiles.set("0.9999", + statistics.sampler.quantileInterpolated(99.99 / 100.0)); + + runJSON.set("quantiles", quantiles.asString()); + } + + if (has_metric("total_time")) + runJSON.set("total_time", statistics.total_time); + + if (has_metric("queries_per_second")) + runJSON.set("queries_per_second", + double(statistics.queries) / statistics.total_time); + + if (has_metric("rows_per_second")) + runJSON.set("rows_per_second", + double(statistics.total_rows_read) / statistics.total_time); + + if (has_metric("bytes_per_second")) + runJSON.set("bytes_per_second", + double(statistics.total_bytes_read) / statistics.total_time); + } + else + { + if (has_metric("max_rows_per_second")) + runJSON.set("max_rows_per_second", statistics.max_rows_speed); + + if (has_metric("max_bytes_per_second")) + runJSON.set("max_bytes_per_second", statistics.max_bytes_speed); + + if (has_metric("avg_rows_per_second")) + runJSON.set("avg_rows_per_second", statistics.avg_rows_speed_value); + + if (has_metric("avg_bytes_per_second")) + runJSON.set("avg_bytes_per_second", statistics.avg_bytes_speed_value); + } + + run_infos.push_back(runJSON); + } + } + + json_output.set("runs", run_infos); + + return json_output.asString(); +} + +std::string ReportBuilder::buildCompactReport( + const PerformanceTestInfo & test_info, + std::vector & stats) const +{ + + String output; + + for (size_t query_index = 0; query_index < test_info.queries.size(); ++query_index) + { + for (size_t number_of_launch = 0; number_of_launch < test_info.times_to_run; ++number_of_launch) + { + if (test_info.queries.size() > 1) + output += "query \"" + test_info.queries[query_index] + "\", "; + + output += "run " + std::to_string(number_of_launch + 1) + ": "; + output += test_info.main_metric + " = "; + size_t index = number_of_launch * test_info.queries.size() + query_index; + output += stats[index].getStatisticByName(test_info.main_metric); + output += "\n"; + } + } + return output; +} + + +} diff --git a/dbms/programs/performance-test/ReportBuilder.h b/dbms/programs/performance-test/ReportBuilder.h new file mode 100644 index 00000000000..0972061e27a --- /dev/null +++ b/dbms/programs/performance-test/ReportBuilder.h @@ -0,0 +1,30 @@ +#pragma once +#include "PerformanceTestInfo.h" + +namespace DB +{ + +class ReportBuilder +{ +public: + explicit ReportBuilder(const std::string & server_version_); + std::string buildFullReport( + const PerformanceTestInfo & test_info, + std::vector & stats) const; + + std::string buildCompactReport( + const PerformanceTestInfo & test_info, + std::vector & stats) const; +private: + std::string server_version; + std::string hostname; + size_t 
num_cores;
+    size_t num_threads;
+    size_t ram;
+
+private:
+    std::string getCurrentTime() const;
+
+};
+
+}
diff --git a/dbms/programs/performance-test/TestStats.cpp b/dbms/programs/performance-test/TestStats.cpp
index 163aefdc98d..bc23ef17472 100644
--- a/dbms/programs/performance-test/TestStats.cpp
+++ b/dbms/programs/performance-test/TestStats.cpp
@@ -157,6 +157,7 @@ void TestStats::clear()
     total_bytes_read = 0;
     last_query_rows_read = 0;
     last_query_bytes_read = 0;
+    got_SIGINT = false;
 
     min_time = std::numeric_limits<UInt64>::max();
     total_time = 0;
diff --git a/dbms/programs/performance-test/TestStats.h b/dbms/programs/performance-test/TestStats.h
index 41a8efc3beb..5b8dd773566 100644
--- a/dbms/programs/performance-test/TestStats.h
+++ b/dbms/programs/performance-test/TestStats.h
@@ -51,6 +51,8 @@ struct TestStats
     bool ready = false; // check if a query wasn't interrupted by SIGINT
     String exception;
 
+    bool got_SIGINT = false;
+
     String getStatisticByName(const String & statistic_name);
 
     void update_min_time(UInt64 min_time_candidate);
diff --git a/dbms/programs/performance-test/applySubstitutions.cpp b/dbms/programs/performance-test/applySubstitutions.cpp
new file mode 100644
index 00000000000..915d9ba7230
--- /dev/null
+++ b/dbms/programs/performance-test/applySubstitutions.cpp
@@ -0,0 +1,82 @@
+#include "applySubstitutions.h"
+#include
+#include
+
+namespace DB
+{
+
+void constructSubstitutions(ConfigurationPtr & substitutions_view, StringToVector & out_substitutions)
+{
+    std::vector<std::string> xml_substitutions;
+    substitutions_view->keys(xml_substitutions);
+
+    for (size_t i = 0; i != xml_substitutions.size(); ++i)
+    {
+        const ConfigurationPtr xml_substitution(substitutions_view->createView("substitution[" + std::to_string(i) + "]"));
+
+        /// Property values for substitution will be stored in a vector
+        /// accessible by property name
+        std::vector<std::string> xml_values;
+        xml_substitution->keys("values", xml_values);
+
+        String name = xml_substitution->getString("name");
+
+        for (size_t j = 0; j != xml_values.size(); ++j)
+        {
+            out_substitutions[name].push_back(xml_substitution->getString("values.value[" + std::to_string(j) + "]"));
+        }
+    }
+}
+
+/// Recursive method which goes through all substitution blocks in xml
+/// and replaces property {names} by their values
+void runThroughAllOptionsAndPush(StringToVector::iterator substitutions_left,
+    StringToVector::iterator substitutions_right,
+    const String & template_query,
+    std::vector<String> & out_queries)
+{
+    if (substitutions_left == substitutions_right)
+    {
+        out_queries.push_back(template_query); /// completely substituted query
+        return;
+    }
+
+    String substitution_mask = "{" + substitutions_left->first + "}";
+
+    if (template_query.find(substitution_mask) == String::npos) /// nothing to substitute here
+    {
+        runThroughAllOptionsAndPush(std::next(substitutions_left), substitutions_right, template_query, out_queries);
+        return;
+    }
+
+    for (const String & value : substitutions_left->second)
+    {
+        /// Copy query string for each unique permutation
+        std::string query = template_query;
+        size_t substr_pos = 0;
+
+        while (substr_pos != String::npos)
+        {
+            substr_pos = query.find(substitution_mask);
+
+            if (substr_pos != String::npos)
+                query.replace(substr_pos, substitution_mask.length(), value);
+        }
+
+        runThroughAllOptionsAndPush(std::next(substitutions_left), substitutions_right, query, out_queries);
+    }
+}
+
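+/// A small worked example of the recursion above (values invented for
+/// illustration): with substitutions {"func" -> ["sum", "avg"]} and the
+/// template query "SELECT {func}(x)", formatQueries() returns two fully
+/// substituted queries, "SELECT sum(x)" and "SELECT avg(x)".
+
+std::vector<String> formatQueries(const String & query, StringToVector substitutions_to_generate)
+{
+    std::vector<String> queries_res;
+    runThroughAllOptionsAndPush(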
substitutions_to_generate.begin(), + substitutions_to_generate.end(), + query, + queries_res); + return queries_res; +} + + +} diff --git a/dbms/programs/performance-test/applySubstitutions.h b/dbms/programs/performance-test/applySubstitutions.h new file mode 100644 index 00000000000..7d50e4bb09a --- /dev/null +++ b/dbms/programs/performance-test/applySubstitutions.h @@ -0,0 +1,18 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +using StringToVector = std::map>; +using ConfigurationPtr = Poco::AutoPtr; + +void constructSubstitutions(ConfigurationPtr & substitutions_view, StringToVector & out_substitutions); + +std::vector formatQueries(const String & query, StringToVector substitutions_to_generate); + +} diff --git a/dbms/programs/performance-test/executeQuery.cpp b/dbms/programs/performance-test/executeQuery.cpp new file mode 100644 index 00000000000..45487acf3b9 --- /dev/null +++ b/dbms/programs/performance-test/executeQuery.cpp @@ -0,0 +1,72 @@ +#include "executeQuery.h" +#include +#include +#include +namespace DB +{ +namespace +{ + +void checkFulfilledConditionsAndUpdate( + const Progress & progress, RemoteBlockInputStream & stream, + TestStats & statistics, TestStopConditions & stop_conditions, + InterruptListener & interrupt_listener) +{ + statistics.add(progress.rows, progress.bytes); + + stop_conditions.reportRowsRead(statistics.total_rows_read); + stop_conditions.reportBytesReadUncompressed(statistics.total_bytes_read); + stop_conditions.reportTotalTime(statistics.watch.elapsed() / (1000 * 1000)); + stop_conditions.reportMinTimeNotChangingFor(statistics.min_time_watch.elapsed() / (1000 * 1000)); + stop_conditions.reportMaxSpeedNotChangingFor(statistics.max_rows_speed_watch.elapsed() / (1000 * 1000)); + stop_conditions.reportAverageSpeedNotChangingFor(statistics.avg_rows_speed_watch.elapsed() / (1000 * 1000)); + + if (stop_conditions.areFulfilled()) + { + statistics.last_query_was_cancelled = true; + stream.cancel(false); + } + + if (interrupt_listener.check()) + { + statistics.got_SIGINT = true; + statistics.last_query_was_cancelled = true; + stream.cancel(false); + } +} + +} + +void executeQuery( + Connection & connection, + const std::string & query, + TestStats & statistics, + TestStopConditions & stop_conditions, + InterruptListener & interrupt_listener) +{ + statistics.watch_per_query.restart(); + statistics.last_query_was_cancelled = false; + statistics.last_query_rows_read = 0; + statistics.last_query_bytes_read = 0; + + Settings settings; + Context global_context = Context::createGlobal(); + RemoteBlockInputStream stream(connection, query, {}, global_context, &settings); + + stream.setProgressCallback( + [&](const Progress & value) + { + checkFulfilledConditionsAndUpdate( + value, stream, statistics, + stop_conditions, interrupt_listener); + }); + stream.readPrefix(); + while (Block block = stream.read()); + stream.readSuffix(); + + if (!statistics.last_query_was_cancelled) + statistics.updateQueryInfo(); + + statistics.setTotalTime(); +} +} diff --git a/dbms/programs/performance-test/executeQuery.h b/dbms/programs/performance-test/executeQuery.h new file mode 100644 index 00000000000..27272842f02 --- /dev/null +++ b/dbms/programs/performance-test/executeQuery.h @@ -0,0 +1,16 @@ +#pragma once +#include +#include "TestStats.h" +#include "TestStopConditions.h" +#include +#include + +namespace DB +{ +void executeQuery( + Connection & connection, + const std::string & query, + TestStats & statistics, + TestStopConditions & stop_conditions, + 
InterruptListener & interrupt_listener);
+}

From 253ac93459f80610a864b033893d36ab5f7c7380 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov 
Date: Fri, 25 Jan 2019 21:44:30 +0300
Subject: [PATCH 137/586] Addition to prev. revision #4150

---
 dbms/src/Common/CurrentThread.cpp                         | 3 +++
 dbms/src/DataStreams/AsynchronousBlockInputStream.cpp     | 2 +-
 .../MergingAggregatedMemoryEfficientBlockInputStream.cpp  | 2 +-
 dbms/src/Interpreters/ThreadStatusExt.cpp                 | 1 -
 4 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/dbms/src/Common/CurrentThread.cpp b/dbms/src/Common/CurrentThread.cpp
index c3e0cae9571..b6d161af67e 100644
--- a/dbms/src/Common/CurrentThread.cpp
+++ b/dbms/src/Common/CurrentThread.cpp
@@ -71,6 +71,9 @@ std::shared_ptr CurrentThread::getInternalTextLogsQueue()
 
 ThreadGroupStatusPtr CurrentThread::getGroup()
 {
+    if (!current_thread)
+        return nullptr;
+
     return get().getThreadGroup();
 }
 
diff --git a/dbms/src/DataStreams/AsynchronousBlockInputStream.cpp b/dbms/src/DataStreams/AsynchronousBlockInputStream.cpp
index ba31b45bfd2..558d15be456 100644
--- a/dbms/src/DataStreams/AsynchronousBlockInputStream.cpp
+++ b/dbms/src/DataStreams/AsynchronousBlockInputStream.cpp
@@ -35,7 +35,7 @@ void AsynchronousBlockInputStream::next()
 {
     ready.reset();
 
-    pool.schedule([this, thread_group=CurrentThread::getGroup()] ()
+    pool.schedule([this, thread_group = CurrentThread::getGroup()] ()
     {
         CurrentMetrics::Increment metric_increment{CurrentMetrics::QueryThread};
 
diff --git a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp
index f226da9e442..bde030d8afa 100644
--- a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp
+++ b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp
@@ -195,7 +195,7 @@ void MergingAggregatedMemoryEfficientBlockInputStream::start()
           */
 
         for (size_t i = 0; i < merging_threads; ++i)
-            pool.schedule([this, thread_group=CurrentThread::getGroup()] () { mergeThread(thread_group); });
+            pool.schedule([this, thread_group = CurrentThread::getGroup()] () { mergeThread(thread_group); });
     }
 }
 
diff --git a/dbms/src/Interpreters/ThreadStatusExt.cpp b/dbms/src/Interpreters/ThreadStatusExt.cpp
index 987365cb3c4..669322a2509 100644
--- a/dbms/src/Interpreters/ThreadStatusExt.cpp
+++ b/dbms/src/Interpreters/ThreadStatusExt.cpp
@@ -37,7 +37,6 @@ String ThreadStatus::getQueryID()
 void CurrentThread::defaultThreadDeleter()
 {
     ThreadStatus & thread = CurrentThread::get();
-    LOG_TRACE(thread.log, "Thread " << thread.thread_number << " exited");
     thread.detachQuery(true, true);
 }
 

From e1f818a1b6cd6f7c3f69d693307e5349f377a583 Mon Sep 17 00:00:00 2001
From: Alexander GQ Gerasiov 
Date: Wed, 16 Jan 2019 01:39:01 +0300
Subject: [PATCH 138/586] Enable link time optimization (-flto).

This reduces the target's size by 30%.
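
For example, LTO can be requested at configure time like this (the command
line is illustrative; only the ENABLE_IPO option comes from this patch):

    cmake -DENABLE_IPO=ON ..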
Signed-off-by: Alexander GQ Gerasiov --- CMakeLists.txt | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6bd0aae5fd1..8b1903a43d3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,8 +1,23 @@ project (ClickHouse) -cmake_minimum_required (VERSION 3.3) +cmake_minimum_required (VERSION 3.9) set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/") +set(ENABLE_IPO OFF CACHE STRING "Enable inter-procedural optimization (aka LTO)") + +if (ENABLE_IPO) +include(CheckIPOSupported) +check_ipo_supported(RESULT IPO_SUPPORTED OUTPUT IPO_NOT_SUPPORTED) +if(IPO_SUPPORTED) + message(STATUS "IPO/LTO is supported, enabling") + set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE) +else() + message(STATUS "IPO/LTO is not supported: <${IPO_NOT_SUPPORTED}>") +endif() +else() + message(STATUS "IPO/LTO not enabled.") +endif() + if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") # Require at least gcc 7 if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7 AND NOT CMAKE_VERSION VERSION_LESS 2.8.9) From dd16a012a4f1423eafb8be05b3302960a36b7cab Mon Sep 17 00:00:00 2001 From: proller Date: Fri, 25 Jan 2019 22:18:05 +0300 Subject: [PATCH 139/586] IPO fixes --- CMakeLists.txt | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8b1903a43d3..f7297f31ed8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,19 +1,19 @@ project (ClickHouse) -cmake_minimum_required (VERSION 3.9) +cmake_minimum_required (VERSION 3.3) set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/") -set(ENABLE_IPO OFF CACHE STRING "Enable inter-procedural optimization (aka LTO)") - -if (ENABLE_IPO) -include(CheckIPOSupported) -check_ipo_supported(RESULT IPO_SUPPORTED OUTPUT IPO_NOT_SUPPORTED) -if(IPO_SUPPORTED) - message(STATUS "IPO/LTO is supported, enabling") - set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE) -else() - message(STATUS "IPO/LTO is not supported: <${IPO_NOT_SUPPORTED}>") -endif() +option(ENABLE_IPO "Enable inter-procedural optimization (aka LTO)" OFF) # need cmake 3.9+ +if(ENABLE_IPO) + cmake_policy(SET CMP0069 NEW) + include(CheckIPOSupported) + check_ipo_supported(RESULT IPO_SUPPORTED OUTPUT IPO_NOT_SUPPORTED) + if(IPO_SUPPORTED) + message(STATUS "IPO/LTO is supported, enabling") + set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE) + else() + message(STATUS "IPO/LTO is not supported: <${IPO_NOT_SUPPORTED}>") + endif() else() message(STATUS "IPO/LTO not enabled.") endif() From 6964fb47ff5074d746a1b68b1ebc32fe558fe2a8 Mon Sep 17 00:00:00 2001 From: proller Date: Fri, 25 Jan 2019 22:29:01 +0300 Subject: [PATCH 140/586] BUILD_DETERMINISTIC --- .../StorageSystemBuildOptions.generated.cpp.in | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in b/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in index 5c72545ab44..1cb420ce7b6 100644 --- a/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in +++ b/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in @@ -1,15 +1,23 @@ // .cpp autogenerated by cmake +#cmakedefine01 BUILD_DETERMINISTIC + const char * auto_config_build[] { "VERSION_FULL", "@VERSION_FULL@", "VERSION_DESCRIBE", "@VERSION_DESCRIBE@", + "VERSION_INTEGER", "@VERSION_INTEGER@", + +#if BUILD_DETERMINISTIC + "SYSTEM", "@CMAKE_SYSTEM_NAME@", +#else "VERSION_GITHASH", "@VERSION_GITHASH@", "VERSION_REVISION", 
"@VERSION_REVISION@", - "VERSION_INTEGER", "@VERSION_INTEGER@", "BUILD_DATE", "@BUILD_DATE@", - "BUILD_TYPE", "@CMAKE_BUILD_TYPE@", "SYSTEM", "@CMAKE_SYSTEM@", +#endif + + "BUILD_TYPE", "@CMAKE_BUILD_TYPE@", "SYSTEM_PROCESSOR", "@CMAKE_SYSTEM_PROCESSOR@", "LIBRARY_ARCHITECTURE", "@CMAKE_LIBRARY_ARCHITECTURE@", "CMAKE_VERSION", "@CMAKE_VERSION@", From 038a48bb3833b2241c7afe9cab6d5b8bdc48c132 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9o=20Ercolanelli?= Date: Fri, 25 Jan 2019 20:35:53 +0100 Subject: [PATCH 141/586] sumMap: implement sumMapWithOverflow --- .../AggregateFunctionSumMap.cpp | 47 ++++++++++++++++--- .../AggregateFunctionSumMap.h | 35 +++++++------- 2 files changed, 58 insertions(+), 24 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionSumMap.cpp b/dbms/src/AggregateFunctions/AggregateFunctionSumMap.cpp index 02303b953d9..75cd62c00f1 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionSumMap.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionSumMap.cpp @@ -12,6 +12,37 @@ namespace DB namespace { +struct WithOverflowPolicy +{ + /// Overflow, meaning that the returned type is the same as the input type. + static DataTypePtr promoteType(const DataTypePtr & data_type) { return data_type; } +}; + +struct WithoutOverflowPolicy +{ + /// No overflow, meaning we promote the types if necessary. + static DataTypePtr promoteType(const DataTypePtr & data_type) + { + if (!data_type->canBePromoted()) + throw new Exception{"Values to be summed are expected to be Numeric, Float or Decimal.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + + return data_type->promoteNumericType(); + } +}; + +template +using SumMapWithOverflow = AggregateFunctionSumMap; + +template +using SumMapWithoutOverflow = AggregateFunctionSumMap; + +template +using SumMapFilteredWithOverflow = AggregateFunctionSumMapFiltered; + +template +using SumMapFilteredWithoutOverflow = AggregateFunctionSumMapFiltered; + using SumMapArgs = std::pair; SumMapArgs parseArguments(const std::string & name, const DataTypes & arguments) @@ -42,21 +73,23 @@ SumMapArgs parseArguments(const std::string & name, const DataTypes & arguments) return {std::move(keys_type), std::move(values_types)}; } +template