diff --git a/dbms/programs/copier/ClusterCopier.cpp b/dbms/programs/copier/ClusterCopier.cpp index 59635e8cd95..7cdebd5b043 100644 --- a/dbms/programs/copier/ClusterCopier.cpp +++ b/dbms/programs/copier/ClusterCopier.cpp @@ -1179,7 +1179,7 @@ protected: /// Removes MATERIALIZED and ALIAS columns from create table query static ASTPtr removeAliasColumnsFromCreateQuery(const ASTPtr & query_ast) { - const ASTs & column_asts = typeid_cast(*query_ast).columns->children; + const ASTs & column_asts = typeid_cast(*query_ast).columns_list->columns->children; auto new_columns = std::make_shared(); for (const ASTPtr & column_ast : column_asts) @@ -1198,8 +1198,13 @@ protected: ASTPtr new_query_ast = query_ast->clone(); ASTCreateQuery & new_query = typeid_cast(*new_query_ast); - new_query.columns = new_columns.get(); - new_query.children.at(0) = std::move(new_columns); + + auto new_columns_list = std::make_shared(); + new_columns_list->set(new_columns_list->columns, new_columns); + new_columns_list->set( + new_columns_list->indices, typeid_cast(*query_ast).columns_list->indices->clone()); + + new_query.replace(new_query.columns_list, new_columns_list); return new_query_ast; } @@ -1217,7 +1222,7 @@ protected: res->table = new_table.second; res->children.clear(); - res->set(res->columns, create.columns->clone()); + res->set(res->columns_list, create.columns_list->clone()); res->set(res->storage, new_storage_ast->clone()); return res; diff --git a/dbms/src/DataStreams/MergingSortedBlockInputStream.h b/dbms/src/DataStreams/MergingSortedBlockInputStream.h index 6521ef383ed..63d0ee9b2cc 100644 --- a/dbms/src/DataStreams/MergingSortedBlockInputStream.h +++ b/dbms/src/DataStreams/MergingSortedBlockInputStream.h @@ -157,7 +157,7 @@ protected: using QueueWithCollation = std::priority_queue; QueueWithCollation queue_with_collation; - /// Used in Vertical merge algorithm to gather non-PK columns (on next step) + /// Used in Vertical merge algorithm to gather non-PK/non-index columns (on next step) /// If it is not nullptr then it should be populated during execution WriteBuffer * out_row_sources_buf; diff --git a/dbms/src/Databases/DatabaseDictionary.cpp b/dbms/src/Databases/DatabaseDictionary.cpp index 52da05ad7f8..3d8454bfd81 100644 --- a/dbms/src/Databases/DatabaseDictionary.cpp +++ b/dbms/src/Databases/DatabaseDictionary.cpp @@ -138,6 +138,7 @@ void DatabaseDictionary::alterTable( const Context &, const String &, const ColumnsDescription &, + const IndicesDescription &, const ASTModifier &) { throw Exception("DatabaseDictionary: alterTable() is not supported", ErrorCodes::NOT_IMPLEMENTED); diff --git a/dbms/src/Databases/DatabaseDictionary.h b/dbms/src/Databases/DatabaseDictionary.h index bfbbc7a07fa..178f12965ad 100644 --- a/dbms/src/Databases/DatabaseDictionary.h +++ b/dbms/src/Databases/DatabaseDictionary.h @@ -71,6 +71,7 @@ public: const Context & context, const String & name, const ColumnsDescription & columns, + const IndicesDescription & indices, const ASTModifier & engine_modifier) override; time_t getTableMetadataModificationTime( diff --git a/dbms/src/Databases/DatabaseMemory.cpp b/dbms/src/Databases/DatabaseMemory.cpp index 7b736220308..3eea0bc666a 100644 --- a/dbms/src/Databases/DatabaseMemory.cpp +++ b/dbms/src/Databases/DatabaseMemory.cpp @@ -53,6 +53,7 @@ void DatabaseMemory::alterTable( const Context &, const String &, const ColumnsDescription &, + const IndicesDescription &, const ASTModifier &) { throw Exception("DatabaseMemory: alterTable() is not supported", ErrorCodes::NOT_IMPLEMENTED); 
diff --git a/dbms/src/Databases/DatabaseMemory.h b/dbms/src/Databases/DatabaseMemory.h index 6471dd14dd7..fe7cc783ba3 100644 --- a/dbms/src/Databases/DatabaseMemory.h +++ b/dbms/src/Databases/DatabaseMemory.h @@ -48,6 +48,7 @@ public: const Context & context, const String & name, const ColumnsDescription & columns, + const IndicesDescription & indices, const ASTModifier & engine_modifier) override; time_t getTableMetadataModificationTime( diff --git a/dbms/src/Databases/DatabaseOrdinary.cpp b/dbms/src/Databases/DatabaseOrdinary.cpp index 50dc3ec2ef9..ce1e6f767bb 100644 --- a/dbms/src/Databases/DatabaseOrdinary.cpp +++ b/dbms/src/Databases/DatabaseOrdinary.cpp @@ -510,6 +510,7 @@ void DatabaseOrdinary::alterTable( const Context & context, const String & table_name, const ColumnsDescription & columns, + const IndicesDescription & indices, const ASTModifier & storage_modifier) { /// Read the definition of the table and replace the necessary parts with new ones. @@ -531,7 +532,14 @@ void DatabaseOrdinary::alterTable( ASTCreateQuery & ast_create_query = typeid_cast(*ast); ASTPtr new_columns = InterpreterCreateQuery::formatColumns(columns); - ast_create_query.replace(ast_create_query.columns, new_columns); + ASTPtr new_indices = InterpreterCreateQuery::formatIndices(indices); + + ast_create_query.columns_list->replace(ast_create_query.columns_list->columns, new_columns); + + if (ast_create_query.columns_list->indices) + ast_create_query.columns_list->replace(ast_create_query.columns_list->indices, new_indices); + else + ast_create_query.columns_list->set(ast_create_query.columns_list->indices, new_indices); if (storage_modifier) storage_modifier(*ast_create_query.storage); diff --git a/dbms/src/Databases/DatabaseOrdinary.h b/dbms/src/Databases/DatabaseOrdinary.h index bf6339eaaa7..887bf101d62 100644 --- a/dbms/src/Databases/DatabaseOrdinary.h +++ b/dbms/src/Databases/DatabaseOrdinary.h @@ -42,6 +42,7 @@ public: const Context & context, const String & name, const ColumnsDescription & columns, + const IndicesDescription & indices, const ASTModifier & engine_modifier) override; time_t getTableMetadataModificationTime( diff --git a/dbms/src/Databases/DatabasesCommon.cpp b/dbms/src/Databases/DatabasesCommon.cpp index 3189701d13c..780140969ad 100644 --- a/dbms/src/Databases/DatabasesCommon.cpp +++ b/dbms/src/Databases/DatabasesCommon.cpp @@ -68,10 +68,10 @@ std::pair createTableFromDefinition( /// We do not directly use `InterpreterCreateQuery::execute`, because /// - the database has not been created yet; /// - the code is simpler, since the query is already brought to a suitable form. 
- if (!ast_create_query.columns) + if (!ast_create_query.columns_list || !ast_create_query.columns_list->columns) throw Exception("Missing definition of columns.", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED); - ColumnsDescription columns = InterpreterCreateQuery::getColumnsDescription(*ast_create_query.columns, context); + ColumnsDescription columns = InterpreterCreateQuery::getColumnsDescription(*ast_create_query.columns_list->columns, context); return { diff --git a/dbms/src/Databases/IDatabase.h b/dbms/src/Databases/IDatabase.h index 90dfa229f11..e6b67a87c9b 100644 --- a/dbms/src/Databases/IDatabase.h +++ b/dbms/src/Databases/IDatabase.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -115,6 +116,7 @@ public: const Context & context, const String & name, const ColumnsDescription & columns, + const IndicesDescription & indices, const ASTModifier & engine_modifier) = 0; /// Returns time of table's metadata change, 0 if there is no corresponding metadata file. diff --git a/dbms/src/Functions/bitSwapLastTwo.cpp b/dbms/src/Functions/bitSwapLastTwo.cpp new file mode 100644 index 00000000000..6b7a9ba6573 --- /dev/null +++ b/dbms/src/Functions/bitSwapLastTwo.cpp @@ -0,0 +1,51 @@ +#include +#include +#include + +namespace DB +{ + +template +struct BitSwapLastTwoImpl +{ + using ResultType = UInt8; + + static inline ResultType apply(A a) + { + return static_cast( + ((static_cast(a) & 1) << 1) | ((static_cast(a) >> 1) & 1)); + } + +#if USE_EMBEDDED_COMPILER + static constexpr bool compilable = true; + + static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * arg, bool) + { + if (!arg->getType()->isIntegerTy()) + throw Exception("__bitSwapLastTwo expected an integral type", ErrorCodes::LOGICAL_ERROR); + return b.CreateOr( + b.CreateShl(b.CreateAnd(arg, 1), 1), + b.CreateAnd(b.CreateLShr(arg, 1), 1) + ); + } +#endif +}; + +struct NameBitSwapLastTwo { static constexpr auto name = "__bitSwapLastTwo"; }; +using FunctionBitSwapLastTwo = FunctionUnaryArithmetic; + +template <> struct FunctionUnaryArithmeticMonotonicity +{ + static bool has() { return false; } + static IFunction::Monotonicity get(const Field &, const Field &) + { + return {}; + } +}; + +void registerFunctionBitSwapLastTwo(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/dbms/src/Functions/registerFunctionsArithmetic.cpp b/dbms/src/Functions/registerFunctionsArithmetic.cpp index 430d1196a6b..77df48fc6b4 100644 --- a/dbms/src/Functions/registerFunctionsArithmetic.cpp +++ b/dbms/src/Functions/registerFunctionsArithmetic.cpp @@ -33,6 +33,8 @@ void registerFunctionRoundToExp2(FunctionFactory & factory); void registerFunctionRoundDuration(FunctionFactory & factory); void registerFunctionRoundAge(FunctionFactory & factory); +void registerFunctionBitSwapLastTwo(FunctionFactory & factory); + void registerFunctionsArithmetic(FunctionFactory & factory) { registerFunctionPlus(factory); @@ -64,6 +66,9 @@ void registerFunctionsArithmetic(FunctionFactory & factory) registerFunctionRoundToExp2(factory); registerFunctionRoundDuration(factory); registerFunctionRoundAge(factory); + + /// Not for external use. 
+ registerFunctionBitSwapLastTwo(factory); } } diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index 53bf85d2099..1c9a2ef418b 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -105,7 +105,9 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) const ASTStorage & storage = *create.storage; const ASTFunction & engine = *storage.engine; /// Currently, there are no database engines, that support any arguments. - if (engine.arguments || engine.parameters || storage.partition_by || storage.primary_key || storage.order_by || storage.sample_by || storage.settings) + if (engine.arguments || engine.parameters || storage.partition_by || storage.primary_key + || storage.order_by || storage.sample_by || storage.settings || + (create.columns_list && create.columns_list->indices && !create.columns_list->indices->children.empty())) { std::stringstream ostr; formatAST(storage, ostr, false, false); @@ -397,6 +399,16 @@ ASTPtr InterpreterCreateQuery::formatColumns(const ColumnsDescription & columns) return columns_list; } +ASTPtr InterpreterCreateQuery::formatIndices(const IndicesDescription & indices) +{ + auto res = std::make_shared(); + + for (const auto & index : indices.indices) + res->children.push_back(index->clone()); + + return res; +} + ColumnsDescription InterpreterCreateQuery::getColumnsDescription(const ASTExpressionList & columns, const Context & context) { ColumnsDescription res; @@ -449,9 +461,9 @@ ColumnsDescription InterpreterCreateQuery::setColumns( { ColumnsDescription res; - if (create.columns) + if (create.columns_list && create.columns_list->columns) { - res = getColumnsDescription(*create.columns, context); + res = getColumnsDescription(*create.columns_list->columns, context); } else if (!create.as_table.empty()) { @@ -467,10 +479,16 @@ ColumnsDescription InterpreterCreateQuery::setColumns( /// Even if query has list of columns, canonicalize it (unfold Nested columns). ASTPtr new_columns = formatColumns(res); - if (create.columns) - create.replace(create.columns, new_columns); + if (!create.columns_list) + { + auto new_columns_list = std::make_shared(); + create.set(create.columns_list, new_columns_list); + } + + if (create.columns_list->columns) + create.columns_list->replace(create.columns_list->columns, new_columns); else - create.set(create.columns, new_columns); + create.columns_list->set(create.columns_list->columns, new_columns); /// Check for duplicates std::set all_columns; @@ -550,7 +568,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) String table_name_escaped = escapeForFileName(table_name); // If this is a stub ATTACH query, read the query definition from the database - if (create.attach && !create.storage && !create.columns) + if (create.attach && !create.storage && !create.columns_list) { // Table SQL definition is available even if the table is detached auto query = context.getCreateTableQuery(database_name, table_name); @@ -569,7 +587,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) } Block as_select_sample; - if (create.select && (!create.attach || !create.columns)) + if (create.select && (!create.attach || !create.columns_list)) as_select_sample = InterpreterSelectWithUnionQuery::getSampleBlock(create.select->clone(), context); String as_database_name = create.as_database.empty() ? 
current_database : create.as_database; diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.h b/dbms/src/Interpreters/InterpreterCreateQuery.h index 0fc26847574..aa9c02df8a5 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.h +++ b/dbms/src/Interpreters/InterpreterCreateQuery.h @@ -2,6 +2,7 @@ #include #include +#include #include @@ -29,6 +30,8 @@ public: static ASTPtr formatColumns(const NamesAndTypesList & columns); static ASTPtr formatColumns(const ColumnsDescription & columns); + static ASTPtr formatIndices(const IndicesDescription & indices); + void setDatabaseLoadingThreadpool(ThreadPool & thread_pool_) { thread_pool = &thread_pool_; diff --git a/dbms/src/Interpreters/InterpreterSystemQuery.cpp b/dbms/src/Interpreters/InterpreterSystemQuery.cpp index 1707b9f7f9f..722a504f35e 100644 --- a/dbms/src/Interpreters/InterpreterSystemQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSystemQuery.cpp @@ -252,7 +252,7 @@ StoragePtr InterpreterSystemQuery::tryRestartReplica(const String & database_nam create.attach = true; std::string data_path = database->getDataPath(); - auto columns = InterpreterCreateQuery::getColumnsDescription(*create.columns, system_context); + auto columns = InterpreterCreateQuery::getColumnsDescription(*create.columns_list->columns, system_context); StoragePtr table = StorageFactory::instance().get(create, data_path, diff --git a/dbms/src/Interpreters/SystemLog.h b/dbms/src/Interpreters/SystemLog.h index 4cb2bb76b4f..94b9817d9ea 100644 --- a/dbms/src/Interpreters/SystemLog.h +++ b/dbms/src/Interpreters/SystemLog.h @@ -358,7 +358,10 @@ void SystemLog::prepareTable() create->table = table_name; Block sample = LogElement::createBlock(); - create->set(create->columns, InterpreterCreateQuery::formatColumns(sample.getNamesAndTypesList())); + + auto new_columns_list = std::make_shared(); + new_columns_list->set(new_columns_list->columns, InterpreterCreateQuery::formatColumns(sample.getNamesAndTypesList())); + create->set(create->columns_list, new_columns_list); ParserStorage storage_parser; ASTPtr storage_ast = parseQuery( diff --git a/dbms/src/Parsers/ASTAlterQuery.cpp b/dbms/src/Parsers/ASTAlterQuery.cpp index c5cdf1475e3..12491c83762 100644 --- a/dbms/src/Parsers/ASTAlterQuery.cpp +++ b/dbms/src/Parsers/ASTAlterQuery.cpp @@ -82,6 +82,24 @@ void ASTAlterCommand::formatImpl( settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY ORDER BY " << (settings.hilite ? hilite_none : ""); order_by->formatImpl(settings, state, frame); } + else if (type == ASTAlterCommand::ADD_INDEX) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ADD INDEX " << (if_not_exists ? "IF NOT EXISTS " : "") << (settings.hilite ? hilite_none : ""); + index_decl->formatImpl(settings, state, frame); + + /// AFTER + if (index) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " AFTER " << (settings.hilite ? hilite_none : ""); + index->formatImpl(settings, state, frame); + } + } + else if (type == ASTAlterCommand::DROP_INDEX) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str + << "DROP INDEX " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : ""); + index->formatImpl(settings, state, frame); + } else if (type == ASTAlterCommand::DROP_PARTITION) { settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << (detach ? 
"DETACH" : "DROP") << " PARTITION " diff --git a/dbms/src/Parsers/ASTAlterQuery.h b/dbms/src/Parsers/ASTAlterQuery.h index a6759482a56..2c77e2031de 100644 --- a/dbms/src/Parsers/ASTAlterQuery.h +++ b/dbms/src/Parsers/ASTAlterQuery.h @@ -28,6 +28,9 @@ public: COMMENT_COLUMN, MODIFY_ORDER_BY, + ADD_INDEX, + DROP_INDEX, + DROP_PARTITION, ATTACH_PARTITION, REPLACE_PARTITION, @@ -58,6 +61,15 @@ public: */ ASTPtr order_by; + /** The ADD INDEX query stores the IndexDeclaration there. + */ + ASTPtr index_decl; + + /** The ADD INDEX query stores the name of the index following AFTER. + * The DROP INDEX query stores the name for deletion. + */ + ASTPtr index; + /** Used in DROP PARTITION and ATTACH PARTITION FROM queries. * The value or ID of the partition is stored here. */ diff --git a/dbms/src/Parsers/ASTCreateQuery.h b/dbms/src/Parsers/ASTCreateQuery.h index 840ba345813..6be7451529d 100644 --- a/dbms/src/Parsers/ASTCreateQuery.h +++ b/dbms/src/Parsers/ASTCreateQuery.h @@ -38,6 +38,7 @@ public: res->set(res->order_by, order_by->clone()); if (sample_by) res->set(res->sample_by, sample_by->clone()); + if (settings) res->set(res->settings, settings->clone()); @@ -81,6 +82,95 @@ public: }; +class ASTColumns : public IAST +{ +private: + class ASTColumnsElement : public IAST + { + public: + String prefix; + IAST * elem; + + String getID(char c) const override { return "ASTColumnsElement for " + elem->getID(c); } + + ASTPtr clone() const override + { + auto res = std::make_shared(); + res->prefix = prefix; + if (elem) + res->set(res->elem, elem->clone()); + return res; + } + + void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override + { + if (!elem) + return; + + if (prefix.empty()) + { + elem->formatImpl(s, state, frame); + return; + } + + frame.need_parens = false; + std::string indent_str = s.one_line ? "" : std::string(4 * frame.indent, ' '); + + s.ostr << s.nl_or_ws << indent_str; + s.ostr << (s.hilite ? hilite_keyword : "") << prefix << (s.hilite ? 
hilite_none : ""); + + FormatSettings nested_settings = s; + nested_settings.one_line = true; + nested_settings.nl_or_ws = ' '; + + elem->formatImpl(nested_settings, state, frame); + } + }; +public: + ASTExpressionList * columns = nullptr; + ASTExpressionList * indices = nullptr; + + String getID(char) const override { return "Columns definition"; } + + ASTPtr clone() const override + { + auto res = std::make_shared(); + + if (columns) + res->set(res->columns, columns->clone()); + if (indices) + res->set(res->indices, indices->clone()); + + return res; + } + + void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override + { + ASTExpressionList list; + + if (columns) + for (const auto & column : columns->children) + { + auto elem = std::make_shared(); + elem->prefix = ""; + elem->set(elem->elem, column->clone()); + list.children.push_back(elem); + } + if (indices) + for (const auto & index : indices->children) + { + auto elem = std::make_shared(); + elem->prefix = "INDEX"; + elem->set(elem->elem, index->clone()); + list.children.push_back(elem); + } + + if (!list.children.empty()) + list.formatImpl(s, state, frame); + } +}; + + /// CREATE TABLE or ATTACH TABLE query class ASTCreateQuery : public ASTQueryWithTableAndOutput, public ASTQueryWithOnCluster { @@ -90,7 +180,7 @@ public: bool is_view{false}; bool is_materialized_view{false}; bool is_populate{false}; - ASTExpressionList * columns = nullptr; + ASTColumns * columns_list = nullptr; String to_database; /// For CREATE MATERIALIZED VIEW mv TO table. String to_table; ASTStorage * storage = nullptr; @@ -106,8 +196,8 @@ public: auto res = std::make_shared(*this); res->children.clear(); - if (columns) - res->set(res->columns, columns->clone()); + if (columns_list) + res->set(res->columns_list, columns_list->clone()); if (storage) res->set(res->storage, storage->clone()); if (select) @@ -175,12 +265,12 @@ protected: << (!as_database.empty() ? backQuoteIfNeed(as_database) + "." : "") << backQuoteIfNeed(as_table); } - if (columns) + if (columns_list) { settings.ostr << (settings.one_line ? " (" : "\n("); FormatStateStacked frame_nested = frame; ++frame_nested.indent; - columns->formatImpl(settings, state, frame_nested); + columns_list->formatImpl(settings, state, frame_nested); settings.ostr << (settings.one_line ? ")" : "\n)"); } diff --git a/dbms/src/Parsers/ASTIndexDeclaration.h b/dbms/src/Parsers/ASTIndexDeclaration.h new file mode 100644 index 00000000000..6440812a531 --- /dev/null +++ b/dbms/src/Parsers/ASTIndexDeclaration.h @@ -0,0 +1,59 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include + + +namespace DB +{ + +/** name BY expr TYPE typename(args) GRANULARITY int in create query + */ +class ASTIndexDeclaration : public IAST +{ +public: + String name; + IAST * expr; + ASTFunction * type; + Field granularity; + + /** Get the text that identifies this element. */ + String getID(char) const override { return "Index"; } + + ASTPtr clone() const override + { + auto res = std::make_shared(); + + res->name = name; + res->granularity = granularity; + + if (expr) + res->set(res->expr, expr->clone()); + if (type) + res->set(res->type, type->clone()); + return res; + } + + void formatImpl(const FormatSettings & s, FormatState &state, FormatStateStacked frame) const override + { + frame.need_parens = false; + std::string indent_str = s.one_line ? 
"" : std::string(4 * frame.indent, ' '); + + s.ostr << s.nl_or_ws << indent_str; + s.ostr << backQuoteIfNeed(name); + s.ostr << " "; + expr->formatImpl(s, state, frame); + s.ostr << (s.hilite ? hilite_keyword : "") << " TYPE " << (s.hilite ? hilite_none : ""); + type->formatImpl(s, state, frame); + s.ostr << (s.hilite ? hilite_keyword : "") << " GRANULARITY " << (s.hilite ? hilite_none : ""); + s.ostr << applyVisitor(FieldVisitorToString(), granularity); + } +}; + +} diff --git a/dbms/src/Parsers/ParserAlterQuery.cpp b/dbms/src/Parsers/ParserAlterQuery.cpp index 91aa211065e..818362e9c95 100644 --- a/dbms/src/Parsers/ParserAlterQuery.cpp +++ b/dbms/src/Parsers/ParserAlterQuery.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -27,6 +28,9 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserKeyword s_comment_column("COMMENT COLUMN"); ParserKeyword s_modify_order_by("MODIFY ORDER BY"); + ParserKeyword s_add_index("ADD INDEX"); + ParserKeyword s_drop_index("DROP INDEX"); + ParserKeyword s_attach_partition("ATTACH PARTITION"); ParserKeyword s_detach_partition("DETACH PARTITION"); ParserKeyword s_drop_partition("DROP PARTITION"); @@ -51,6 +55,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserCompoundIdentifier parser_name; ParserStringLiteral parser_string_literal; ParserCompoundColumnDeclaration parser_col_decl; + ParserIndexDeclaration parser_idx_decl; ParserCompoundColumnDeclaration parser_modify_col_decl(false); ParserPartition parser_partition; ParserExpression parser_exp_elem; @@ -92,6 +97,33 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected command->type = ASTAlterCommand::DROP_COLUMN; command->detach = false; } + else if (s_add_index.ignore(pos, expected)) + { + if (s_if_not_exists.ignore(pos, expected)) + command->if_not_exists = true; + + if (!parser_idx_decl.parse(pos, command->index_decl, expected)) + return false; + + if (s_after.ignore(pos, expected)) + { + if (!parser_name.parse(pos, command->index, expected)) + return false; + } + + command->type = ASTAlterCommand::ADD_INDEX; + } + else if (s_drop_index.ignore(pos, expected)) + { + if (s_if_exists.ignore(pos, expected)) + command->if_exists = true; + + if (!parser_name.parse(pos, command->index, expected)) + return false; + + command->type = ASTAlterCommand::DROP_INDEX; + command->detach = false; + } else if (s_clear_column.ignore(pos, expected)) { if (s_if_exists.ignore(pos, expected)) diff --git a/dbms/src/Parsers/ParserCreateQuery.cpp b/dbms/src/Parsers/ParserCreateQuery.cpp index 79767218d22..66083ae5fab 100644 --- a/dbms/src/Parsers/ParserCreateQuery.cpp +++ b/dbms/src/Parsers/ParserCreateQuery.cpp @@ -1,5 +1,7 @@ +#include #include #include +#include #include #include #include @@ -90,6 +92,113 @@ bool ParserColumnDeclarationList::parseImpl(Pos & pos, ASTPtr & node, Expected & .parse(pos, node, expected); } +bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKeyword s_type("TYPE"); + ParserKeyword s_granularity("GRANULARITY"); + + ParserIdentifier name_p; + ParserIdentifierWithOptionalParameters ident_with_optional_params_p; + ParserExpression expression_p; + ParserUnsignedInteger granularity_p; + + ASTPtr name; + ASTPtr expr; + ASTPtr type; + ASTPtr granularity; + + if (!name_p.parse(pos, name, expected)) + return false; + + if (!expression_p.parse(pos, expr, expected)) + return false; + + if (!s_type.ignore(pos, expected)) + 
return false; + + if (!ident_with_optional_params_p.parse(pos, type, expected)) + return false; + + if (!s_granularity.ignore(pos, expected)) + return false; + + if (!granularity_p.parse(pos, granularity, expected)) + return false; + + auto index = std::make_shared(); + index->name = typeid_cast(*name).name; + index->granularity = typeid_cast(*granularity).value; + index->set(index->expr, expr); + index->set(index->type, type); + node = index; + + return true; +} + + +bool ParserColumnAndIndexDeclaraion::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKeyword s_index("INDEX"); + + ParserIndexDeclaration index_p; + ParserColumnDeclaration column_p; + + ASTPtr new_node = nullptr; + + if (s_index.ignore(pos, expected)) + { + if (!index_p.parse(pos, new_node, expected)) + return false; + } + else + { + if (!column_p.parse(pos, new_node, expected)) + return false; + } + + node = new_node; + return true; +} + +bool ParserIndexDeclarationList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + return ParserList(std::make_unique(), std::make_unique(TokenType::Comma), false) + .parse(pos, node, expected); +} + + +bool ParserColumnsOrIndicesDeclarationList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ASTPtr list; + if (!ParserList(std::make_unique(), std::make_unique(TokenType::Comma), false) + .parse(pos, list, expected)) + return false; + + ASTPtr columns = std::make_shared(); + ASTPtr indices = std::make_shared(); + + for (const auto & elem : list->children) + { + if (typeid_cast(elem.get())) + columns->children.push_back(elem); + else if (typeid_cast(elem.get())) + indices->children.push_back(elem); + else + return false; + } + + auto res = std::make_shared(); + + if (!columns->children.empty()) + res->set(res->columns, columns); + if (!indices->children.empty()) + res->set(res->indices, indices); + + node = res; + + return true; +} + bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { @@ -169,6 +278,7 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) storage->set(storage->primary_key, primary_key); storage->set(storage->order_by, order_by); storage->set(storage->sample_by, sample_by); + storage->set(storage->settings, settings); node = storage; @@ -193,12 +303,12 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserToken s_rparen(TokenType::ClosingRoundBracket); ParserStorage storage_p; ParserIdentifier name_p; - ParserColumnDeclarationList columns_p; + ParserColumnsOrIndicesDeclarationList columns_or_indices_p; ParserSelectWithUnionQuery select_p; ASTPtr database; ASTPtr table; - ASTPtr columns; + ASTPtr columns_list; ASTPtr to_database; ASTPtr to_table; ASTPtr storage; @@ -266,7 +376,7 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) /// List of columns. if (s_lparen.ignore(pos, expected)) { - if (!columns_p.parse(pos, columns, expected)) + if (!columns_or_indices_p.parse(pos, columns_list, expected)) return false; if (!s_rparen.ignore(pos, expected)) @@ -368,7 +478,7 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) /// Optional - a list of columns can be specified. It must fully comply with SELECT. 
if (s_lparen.ignore(pos, expected)) { - if (!columns_p.parse(pos, columns, expected)) + if (!columns_or_indices_p.parse(pos, columns_list, expected)) return false; if (!s_rparen.ignore(pos, expected)) @@ -410,7 +520,7 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) getIdentifierName(to_database, query->to_database); getIdentifierName(to_table, query->to_table); - query->set(query->columns, columns); + query->set(query->columns_list, columns_list); query->set(query->storage, storage); getIdentifierName(as_database, query->as_database); diff --git a/dbms/src/Parsers/ParserCreateQuery.h b/dbms/src/Parsers/ParserCreateQuery.h index 1efe4e41bde..46dee1c8fa3 100644 --- a/dbms/src/Parsers/ParserCreateQuery.h +++ b/dbms/src/Parsers/ParserCreateQuery.h @@ -218,7 +218,45 @@ protected: }; -/** ENGINE = name [PARTITION BY expr] [ORDER BY expr] [PRIMARY KEY expr] [SAMPLE BY expr] [SETTINGS name = value, ...] */ +/** name BY expr TYPE typename(arg1, arg2, ...) GRANULARITY value */ +class ParserIndexDeclaration : public IParserBase +{ +public: + ParserIndexDeclaration() {} + +protected: + const char * getName() const override { return "index declaration"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + + +class ParserColumnAndIndexDeclaraion : public IParserBase +{ +protected: + const char * getName() const override { return "column or index declaration"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + + +class ParserIndexDeclarationList : public IParserBase +{ +protected: + const char * getName() const override { return "index declaration list"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + + +class ParserColumnsOrIndicesDeclarationList : public IParserBase +{ + protected: + const char * getName() const override { return "columns or indices declaration list"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + + +/** + * ENGINE = name [PARTITION BY expr] [ORDER BY expr] [PRIMARY KEY expr] [SAMPLE BY expr] [SETTINGS name = value, ...] + */ class ParserStorage : public IParserBase { protected: @@ -233,6 +271,8 @@ protected: * name1 type1, * name2 type2, * ... + * INDEX name1 expr TYPE type1(args) GRANULARITY value, + * ... * ) ENGINE = engine * * Or: diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index 452948c0e56..d3790aa3a19 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -120,6 +121,35 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ command.order_by = command_ast->order_by; return command; } + else if (command_ast->type == ASTAlterCommand::ADD_INDEX) + { + AlterCommand command; + command.index_decl = command_ast->index_decl; + command.type = AlterCommand::ADD_INDEX; + + const auto & ast_index_decl = typeid_cast(*command_ast->index_decl); + + command.index_name = ast_index_decl.name; + + if (command_ast->index) + command.after_index_name = typeid_cast(*command_ast->index).name; + + command.if_not_exists = command_ast->if_not_exists; + + return command; + } + else if (command_ast->type == ASTAlterCommand::DROP_INDEX) + { + if (command_ast->clear_column) + throw Exception("\"ALTER TABLE table CLEAR COLUMN column\" queries are not supported yet. 
Use \"CLEAR COLUMN column IN PARTITION\".", ErrorCodes::NOT_IMPLEMENTED); + + AlterCommand command; + command.type = AlterCommand::DROP_INDEX; + command.index_name = typeid_cast(*(command_ast->index)).name; + command.if_exists = command_ast->if_exists; + + return command; + } else return {}; } @@ -132,7 +162,8 @@ static bool namesEqual(const String & name_without_dot, const DB::NameAndTypePai return (name_with_dot == name_type.name.substr(0, name_without_dot.length() + 1) || name_without_dot == name_type.name); } -void AlterCommand::apply(ColumnsDescription & columns_description, ASTPtr & order_by_ast, ASTPtr & primary_key_ast) const +void AlterCommand::apply(ColumnsDescription & columns_description, IndicesDescription & indices_description, + ASTPtr & order_by_ast, ASTPtr & primary_key_ast) const { if (type == ADD_COLUMN) { @@ -297,6 +328,60 @@ void AlterCommand::apply(ColumnsDescription & columns_description, ASTPtr & orde { columns_description.comments[column_name] = comment; } + else if (type == ADD_INDEX) + { + if (std::any_of( + indices_description.indices.cbegin(), + indices_description.indices.cend(), + [this](const ASTPtr & index_ast) + { + return typeid_cast(*index_ast).name == index_name; + })) + { + if (if_not_exists) + return; + else + throw Exception{"Cannot add index " + index_name + ": index with this name already exists", + ErrorCodes::ILLEGAL_COLUMN}; + } + + auto insert_it = indices_description.indices.end(); + + if (!after_index_name.empty()) + { + insert_it = std::find_if( + indices_description.indices.begin(), + indices_description.indices.end(), + [this](const ASTPtr & index_ast) + { + return typeid_cast(*index_ast).name == after_index_name; + }); + + if (insert_it == indices_description.indices.end()) + throw Exception("Wrong index name. Cannot find index `" + after_index_name + "` to insert after.", + ErrorCodes::LOGICAL_ERROR); + + ++insert_it; + } + + indices_description.indices.emplace(insert_it, std::dynamic_pointer_cast(index_decl)); + } + else if (type == DROP_INDEX) + { + auto erase_it = std::find_if( + indices_description.indices.begin(), + indices_description.indices.end(), + [this](const ASTPtr & index_ast) + { + return typeid_cast(*index_ast).name == index_name; + }); + + if (erase_it == indices_description.indices.end()) + throw Exception("Wrong index name. 
Cannot find index `" + index_name + "` to drop.", + ErrorCodes::LOGICAL_ERROR); + + indices_description.indices.erase(erase_it); + } else throw Exception("Wrong parameter type in ALTER query", ErrorCodes::LOGICAL_ERROR); } @@ -311,17 +396,19 @@ bool AlterCommand::is_mutable() const return true; } -void AlterCommands::apply(ColumnsDescription & columns_description, ASTPtr & order_by_ast, ASTPtr & primary_key_ast) const +void AlterCommands::apply(ColumnsDescription & columns_description, IndicesDescription & indices_description, + ASTPtr & order_by_ast, ASTPtr & primary_key_ast) const { auto new_columns_description = columns_description; + auto new_indices_description = indices_description; auto new_order_by_ast = order_by_ast; auto new_primary_key_ast = primary_key_ast; for (const AlterCommand & command : *this) if (!command.ignore) - command.apply(new_columns_description, new_order_by_ast, new_primary_key_ast); - + command.apply(new_columns_description, new_indices_description, new_order_by_ast, new_primary_key_ast); columns_description = std::move(new_columns_description); + indices_description = std::move(new_indices_description); order_by_ast = std::move(new_order_by_ast); primary_key_ast = std::move(new_primary_key_ast); } @@ -538,14 +625,17 @@ void AlterCommands::validate(const IStorage & table, const Context & context) void AlterCommands::apply(ColumnsDescription & columns_description) const { auto out_columns_description = columns_description; + IndicesDescription indices_description; ASTPtr out_order_by; ASTPtr out_primary_key; - apply(out_columns_description, out_order_by, out_primary_key); + apply(out_columns_description, indices_description, out_order_by, out_primary_key); if (out_order_by) throw Exception("Storage doesn't support modifying ORDER BY expression", ErrorCodes::NOT_IMPLEMENTED); if (out_primary_key) throw Exception("Storage doesn't support modifying PRIMARY KEY expression", ErrorCodes::NOT_IMPLEMENTED); + if (!indices_description.indices.empty()) + throw Exception("Storage doesn't support modifying indices", ErrorCodes::NOT_IMPLEMENTED); columns_description = std::move(out_columns_description); } diff --git a/dbms/src/Storages/AlterCommands.h b/dbms/src/Storages/AlterCommands.h index 88b8dc1739f..10307736813 100644 --- a/dbms/src/Storages/AlterCommands.h +++ b/dbms/src/Storages/AlterCommands.h @@ -3,6 +3,7 @@ #include #include #include +#include #include @@ -23,6 +24,8 @@ struct AlterCommand MODIFY_COLUMN, COMMENT_COLUMN, MODIFY_ORDER_BY, + ADD_INDEX, + DROP_INDEX, UKNOWN_TYPE, }; @@ -52,6 +55,13 @@ struct AlterCommand /// For MODIFY_ORDER_BY ASTPtr order_by; + /// For ADD INDEX + ASTPtr index_decl; + String after_index_name; + + /// For ADD/DROP INDEX + String index_name; + /// indicates that this command should not be applied, for example in case of if_exists=true and column doesn't exist. 
bool ignore = false; @@ -70,7 +80,8 @@ struct AlterCommand static std::optional parse(const ASTAlterCommand * command); - void apply(ColumnsDescription & columns_description, ASTPtr & order_by_ast, ASTPtr & primary_key_ast) const; + void apply(ColumnsDescription & columns_description, IndicesDescription & indices_description, + ASTPtr & order_by_ast, ASTPtr & primary_key_ast) const; /// Checks that not only metadata touched by that command bool is_mutable() const; }; @@ -81,7 +92,8 @@ class Context; class AlterCommands : public std::vector { public: - void apply(ColumnsDescription & columns_description, ASTPtr & order_by_ast, ASTPtr & primary_key_ast) const; + void apply(ColumnsDescription & columns_description, IndicesDescription & indices_description, ASTPtr & order_by_ast, + ASTPtr & primary_key_ast) const; /// For storages that don't support MODIFY_ORDER_BY. void apply(ColumnsDescription & columns_description) const; diff --git a/dbms/src/Storages/IStorage.cpp b/dbms/src/Storages/IStorage.cpp index 07442ecbd15..0c2a4a4d0c9 100644 --- a/dbms/src/Storages/IStorage.cpp +++ b/dbms/src/Storages/IStorage.cpp @@ -24,8 +24,9 @@ void IStorage::alter(const AlterCommands & params, const String & database_name, auto lock = lockStructureForAlter(); auto new_columns = getColumns(); + auto new_indices = getIndicesDescription(); params.apply(new_columns); - context.getDatabase(database_name)->alterTable(context, table_name, new_columns, {}); + context.getDatabase(database_name)->alterTable(context, table_name, new_columns, new_indices, {}); setColumns(std::move(new_columns)); } diff --git a/dbms/src/Storages/ITableDeclaration.cpp b/dbms/src/Storages/ITableDeclaration.cpp index d68693a82bc..2315e5a5538 100644 --- a/dbms/src/Storages/ITableDeclaration.cpp +++ b/dbms/src/Storages/ITableDeclaration.cpp @@ -31,6 +31,11 @@ void ITableDeclaration::setColumns(ColumnsDescription columns_) columns = std::move(columns_); } +void ITableDeclaration::setIndicesDescription(IndicesDescription indices_) +{ + indices = std::move(indices_); +} + bool ITableDeclaration::hasColumn(const String & column_name) const { diff --git a/dbms/src/Storages/ITableDeclaration.h b/dbms/src/Storages/ITableDeclaration.h index 5f15ad626f7..e2ac3b1d6c4 100644 --- a/dbms/src/Storages/ITableDeclaration.h +++ b/dbms/src/Storages/ITableDeclaration.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace DB @@ -15,6 +16,9 @@ public: virtual const ColumnsDescription & getColumns() const { return columns; } virtual void setColumns(ColumnsDescription columns_); + virtual const IndicesDescription & getIndicesDescription() const { return indices; } + virtual void setIndicesDescription(IndicesDescription indices_); + /// NOTE: These methods should include virtual columns, but should NOT include ALIAS columns /// (they are treated separately). 
virtual NameAndTypePair getColumn(const String & column_name) const; @@ -52,6 +56,7 @@ public: private: ColumnsDescription columns; + IndicesDescription indices; }; } diff --git a/dbms/src/Storages/IndicesDescription.cpp b/dbms/src/Storages/IndicesDescription.cpp new file mode 100644 index 00000000000..35e5ae45629 --- /dev/null +++ b/dbms/src/Storages/IndicesDescription.cpp @@ -0,0 +1,38 @@ +#include + +#include +#include +#include + + +namespace DB +{ + +String IndicesDescription::toString() const +{ + if (indices.empty()) + return {}; + + ASTExpressionList list; + for (const auto & index : indices) + list.children.push_back(index); + + return serializeAST(list, true); +} + +IndicesDescription IndicesDescription::parse(const String & str) +{ + if (str.empty()) + return {}; + + IndicesDescription res; + ParserIndexDeclarationList parser; + ASTPtr list = parseQuery(parser, str, 0); + + for (const auto & index : list->children) + res.indices.push_back(std::dynamic_pointer_cast(index)); + + return res; +} + +} diff --git a/dbms/src/Storages/IndicesDescription.h b/dbms/src/Storages/IndicesDescription.h new file mode 100644 index 00000000000..1741886f852 --- /dev/null +++ b/dbms/src/Storages/IndicesDescription.h @@ -0,0 +1,22 @@ +#pragma once + +#include + + +namespace DB +{ + +using IndicesAsts = std::vector>; + +struct IndicesDescription +{ + IndicesAsts indices; + + IndicesDescription() = default; + + String toString() const; + + static IndicesDescription parse(const String & str); +}; + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 75a54a5020f..3526ec334a8 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -47,6 +47,7 @@ #include #include +#include #include #include #include @@ -88,6 +89,7 @@ namespace ErrorCodes MergeTreeData::MergeTreeData( const String & database_, const String & table_, const String & full_path_, const ColumnsDescription & columns_, + const IndicesDescription & indices_, Context & context_, const String & date_column_name, const ASTPtr & partition_by_ast_, @@ -113,7 +115,7 @@ MergeTreeData::MergeTreeData( data_parts_by_info(data_parts_indexes.get()), data_parts_by_state_and_info(data_parts_indexes.get()) { - setPrimaryKeyAndColumns(order_by_ast_, primary_key_ast_, columns_); + setPrimaryKeyIndicesAndColumns(order_by_ast_, primary_key_ast_, columns_, indices_); /// NOTE: using the same columns list as is read when performing actual merges. 
    merging_params.check(getColumns().getAllPhysical());
@@ -219,8 +221,9 @@ static void checkKeyExpression(const ExpressionActions & expr, const Block & sam
 }
 
-void MergeTreeData::setPrimaryKeyAndColumns(
-    const ASTPtr & new_order_by_ast, ASTPtr new_primary_key_ast, const ColumnsDescription & new_columns, bool only_check)
+void MergeTreeData::setPrimaryKeyIndicesAndColumns(
+    const ASTPtr &new_order_by_ast, ASTPtr new_primary_key_ast,
+    const ColumnsDescription &new_columns, const IndicesDescription &indices_description, bool only_check)
 {
     if (!new_order_by_ast)
         throw Exception("ORDER BY cannot be empty", ErrorCodes::BAD_ARGUMENTS);
@@ -327,6 +330,50 @@ void MergeTreeData::setPrimaryKeyAndColumns(
         new_primary_key_data_types.push_back(elem.type);
     }
 
+    ASTPtr skip_indices_with_primary_key_expr_list = new_primary_key_expr_list->clone();
+    ASTPtr skip_indices_with_sorting_key_expr_list = new_sorting_key_expr_list->clone();
+
+    MergeTreeIndices new_indices;
+
+    if (!indices_description.indices.empty())
+    {
+        std::set indices_names;
+
+        for (const auto & index_ast : indices_description.indices)
+        {
+            const auto & index_decl = std::dynamic_pointer_cast(index_ast);
+
+            new_indices.push_back(
+                    MergeTreeIndexFactory::instance().get(
+                            all_columns,
+                            std::dynamic_pointer_cast(index_decl->clone()),
+                            global_context));
+
+            if (indices_names.find(new_indices.back()->name) != indices_names.end())
+                throw Exception(
+                        "Index with name `" + new_indices.back()->name + "` already exists",
+                        ErrorCodes::LOGICAL_ERROR);
+
+            ASTPtr expr_list = MergeTreeData::extractKeyExpressionList(index_decl->expr->clone());
+            for (const auto & expr : expr_list->children)
+            {
+                skip_indices_with_primary_key_expr_list->children.push_back(expr->clone());
+                skip_indices_with_sorting_key_expr_list->children.push_back(expr->clone());
+            }
+
+            indices_names.insert(new_indices.back()->name);
+        }
+    }
+    auto syntax_primary = SyntaxAnalyzer(global_context, {}).analyze(
+            skip_indices_with_primary_key_expr_list, all_columns);
+    auto new_indices_with_primary_key_expr = ExpressionAnalyzer(
+            skip_indices_with_primary_key_expr_list, syntax_primary, global_context).getActions(false);
+
+    auto syntax_sorting = SyntaxAnalyzer(global_context, {}).analyze(
+            skip_indices_with_sorting_key_expr_list, all_columns);
+    auto new_indices_with_sorting_key_expr = ExpressionAnalyzer(
+            skip_indices_with_sorting_key_expr_list, syntax_sorting, global_context).getActions(false);
+
     if (!only_check)
     {
         setColumns(new_columns);
@@ -342,6 +389,12 @@ void MergeTreeData::setPrimaryKeyAndColumns(
         primary_key_expr = std::move(new_primary_key_expr);
         primary_key_sample = std::move(new_primary_key_sample);
         primary_key_data_types = std::move(new_primary_key_data_types);
+
+        setIndicesDescription(indices_description);
+        skip_indices = std::move(new_indices);
+
+        primary_key_and_skip_indices_expr = new_indices_with_primary_key_expr;
+        sorting_key_and_skip_indices_expr = new_indices_with_sorting_key_expr;
     }
 }
 
@@ -1001,9 +1054,10 @@ void MergeTreeData::checkAlter(const AlterCommands & commands)
 {
     /// Check that needed transformations can be applied to the list of columns without considering type conversions.
     auto new_columns = getColumns();
+    auto new_indices = getIndicesDescription();
     ASTPtr new_order_by_ast = order_by_ast;
     ASTPtr new_primary_key_ast = primary_key_ast;
-    commands.apply(new_columns, new_order_by_ast, new_primary_key_ast);
+    commands.apply(new_columns, new_indices, new_order_by_ast, new_primary_key_ast);
 
     /// Set of columns that shouldn't be altered.
NameSet columns_alter_forbidden; @@ -1021,6 +1075,12 @@ void MergeTreeData::checkAlter(const AlterCommands & commands) columns_alter_forbidden.insert(col); } + for (const auto & index : skip_indices) + { + for (const String & col : index->expr->getRequiredColumns()) + columns_alter_forbidden.insert(col); + } + if (sorting_key_expr) { for (const ExpressionAction & action : sorting_key_expr->getActions()) @@ -1075,18 +1135,21 @@ void MergeTreeData::checkAlter(const AlterCommands & commands) } } - setPrimaryKeyAndColumns(new_order_by_ast, new_primary_key_ast, new_columns, /* only_check = */ true); + setPrimaryKeyIndicesAndColumns(new_order_by_ast, new_primary_key_ast, + new_columns, new_indices, /* only_check = */ true); /// Check that type conversions are possible. ExpressionActionsPtr unused_expression; NameToNameMap unused_map; bool unused_bool; - createConvertExpression(nullptr, getColumns().getAllPhysical(), new_columns.getAllPhysical(), unused_expression, unused_map, unused_bool); + createConvertExpression(nullptr, getColumns().getAllPhysical(), new_columns.getAllPhysical(), + getIndicesDescription().indices, new_indices.indices, unused_expression, unused_map, unused_bool); } void MergeTreeData::createConvertExpression(const DataPartPtr & part, const NamesAndTypesList & old_columns, const NamesAndTypesList & new_columns, - ExpressionActionsPtr & out_expression, NameToNameMap & out_rename_map, bool & out_force_update_metadata) const + const IndicesAsts & old_indices, const IndicesAsts & new_indices, ExpressionActionsPtr & out_expression, + NameToNameMap & out_rename_map, bool & out_force_update_metadata) const { out_expression = nullptr; out_rename_map = {}; @@ -1100,6 +1163,21 @@ void MergeTreeData::createConvertExpression(const DataPartPtr & part, const Name /// For every column that need to be converted: source column name, column name of calculated expression for conversion. std::vector> conversions; + + /// Remove old indices + std::set new_indices_set; + for (const auto & index_decl : new_indices) + new_indices_set.emplace(dynamic_cast(*index_decl.get()).name); + for (const auto & index_decl : old_indices) + { + const auto & index = dynamic_cast(*index_decl.get()); + if (!new_indices_set.count(index.name)) + { + out_rename_map["skp_idx_" + index.name + ".idx"] = ""; + out_rename_map["skp_idx_" + index.name + ".mrk"] = ""; + } + } + /// Collect counts for shared streams of different columns. As an example, Nested columns have shared stream with array sizes. std::map stream_counts; for (const NameAndTypePair & column : old_columns) @@ -1230,12 +1308,15 @@ void MergeTreeData::createConvertExpression(const DataPartPtr & part, const Name MergeTreeData::AlterDataPartTransactionPtr MergeTreeData::alterDataPart( const DataPartPtr & part, const NamesAndTypesList & new_columns, + const IndicesAsts & new_indices, bool skip_sanity_checks) { ExpressionActionsPtr expression; AlterDataPartTransactionPtr transaction(new AlterDataPartTransaction(part)); /// Blocks changes to the part. 
bool force_update_metadata; - createConvertExpression(part, part->columns, new_columns, expression, transaction->rename_map, force_update_metadata); + createConvertExpression(part, part->columns, new_columns, + getIndicesDescription().indices, new_indices, + expression, transaction->rename_map, force_update_metadata); size_t num_files_to_modify = transaction->rename_map.size(); size_t num_files_to_remove = 0; @@ -2062,7 +2143,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::loadPartAndFixMetadata(const St /// Check the data while we are at it. if (part->checksums.empty()) { - part->checksums = checkDataPart(full_part_path, index_granularity, false, primary_key_data_types); + part->checksums = checkDataPart(full_part_path, index_granularity, false, primary_key_data_types, skip_indices); { WriteBufferFromFile out(full_part_path + "checksums.txt.tmp", 4096); diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index b79794bee0e..21e0d296933 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -13,6 +14,7 @@ #include #include #include +#include #include #include @@ -303,6 +305,7 @@ public: MergeTreeData(const String & database_, const String & table_, const String & full_path_, const ColumnsDescription & columns_, + const IndicesDescription & indices_, Context & context_, const String & date_column_name, const ASTPtr & partition_by_ast_, @@ -476,7 +479,7 @@ public: /// Check if the ALTER can be performed: /// - all needed columns are present. /// - all type conversions can be done. - /// - columns corresponding to primary key, sign, sampling expression and date are not affected. + /// - columns corresponding to primary key, indices, sign, sampling expression and date are not affected. /// If something is wrong, throws an exception. void checkAlter(const AlterCommands & commands); @@ -487,6 +490,7 @@ public: AlterDataPartTransactionPtr alterDataPart( const DataPartPtr & part, const NamesAndTypesList & new_columns, + const IndicesAsts & new_indices, bool skip_sanity_checks); /// Freezes all parts. @@ -508,6 +512,7 @@ public: bool hasSortingKey() const { return !sorting_key_columns.empty(); } bool hasPrimaryKey() const { return !primary_key_columns.empty(); } + bool hasSkipIndices() const { return !skip_indices.empty(); } ASTPtr getSortingKeyAST() const { return sorting_key_expr_ast; } ASTPtr getPrimaryKeyAST() const { return primary_key_expr_ast; } @@ -581,6 +586,12 @@ public: Int64 minmax_idx_date_column_pos = -1; /// In a common case minmax index includes a date column. Int64 minmax_idx_time_column_pos = -1; /// In other cases, minmax index often includes a dateTime column. + /// Secondary (data skipping) indices for MergeTree + MergeTreeIndices skip_indices; + + ExpressionActionsPtr primary_key_and_skip_indices_expr; + ExpressionActionsPtr sorting_key_and_skip_indices_expr; + /// Names of columns for primary key + secondary sorting columns. Names sorting_key_columns; ASTPtr sorting_key_expr_ast; @@ -721,7 +732,9 @@ private: /// The same for clearOldTemporaryDirectories. 
std::mutex clear_old_temporary_directories_mutex; - void setPrimaryKeyAndColumns(const ASTPtr & new_order_by_ast, ASTPtr new_primary_key_ast, const ColumnsDescription & new_columns, bool only_check = false); + void setPrimaryKeyIndicesAndColumns(const ASTPtr &new_order_by_ast, ASTPtr new_primary_key_ast, + const ColumnsDescription &new_columns, + const IndicesDescription &indices_description, bool only_check = false); void initPartitionKey(); @@ -733,7 +746,8 @@ private: /// Files to be deleted are mapped to an empty string in out_rename_map. /// If part == nullptr, just checks that all type conversions are possible. void createConvertExpression(const DataPartPtr & part, const NamesAndTypesList & old_columns, const NamesAndTypesList & new_columns, - ExpressionActionsPtr & out_expression, NameToNameMap & out_rename_map, bool & out_force_update_metadata) const; + const IndicesAsts & old_indices, const IndicesAsts & new_indices, + ExpressionActionsPtr & out_expression, NameToNameMap & out_rename_map, bool & out_force_update_metadata) const; /// Calculates column sizes in compressed form for the current state of data_parts. Call with data_parts mutex locked. void calculateColumnSizesImpl(); diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index acc94cd016a..582667c9083 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -334,12 +334,19 @@ MergeTreeData::DataPartsVector MergeTreeDataMergerMutator::selectAllPartsFromPar static void extractMergingAndGatheringColumns( const NamesAndTypesList & all_columns, const ExpressionActionsPtr & sorting_key_expr, + const MergeTreeIndices & indexes, const MergeTreeData::MergingParams & merging_params, NamesAndTypesList & gathering_columns, Names & gathering_column_names, NamesAndTypesList & merging_columns, Names & merging_column_names) { Names sort_key_columns_vec = sorting_key_expr->getRequiredColumns(); std::set key_columns(sort_key_columns_vec.cbegin(), sort_key_columns_vec.cend()); + for (const auto & index : indexes) + { + Names index_columns_vec = index->expr->getRequiredColumns(); + std::copy(index_columns_vec.cbegin(), index_columns_vec.cend(), + std::inserter(key_columns, key_columns.end())); + } /// Force sign column for Collapsing mode if (merging_params.mode == MergeTreeData::MergingParams::Collapsing) @@ -550,7 +557,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor NamesAndTypesList gathering_columns, merging_columns; Names gathering_column_names, merging_column_names; extractMergingAndGatheringColumns( - all_columns, data.sorting_key_expr, + all_columns, data.sorting_key_expr, data.skip_indices, data.merging_params, gathering_columns, gathering_column_names, merging_columns, merging_column_names); MergeTreeData::MutableDataPartPtr new_data_part = std::make_shared( @@ -629,11 +636,12 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor input->setProgressCallback(MergeProgressCallback( merge_entry, sum_input_rows_upper_bound, column_sizes, watch_prev_elapsed, merge_alg)); - if (data.hasPrimaryKey()) - src_streams.emplace_back(std::make_shared( - std::make_shared(BlockInputStreamPtr(std::move(input)), data.sorting_key_expr))); - else - src_streams.emplace_back(std::move(input)); + BlockInputStreamPtr stream = std::move(input); + if (data.hasPrimaryKey() || data.hasSkipIndices()) + stream = std::make_shared( + 
+                    std::make_shared(stream, data.sorting_key_and_skip_indices_expr));
+
+        src_streams.emplace_back(stream);
     }
 
     Names sort_columns = data.sorting_key_columns;
@@ -897,10 +905,9 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor
     if (in_header.columns() == all_columns.size())
     {
         /// All columns are modified, proceed to write a new part from scratch.
-
-        if (data.hasPrimaryKey())
+        if (data.hasPrimaryKey() || data.hasSkipIndices())
             in = std::make_shared(
-                std::make_shared(in, data.primary_key_expr));
+                std::make_shared(in, data.primary_key_and_skip_indices_expr));
 
         MergeTreeDataPart::MinMaxIndex minmax_idx;
 
@@ -927,6 +934,20 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor
         /// We will modify only some of the columns. Other columns and key values can be copied as-is.
         /// TODO: check that we modify only non-key columns in this case.
 
+        /// Checks if columns used in skipping indexes were modified.
+        for (const auto & col : in_header.getNames())
+        {
+            for (const auto & index : data.skip_indices)
+            {
+                const auto & index_cols = index->expr->getRequiredColumns();
+                auto it = find(cbegin(index_cols), cend(index_cols), col);
+                if (it != cend(index_cols))
+                    throw Exception("You cannot modify columns used in an index. Index name: '"
+                                    + index->name
+                                    + "' bad column: '" + *it + "'", ErrorCodes::ILLEGAL_COLUMN);
+            }
+        }
+
         NameSet files_to_skip = {"checksums.txt", "columns.txt"};
         for (const auto & entry : in_header)
         {
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.h
index 97f8466be20..58f5842b5f5 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.h
@@ -120,7 +120,7 @@ public:
     enum class MergeAlgorithm
     {
         Horizontal, /// per-row merge of all columns
-        Vertical /// per-row merge of PK columns, per-column gather for non-PK columns
+        Vertical /// per-row merge of PK and secondary indices columns, per-column gather for non-PK columns
     };
 
 private:
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h
index 64f3863082a..b49a1ca7c9d 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h
@@ -4,6 +4,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
index 897083bee2f..61024c383e9 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@@ -1,11 +1,15 @@
 #include /// For calculations related to sampling coefficients.
 #include
+#include
+
 #include
 #include
 #include
 #include
 #include
+#include
+#include
 #include
 #include
 #include
@@ -528,6 +532,17 @@ BlockInputStreams MergeTreeDataSelectExecutor::readFromParts(
         else
             ranges.ranges = MarkRanges{MarkRange{0, part->marks_count}};
 
+        /// It can be done in multiple threads (one thread for each part).
+        /// Maybe it should be moved to BlockInputStream, but it can cause some problems.
+ for (const auto & index : data.skip_indices) + { + auto condition = index->createIndexCondition(query_info, context); + if (!condition->alwaysUnknownOrTrue()) + { + ranges.ranges = filterMarksUsingIndex(index, condition, part, ranges.ranges, settings); + } + } + if (!ranges.ranges.empty()) { parts_with_ranges.push_back(ranges); @@ -942,4 +957,70 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( return res; } +MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( + MergeTreeIndexPtr index, + IndexConditionPtr condition, + MergeTreeData::DataPartPtr part, + const MarkRanges & ranges, + const Settings & settings) const +{ + if (!Poco::File(part->getFullPath() + index->getFileName() + ".idx").exists()) + { + LOG_DEBUG(log, "File for index `" << index->name << "` does not exist. Skipping it."); + return ranges; + } + + const size_t min_marks_for_seek = (settings.merge_tree_min_rows_for_seek + data.index_granularity - 1) / data.index_granularity; + + size_t granules_dropped = 0; + + MergeTreeIndexReader reader( + index, part, + ((part->marks_count + index->granularity - 1) / index->granularity), + ranges); + + MarkRanges res; + + /// Some granules can cover two or more ranges, + /// this variable is stored to avoid reading the same granule twice. + MergeTreeIndexGranulePtr granule = nullptr; + size_t last_index_mark = 0; + for (const auto & range : ranges) + { + MarkRange index_range( + range.begin / index->granularity, + (range.end + index->granularity - 1) / index->granularity); + + if (last_index_mark != index_range.begin || !granule) + reader.seek(index_range.begin); + + for (size_t index_mark = index_range.begin; index_mark < index_range.end; ++index_mark) + { + if (index_mark != index_range.begin || !granule || last_index_mark != index_range.begin) + granule = reader.read(); + + MarkRange data_range( + std::max(range.begin, index_mark * index->granularity), + std::min(range.end, (index_mark + 1) * index->granularity)); + + if (!condition->mayBeTrueOnGranule(granule)) + { + ++granules_dropped; + continue; + } + + if (res.empty() || res.back().end - data_range.begin >= min_marks_for_seek) + res.push_back(data_range); + else + res.back().end = data_range.end; + } + + last_index_mark = index_range.end - 1; + } + + LOG_DEBUG(log, "Index `" << index->name << "` has dropped " << granules_dropped << " granules."); + + return res; +} + } diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index 576b88f2e41..22f56a14e27 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -81,6 +81,13 @@ private: const MergeTreeData::DataPart::Index & index, const KeyCondition & key_condition, const Settings & settings) const; + + MarkRanges filterMarksUsingIndex( + MergeTreeIndexPtr index, + IndexConditionPtr condition, + MergeTreeData::DataPartPtr part, + const MarkRanges & ranges, + const Settings & settings) const; }; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp index e053ba3d8ca..f5a4ac74a6d 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -180,8 +180,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa dir.createDirectories(); /// If we need to calculate some columns to sort. 
- if (data.hasSortingKey()) - data.sorting_key_expr->execute(block); + if (data.hasSortingKey() || data.hasSkipIndices()) + data.sorting_key_and_skip_indices_expr->execute(block); Names sort_columns = data.sorting_key_columns; SortDescription sort_description; diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexReader.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexReader.cpp new file mode 100644 index 00000000000..b1836d085b4 --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexReader.cpp @@ -0,0 +1,29 @@ +#include + + +namespace DB +{ + +MergeTreeIndexReader::MergeTreeIndexReader( + MergeTreeIndexPtr index, MergeTreeData::DataPartPtr part, size_t marks_count, const MarkRanges & all_mark_ranges) + : index(index), stream( + part->getFullPath() + index->getFileName(), ".idx", marks_count, + all_mark_ranges, nullptr, false, nullptr, 0, DBMS_DEFAULT_BUFFER_SIZE, + ReadBufferFromFileBase::ProfileCallback{}, CLOCK_MONOTONIC_COARSE) +{ + stream.seekToStart(); +} + +void MergeTreeIndexReader::seek(size_t mark) +{ + stream.seekToMark(mark); +} + +MergeTreeIndexGranulePtr MergeTreeIndexReader::read() +{ + auto granule = index->createIndexGranule(); + granule->deserializeBinary(*stream.data_buffer); + return granule; +} + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexReader.h b/dbms/src/Storages/MergeTree/MergeTreeIndexReader.h new file mode 100644 index 00000000000..38dbd69f6e6 --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexReader.h @@ -0,0 +1,28 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +class MergeTreeIndexReader +{ +public: + MergeTreeIndexReader( + MergeTreeIndexPtr index, + MergeTreeData::DataPartPtr part, + size_t marks_count, + const MarkRanges & all_mark_ranges); + + void seek(size_t mark); + + MergeTreeIndexGranulePtr read(); + +private: + MergeTreeIndexPtr index; + MergeTreeReaderStream stream; +}; + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndices.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndices.cpp new file mode 100644 index 00000000000..abceee5ea2d --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeIndices.cpp @@ -0,0 +1,57 @@ +#include +#include +#include +#include +#include + +#include + +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int INCORRECT_QUERY; + extern const int UNKNOWN_EXCEPTION; +} + +void MergeTreeIndexFactory::registerIndex(const std::string &name, Creator creator) +{ + if (!indexes.emplace(name, std::move(creator)).second) + throw Exception("MergeTreeIndexFactory: the Index creator name '" + name + "' is not unique", + ErrorCodes::LOGICAL_ERROR); +} + +std::unique_ptr MergeTreeIndexFactory::get( + const NamesAndTypesList & columns, + std::shared_ptr node, + const Context & context) const +{ + if (!node->type) + throw Exception( + "for index TYPE is required", ErrorCodes::INCORRECT_QUERY); + if (node->type->parameters && !node->type->parameters->children.empty()) + throw Exception( + "Index type can not have parameters", ErrorCodes::INCORRECT_QUERY); + + boost::algorithm::to_lower(node->type->name); + auto it = indexes.find(node->type->name); + if (it == indexes.end()) + throw Exception( + "Unknown Index type '" + node->type->name + "'. 
Available index types: " + + std::accumulate(indexes.cbegin(), indexes.cend(), std::string{}, + [] (auto && lft, const auto & rht) -> std::string { + if (lft == "") + return rht.first; + else + return lft + ", " + rht.first; + }), + ErrorCodes::INCORRECT_QUERY); + return it->second(columns, node, context); +} + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndices.h b/dbms/src/Storages/MergeTree/MergeTreeIndices.h new file mode 100644 index 00000000000..2ccba3195fe --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeIndices.h @@ -0,0 +1,126 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +constexpr auto INDEX_FILE_PREFIX = "skp_idx_"; + +namespace DB +{ + +class MergeTreeData; +class MergeTreeIndex; + +using MergeTreeIndexPtr = std::shared_ptr; +using MutableMergeTreeIndexPtr = std::shared_ptr; + + +struct MergeTreeIndexGranule +{ + virtual ~MergeTreeIndexGranule() = default; + + virtual void serializeBinary(WriteBuffer & ostr) const = 0; + virtual void deserializeBinary(ReadBuffer & istr) = 0; + + virtual String toString() const = 0; + virtual bool empty() const = 0; + + virtual void update(const Block & block, size_t * pos, size_t limit) = 0; +}; + + +using MergeTreeIndexGranulePtr = std::shared_ptr; +using MergeTreeIndexGranules = std::vector; + +/// Condition on the index. +class IndexCondition +{ +public: + virtual ~IndexCondition() = default; + /// Checks if this index is useful for query. + virtual bool alwaysUnknownOrTrue() const = 0; + + virtual bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr granule) const = 0; +}; + +using IndexConditionPtr = std::shared_ptr; + + +/// Structure for storing basic index info like columns, expression, arguments, ... 
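+/// Each concrete index produces one MergeTreeIndexGranule per `granularity` primary-index marks (serialized to skp_idx_<name>.idx with companion .mrk marks) and an IndexCondition per query that decides whether a granule may match.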
+class MergeTreeIndex +{ +public: + MergeTreeIndex( + String name, + ExpressionActionsPtr expr, + const Names & columns, + const DataTypes & data_types, + const Block & header, + size_t granularity) + : name(name) + , expr(expr) + , columns(columns) + , data_types(data_types) + , header(header) + , granularity(granularity) {} + + virtual ~MergeTreeIndex() = default; + + /// gets filename without extension + String getFileName() const { return INDEX_FILE_PREFIX + name; } + + virtual MergeTreeIndexGranulePtr createIndexGranule() const = 0; + + virtual IndexConditionPtr createIndexCondition( + const SelectQueryInfo & query_info, const Context & context) const = 0; + + String name; + ExpressionActionsPtr expr; + Names columns; + DataTypes data_types; + Block header; + size_t granularity; +}; + + +using MergeTreeIndices = std::vector; + + +class MergeTreeIndexFactory : public ext::singleton +{ + friend class ext::singleton; + +public: + using Creator = std::function< + std::unique_ptr( + const NamesAndTypesList & columns, + std::shared_ptr node, + const Context & context)>; + + std::unique_ptr get( + const NamesAndTypesList & columns, + std::shared_ptr node, + const Context & context) const; + + void registerIndex(const std::string & name, Creator creator); + + const auto & getAllIndexes() const { return indexes; } + +protected: + MergeTreeIndexFactory() = default; + +private: + using Indexes = std::unordered_map; + Indexes indexes; +}; + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp new file mode 100644 index 00000000000..d15e86a6e22 --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp @@ -0,0 +1,164 @@ +#include + +#include +#include +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int INCORRECT_QUERY; +} + + +MergeTreeMinMaxGranule::MergeTreeMinMaxGranule(const MergeTreeMinMaxIndex & index) + : MergeTreeIndexGranule(), index(index), parallelogram() +{ +} + +void MergeTreeMinMaxGranule::serializeBinary(WriteBuffer & ostr) const +{ + if (empty()) + throw Exception( + "Attempt to write empty minmax index `" + index.name + "`", ErrorCodes::LOGICAL_ERROR); + + for (size_t i = 0; i < index.columns.size(); ++i) + { + const DataTypePtr & type = index.data_types[i]; + + type->serializeBinary(parallelogram[i].left, ostr); + type->serializeBinary(parallelogram[i].right, ostr); + } +} + +void MergeTreeMinMaxGranule::deserializeBinary(ReadBuffer & istr) +{ + parallelogram.clear(); + for (size_t i = 0; i < index.columns.size(); ++i) + { + const DataTypePtr & type = index.data_types[i]; + + Field min_val; + type->deserializeBinary(min_val, istr); + Field max_val; + type->deserializeBinary(max_val, istr); + + parallelogram.emplace_back(min_val, true, max_val, true); + } +} + +String MergeTreeMinMaxGranule::toString() const +{ + String res = ""; + + for (size_t i = 0; i < parallelogram.size(); ++i) + { + res += "[" + + applyVisitor(FieldVisitorToString(), parallelogram[i].left) + ", " + + applyVisitor(FieldVisitorToString(), parallelogram[i].right) + "]"; + } + + return res; +} + +void MergeTreeMinMaxGranule::update(const Block & block, size_t * pos, size_t limit) +{ + size_t rows_read = std::min(limit, block.rows() - *pos); + + for (size_t i = 0; i < index.columns.size(); ++i) + { + const auto & column = block.getByName(index.columns[i]).column; + + Field field_min, field_max; + column->cut(*pos, rows_read)->getExtremes(field_min, field_max); + 
+ if (parallelogram.size() <= i) + { + parallelogram.emplace_back(field_min, true, field_max, true); + } + else + { + parallelogram[i].left = std::min(parallelogram[i].left, field_min); + parallelogram[i].right = std::max(parallelogram[i].right, field_max); + } + } + + *pos += rows_read; +} + + +MinMaxCondition::MinMaxCondition( + const SelectQueryInfo & query, + const Context & context, + const MergeTreeMinMaxIndex & index) + : IndexCondition(), index(index), condition(query, context, index.columns, index.expr) {} + +bool MinMaxCondition::alwaysUnknownOrTrue() const +{ + return condition.alwaysUnknownOrTrue(); +} + +bool MinMaxCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const +{ + std::shared_ptr granule + = std::dynamic_pointer_cast(idx_granule); + if (!granule) + throw Exception( + "Minmax index condition got wrong granule", ErrorCodes::LOGICAL_ERROR); + + return condition.mayBeTrueInParallelogram(granule->parallelogram, index.data_types); +} + + +MergeTreeIndexGranulePtr MergeTreeMinMaxIndex::createIndexGranule() const +{ + return std::make_shared(*this); +} + +IndexConditionPtr MergeTreeMinMaxIndex::createIndexCondition( + const SelectQueryInfo & query, const Context & context) const +{ + return std::make_shared(query, context, *this); +} + + +std::unique_ptr MergeTreeMinMaxIndexCreator( + const NamesAndTypesList & new_columns, + std::shared_ptr node, + const Context & context) +{ + if (node->name.empty()) + throw Exception("Index must have a unique name", ErrorCodes::INCORRECT_QUERY); + + if (node->type->arguments) + throw Exception("Minmax index must not have any arguments", ErrorCodes::INCORRECT_QUERY); + + ASTPtr expr_list = MergeTreeData::extractKeyExpressionList(node->expr->clone()); + auto syntax = SyntaxAnalyzer(context, {}).analyze( + expr_list, new_columns); + auto minmax_expr = ExpressionAnalyzer(expr_list, syntax, context).getActions(false); + + auto sample = ExpressionAnalyzer(expr_list, syntax, context) + .getActions(true)->getSampleBlock(); + + Names columns; + DataTypes data_types; + + for (size_t i = 0; i < expr_list->children.size(); ++i) + { + const auto & column = sample.getByPosition(i); + + columns.emplace_back(column.name); + data_types.emplace_back(column.type); + } + + return std::make_unique( + node->name, std::move(minmax_expr), columns, data_types, sample, node->granularity.get()); +} + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h new file mode 100644 index 00000000000..ac1ca1787e1 --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h @@ -0,0 +1,78 @@ +#pragma once + +#include +#include +#include + +#include + + +namespace DB +{ + +class MergeTreeMinMaxIndex; + + +struct MergeTreeMinMaxGranule : public MergeTreeIndexGranule +{ + explicit MergeTreeMinMaxGranule(const MergeTreeMinMaxIndex & index); + + void serializeBinary(WriteBuffer & ostr) const override; + void deserializeBinary(ReadBuffer & istr) override; + + String toString() const override; + bool empty() const override { return parallelogram.empty(); } + + void update(const Block & block, size_t * pos, size_t limit) override; + + ~MergeTreeMinMaxGranule() override = default; + + const MergeTreeMinMaxIndex & index; + std::vector parallelogram; +}; + + +class MinMaxCondition : public IndexCondition +{ +public: + MinMaxCondition( + const SelectQueryInfo & query, + const Context & context, + const MergeTreeMinMaxIndex & index); + + bool alwaysUnknownOrTrue() const override; + + bool
mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const override; + + ~MinMaxCondition() override = default; +private: + const MergeTreeMinMaxIndex & index; + KeyCondition condition; +}; + + +class MergeTreeMinMaxIndex : public MergeTreeIndex +{ +public: + MergeTreeMinMaxIndex( + String name, + ExpressionActionsPtr expr, + const Names & columns, + const DataTypes & data_types, + const Block & header, + size_t granularity) + : MergeTreeIndex(name, expr, columns, data_types, header, granularity) {} + + ~MergeTreeMinMaxIndex() override = default; + + MergeTreeIndexGranulePtr createIndexGranule() const override; + + IndexConditionPtr createIndexCondition( + const SelectQueryInfo & query, const Context & context) const override; + +}; + +std::unique_ptr MergeTreeMinMaxIndexCreator( + const NamesAndTypesList & columns, std::shared_ptr node, const Context & context); + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeReader.cpp b/dbms/src/Storages/MergeTree/MergeTreeReader.cpp index de39ca1b7e7..eca9c5818e0 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeReader.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeReader.cpp @@ -154,205 +154,6 @@ size_t MergeTreeReader::readRows(size_t from_mark, bool continue_reading, size_t return read_rows; } - -MergeTreeReader::Stream::Stream( - const String & path_prefix_, const String & extension_, size_t marks_count_, - const MarkRanges & all_mark_ranges, - MarkCache * mark_cache_, bool save_marks_in_cache_, - UncompressedCache * uncompressed_cache, - size_t aio_threshold, size_t max_read_buffer_size, - const ReadBufferFromFileBase::ProfileCallback & profile_callback, clockid_t clock_type) - : path_prefix(path_prefix_), extension(extension_), marks_count(marks_count_) - , mark_cache(mark_cache_), save_marks_in_cache(save_marks_in_cache_) -{ - /// Compute the size of the buffer. - size_t max_mark_range = 0; - - for (size_t i = 0; i < all_mark_ranges.size(); ++i) - { - size_t right = all_mark_ranges[i].end; - /// NOTE: if we are reading the whole file, then right == marks_count - /// and we will use max_read_buffer_size for buffer size, thus avoiding the need to load marks. - - /// If the end of range is inside the block, we will need to read it too. - if (right < marks_count && getMark(right).offset_in_decompressed_block > 0) - { - while (right < marks_count - && getMark(right).offset_in_compressed_file - == getMark(all_mark_ranges[i].end).offset_in_compressed_file) - { - ++right; - } - } - - /// If there are no marks after the end of range, just use max_read_buffer_size - if (right >= marks_count - || (right + 1 == marks_count - && getMark(right).offset_in_compressed_file - == getMark(all_mark_ranges[i].end).offset_in_compressed_file)) - { - max_mark_range = max_read_buffer_size; - break; - } - - max_mark_range = std::max(max_mark_range, - getMark(right).offset_in_compressed_file - getMark(all_mark_ranges[i].begin).offset_in_compressed_file); - } - - /// Avoid empty buffer. May happen while reading dictionary for DataTypeLowCardinality. - /// For example: part has single dictionary and all marks point to the same position. - if (max_mark_range == 0) - max_mark_range = max_read_buffer_size; - - size_t buffer_size = std::min(max_read_buffer_size, max_mark_range); - - /// Estimate size of the data to be read. - size_t estimated_size = 0; - if (aio_threshold > 0) - { - for (const auto & mark_range : all_mark_ranges) - { - size_t offset_begin = (mark_range.begin > 0) - ? 
getMark(mark_range.begin).offset_in_compressed_file - : 0; - - size_t offset_end = (mark_range.end < marks_count) - ? getMark(mark_range.end).offset_in_compressed_file - : Poco::File(path_prefix + extension).getSize(); - - if (offset_end > offset_begin) - estimated_size += offset_end - offset_begin; - } - } - - /// Initialize the objects that shall be used to perform read operations. - if (uncompressed_cache) - { - auto buffer = std::make_unique( - path_prefix + extension, uncompressed_cache, estimated_size, aio_threshold, buffer_size); - - if (profile_callback) - buffer->setProfileCallback(profile_callback, clock_type); - - cached_buffer = std::move(buffer); - data_buffer = cached_buffer.get(); - } - else - { - auto buffer = std::make_unique( - path_prefix + extension, estimated_size, aio_threshold, buffer_size); - - if (profile_callback) - buffer->setProfileCallback(profile_callback, clock_type); - - non_cached_buffer = std::move(buffer); - data_buffer = non_cached_buffer.get(); - } -} - - -const MarkInCompressedFile & MergeTreeReader::Stream::getMark(size_t index) -{ - if (!marks) - loadMarks(); - return (*marks)[index]; -} - - -void MergeTreeReader::Stream::loadMarks() -{ - std::string mrk_path = path_prefix + ".mrk"; - - auto load = [&]() -> MarkCache::MappedPtr - { - /// Memory for marks must not be accounted as memory usage for query, because they are stored in shared cache. - auto temporarily_disable_memory_tracker = getCurrentMemoryTrackerActionLock(); - - size_t file_size = Poco::File(mrk_path).getSize(); - size_t expected_file_size = sizeof(MarkInCompressedFile) * marks_count; - if (expected_file_size != file_size) - throw Exception( - "bad size of marks file `" + mrk_path + "':" + std::to_string(file_size) + ", must be: " + std::to_string(expected_file_size), - ErrorCodes::CORRUPTED_DATA); - - auto res = std::make_shared(marks_count); - - /// Read directly to marks. - ReadBufferFromFile buffer(mrk_path, file_size, -1, reinterpret_cast(res->data())); - - if (buffer.eof() || buffer.buffer().size() != file_size) - throw Exception("Cannot read all marks from file " + mrk_path, ErrorCodes::CANNOT_READ_ALL_DATA); - - return res; - }; - - if (mark_cache) - { - auto key = mark_cache->hash(mrk_path); - if (save_marks_in_cache) - { - marks = mark_cache->getOrSet(key, load); - } - else - { - marks = mark_cache->get(key); - if (!marks) - marks = load(); - } - } - else - marks = load(); - - if (!marks) - throw Exception("Failed to load marks: " + mrk_path, ErrorCodes::LOGICAL_ERROR); -} - - -void MergeTreeReader::Stream::seekToMark(size_t index) -{ - MarkInCompressedFile mark = getMark(index); - - try - { - if (cached_buffer) - cached_buffer->seek(mark.offset_in_compressed_file, mark.offset_in_decompressed_block); - if (non_cached_buffer) - non_cached_buffer->seek(mark.offset_in_compressed_file, mark.offset_in_decompressed_block); - } - catch (Exception & e) - { - /// Better diagnostics. - if (e.code() == ErrorCodes::ARGUMENT_OUT_OF_BOUND) - e.addMessage("(while seeking to mark " + toString(index) - + " of column " + path_prefix + "; offsets are: " - + toString(mark.offset_in_compressed_file) + " " - + toString(mark.offset_in_decompressed_block) + ")"); - - throw; - } -} - - -void MergeTreeReader::Stream::seekToStart() -{ - try - { - if (cached_buffer) - cached_buffer->seek(0, 0); - if (non_cached_buffer) - non_cached_buffer->seek(0, 0); - } - catch (Exception & e) - { - /// Better diagnostics. 
- if (e.code() == ErrorCodes::ARGUMENT_OUT_OF_BOUND) - e.addMessage("(while seeking to start of column " + path_prefix + ")"); - - throw; - } -} - - void MergeTreeReader::addStreams(const String & name, const IDataType & type, const ReadBufferFromFileBase::ProfileCallback & profile_callback, clockid_t clock_type) { @@ -371,7 +172,7 @@ void MergeTreeReader::addStreams(const String & name, const IDataType & type, if (!data_file_exists) return; - streams.emplace(stream_name, std::make_unique( + streams.emplace(stream_name, std::make_unique( path + stream_name, DATA_FILE_EXTENSION, data_part->marks_count, all_mark_ranges, mark_cache, save_marks_in_cache, uncompressed_cache, aio_threshold, max_read_buffer_size, profile_callback, clock_type)); @@ -401,7 +202,7 @@ void MergeTreeReader::readData( if (it == streams.end()) return nullptr; - Stream & stream = *it->second; + MergeTreeReaderStream & stream = *it->second; if (stream_for_prefix) { diff --git a/dbms/src/Storages/MergeTree/MergeTreeReader.h b/dbms/src/Storages/MergeTree/MergeTreeReader.h index ac5d46fb664..74bef7ac118 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeReader.h +++ b/dbms/src/Storages/MergeTree/MergeTreeReader.h @@ -1,11 +1,7 @@ #pragma once -#include -#include -#include -#include -#include #include +#include #include @@ -13,7 +9,6 @@ namespace DB { class IDataType; -class CachedCompressedReadBuffer; /// Reads the data between pairs of marks in the same part. When reading consecutive ranges, avoids unnecessary seeks. /// When ranges are almost consecutive, seeks are fast because they are performed inside the buffer. @@ -57,44 +52,7 @@ public: size_t readRows(size_t from_mark, bool continue_reading, size_t max_rows_to_read, Block & res); private: - class Stream - { - public: - Stream( - const String & path_prefix_, const String & extension_, size_t marks_count_, - const MarkRanges & all_mark_ranges, - MarkCache * mark_cache, bool save_marks_in_cache, - UncompressedCache * uncompressed_cache, - size_t aio_threshold, size_t max_read_buffer_size, - const ReadBufferFromFileBase::ProfileCallback & profile_callback, clockid_t clock_type); - - void seekToMark(size_t index); - void seekToStart(); - - ReadBuffer * data_buffer; - - private: - Stream() = default; - - /// NOTE: lazily loads marks from the marks cache. - const MarkInCompressedFile & getMark(size_t index); - - void loadMarks(); - - std::string path_prefix; - std::string extension; - - size_t marks_count; - - MarkCache * mark_cache; - bool save_marks_in_cache; - MarkCache::MappedPtr marks; - - std::unique_ptr cached_buffer; - std::unique_ptr non_cached_buffer; - }; - - using FileStreams = std::map>; + using FileStreams = std::map>; /// avg_value_size_hints are used to reduce the number of reallocations when creating columns of variable size. 
ValueSizeMap avg_value_size_hints; diff --git a/dbms/src/Storages/MergeTree/MergeTreeReaderStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeReaderStream.cpp new file mode 100644 index 00000000000..890e7b13c6c --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeReaderStream.cpp @@ -0,0 +1,215 @@ +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int CORRUPTED_DATA; + extern const int CANNOT_READ_ALL_DATA; + extern const int ARGUMENT_OUT_OF_BOUND; +} + + +MergeTreeReaderStream::MergeTreeReaderStream( + const String & path_prefix_, const String & extension_, size_t marks_count_, + const MarkRanges & all_mark_ranges, + MarkCache * mark_cache_, bool save_marks_in_cache_, + UncompressedCache * uncompressed_cache, + size_t aio_threshold, size_t max_read_buffer_size, + const ReadBufferFromFileBase::ProfileCallback & profile_callback, clockid_t clock_type) + : path_prefix(path_prefix_), extension(extension_), marks_count(marks_count_) + , mark_cache(mark_cache_), save_marks_in_cache(save_marks_in_cache_) +{ + /// Compute the size of the buffer. + size_t max_mark_range = 0; + + for (size_t i = 0; i < all_mark_ranges.size(); ++i) + { + size_t right = all_mark_ranges[i].end; + /// NOTE: if we are reading the whole file, then right == marks_count + /// and we will use max_read_buffer_size for buffer size, thus avoiding the need to load marks. + + /// If the end of range is inside the block, we will need to read it too. + if (right < marks_count && getMark(right).offset_in_decompressed_block > 0) + { + while (right < marks_count + && getMark(right).offset_in_compressed_file + == getMark(all_mark_ranges[i].end).offset_in_compressed_file) + { + ++right; + } + } + + /// If there are no marks after the end of range, just use max_read_buffer_size + if (right >= marks_count + || (right + 1 == marks_count + && getMark(right).offset_in_compressed_file + == getMark(all_mark_ranges[i].end).offset_in_compressed_file)) + { + max_mark_range = max_read_buffer_size; + break; + } + + max_mark_range = std::max(max_mark_range, + getMark(right).offset_in_compressed_file - getMark(all_mark_ranges[i].begin).offset_in_compressed_file); + } + + /// Avoid empty buffer. May happen while reading dictionary for DataTypeLowCardinality. + /// For example: part has single dictionary and all marks point to the same position. + if (max_mark_range == 0) + max_mark_range = max_read_buffer_size; + + size_t buffer_size = std::min(max_read_buffer_size, max_mark_range); + + /// Estimate size of the data to be read. + size_t estimated_size = 0; + if (aio_threshold > 0) + { + for (const auto & mark_range : all_mark_ranges) + { + size_t offset_begin = (mark_range.begin > 0) + ? getMark(mark_range.begin).offset_in_compressed_file + : 0; + + size_t offset_end = (mark_range.end < marks_count) + ? getMark(mark_range.end).offset_in_compressed_file + : Poco::File(path_prefix + extension).getSize(); + + if (offset_end > offset_begin) + estimated_size += offset_end - offset_begin; + } + } + + /// Initialize the objects that shall be used to perform read operations. 
+ if (uncompressed_cache) + { + auto buffer = std::make_unique( + path_prefix + extension, uncompressed_cache, estimated_size, aio_threshold, buffer_size); + + if (profile_callback) + buffer->setProfileCallback(profile_callback, clock_type); + + cached_buffer = std::move(buffer); + data_buffer = cached_buffer.get(); + } + else + { + auto buffer = std::make_unique( + path_prefix + extension, estimated_size, aio_threshold, buffer_size); + + if (profile_callback) + buffer->setProfileCallback(profile_callback, clock_type); + + non_cached_buffer = std::move(buffer); + data_buffer = non_cached_buffer.get(); + } +} + + +const MarkInCompressedFile & MergeTreeReaderStream::getMark(size_t index) +{ + if (!marks) + loadMarks(); + return (*marks)[index]; +} + + +void MergeTreeReaderStream::loadMarks() +{ + std::string mrk_path = path_prefix + ".mrk"; + + auto load = [&]() -> MarkCache::MappedPtr + { + /// Memory for marks must not be accounted as memory usage for query, because they are stored in shared cache. + auto temporarily_disable_memory_tracker = getCurrentMemoryTrackerActionLock(); + + size_t file_size = Poco::File(mrk_path).getSize(); + size_t expected_file_size = sizeof(MarkInCompressedFile) * marks_count; + if (expected_file_size != file_size) + throw Exception( + "bad size of marks file `" + mrk_path + "':" + std::to_string(file_size) + ", must be: " + std::to_string(expected_file_size), + ErrorCodes::CORRUPTED_DATA); + + auto res = std::make_shared(marks_count); + + /// Read directly to marks. + ReadBufferFromFile buffer(mrk_path, file_size, -1, reinterpret_cast(res->data())); + + if (buffer.eof() || buffer.buffer().size() != file_size) + throw Exception("Cannot read all marks from file " + mrk_path, ErrorCodes::CANNOT_READ_ALL_DATA); + + return res; + }; + + if (mark_cache) + { + auto key = mark_cache->hash(mrk_path); + if (save_marks_in_cache) + { + marks = mark_cache->getOrSet(key, load); + } + else + { + marks = mark_cache->get(key); + if (!marks) + marks = load(); + } + } + else + marks = load(); + + if (!marks) + throw Exception("Failed to load marks: " + mrk_path, ErrorCodes::LOGICAL_ERROR); +} + + +void MergeTreeReaderStream::seekToMark(size_t index) +{ + MarkInCompressedFile mark = getMark(index); + + try + { + if (cached_buffer) + cached_buffer->seek(mark.offset_in_compressed_file, mark.offset_in_decompressed_block); + if (non_cached_buffer) + non_cached_buffer->seek(mark.offset_in_compressed_file, mark.offset_in_decompressed_block); + } + catch (Exception & e) + { + /// Better diagnostics. + if (e.code() == ErrorCodes::ARGUMENT_OUT_OF_BOUND) + e.addMessage("(while seeking to mark " + toString(index) + + " of column " + path_prefix + "; offsets are: " + + toString(mark.offset_in_compressed_file) + " " + + toString(mark.offset_in_decompressed_block) + ")"); + + throw; + } +} + + +void MergeTreeReaderStream::seekToStart() +{ + try + { + if (cached_buffer) + cached_buffer->seek(0, 0); + if (non_cached_buffer) + non_cached_buffer->seek(0, 0); + } + catch (Exception & e) + { + /// Better diagnostics. 
+ if (e.code() == ErrorCodes::ARGUMENT_OUT_OF_BOUND) + e.addMessage("(while seeking to start of column " + path_prefix + ")"); + + throw; + } +} + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeReaderStream.h b/dbms/src/Storages/MergeTree/MergeTreeReaderStream.h new file mode 100644 index 00000000000..e0471d14b78 --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeReaderStream.h @@ -0,0 +1,49 @@ +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +class MergeTreeReaderStream +{ +public: + MergeTreeReaderStream( + const String &path_prefix_, const String &extension_, size_t marks_count_, + const MarkRanges &all_mark_ranges, + MarkCache *mark_cache, bool save_marks_in_cache, + UncompressedCache *uncompressed_cache, + size_t aio_threshold, size_t max_read_buffer_size, + const ReadBufferFromFileBase::ProfileCallback &profile_callback, clockid_t clock_type); + + void seekToMark(size_t index); + + void seekToStart(); + + ReadBuffer *data_buffer; + +private: + MergeTreeReaderStream() = default; + + /// NOTE: lazily loads marks from the marks cache. + const MarkInCompressedFile &getMark(size_t index); + + void loadMarks(); + + std::string path_prefix; + std::string extension; + + size_t marks_count; + + MarkCache *mark_cache; + bool save_marks_in_cache; + MarkCache::MappedPtr marks; + + std::unique_ptr cached_buffer; + std::unique_ptr non_cached_buffer; +}; +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp new file mode 100644 index 00000000000..0a29a7fbd07 --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp @@ -0,0 +1,393 @@ +#include + +#include +#include +#include + +#include +#include +#include + +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INCORRECT_QUERY; +} + +MergeTreeUniqueGranule::MergeTreeUniqueGranule(const MergeTreeUniqueIndex & index) + : MergeTreeIndexGranule(), index(index), set(new Set(SizeLimits{}, true)) +{ + set->setHeader(index.header); +} + +void MergeTreeUniqueGranule::serializeBinary(WriteBuffer & ostr) const +{ + if (empty()) + throw Exception( + "Attempt to write empty unique index `" + index.name + "`", ErrorCodes::LOGICAL_ERROR); + + const auto & columns = set->getSetElements(); + const auto & size_type = DataTypePtr(std::make_shared()); + + if (index.max_rows && size() > index.max_rows) + { + size_type->serializeBinary(0, ostr); + return; + } + + size_type->serializeBinary(size(), ostr); + + for (size_t i = 0; i < index.columns.size(); ++i) + { + const auto & type = index.data_types[i]; + type->serializeBinaryBulk(*columns[i], ostr, 0, size()); + } +} + +void MergeTreeUniqueGranule::deserializeBinary(ReadBuffer & istr) +{ + if (!set->empty()) + { + auto new_set = std::make_unique(SizeLimits{}, true); + new_set->setHeader(index.header); + set.swap(new_set); + } + + Block block; + Field field_rows; + const auto & size_type = DataTypePtr(std::make_shared()); + size_type->deserializeBinary(field_rows, istr); + size_t rows_to_read = field_rows.get(); + + for (size_t i = 0; i < index.columns.size(); ++i) + { + const auto & type = index.data_types[i]; + auto new_column = type->createColumn(); + type->deserializeBinaryBulk(*new_column, istr, rows_to_read, 0); + + block.insert(ColumnWithTypeAndName(new_column->getPtr(), type, index.columns[i])); + } + + set->insertFromBlock(block); +} + +String MergeTreeUniqueGranule::toString() const +{ + String res = ""; + + const auto & columns = set->getSetElements(); + for 
(size_t i = 0; i < index.columns.size(); ++i) + { + const auto & column = columns[i]; + res += " ["; + for (size_t j = 0; j < column->size(); ++j) + { + if (j != 0) + res += ", "; + Field field; + column->get(j, field); + res += applyVisitor(FieldVisitorToString(), field); + } + res += "]\n"; + } + + return res; +} + +void MergeTreeUniqueGranule::update(const Block & new_block, size_t * pos, size_t limit) +{ + size_t rows_read = std::min(limit, new_block.rows() - *pos); + + if (index.max_rows && size() > index.max_rows) + { + *pos += rows_read; + return; + } + + Block key_block; + for (size_t i = 0; i < index.columns.size(); ++i) + { + const auto & name = index.columns[i]; + const auto & type = index.data_types[i]; + key_block.insert( + ColumnWithTypeAndName( + new_block.getByName(name).column->cut(*pos, rows_read), + type, + name)); + } + + set->insertFromBlock(key_block); + + *pos += rows_read; +} + +Block MergeTreeUniqueGranule::getElementsBlock() const +{ + if (index.max_rows && size() > index.max_rows) + return index.header; + return index.header.cloneWithColumns(set->getSetElements()); +} + + +UniqueCondition::UniqueCondition( + const SelectQueryInfo & query, + const Context & context, + const MergeTreeUniqueIndex &index) + : IndexCondition(), index(index) +{ + for (size_t i = 0, size = index.columns.size(); i < size; ++i) + { + std::string name = index.columns[i]; + if (!key_columns.count(name)) + key_columns.insert(name); + } + + const ASTSelectQuery & select = typeid_cast(*query.query); + + /// Replace logical functions with bit functions. + /// Working with UInt8: last bit = can be true, previous = can be false. + ASTPtr new_expression; + if (select.where_expression && select.prewhere_expression) + new_expression = makeASTFunction( + "and", + select.where_expression->clone(), + select.prewhere_expression->clone()); + else if (select.where_expression) + new_expression = select.where_expression->clone(); + else if (select.prewhere_expression) + new_expression = select.prewhere_expression->clone(); + else + /// 0b11 -- can be true and false at the same time + new_expression = std::make_shared(Field(3)); + + useless = checkASTAlwaysUnknownOrTrue(new_expression); + /// Do not proceed if index is useless for this query. 
+ if (useless) + return; + + expression_ast = makeASTFunction( + "bitAnd", + new_expression, + std::make_shared(Field(1))); + + traverseAST(expression_ast); + + auto syntax_analyzer_result = SyntaxAnalyzer(context, {}).analyze( + expression_ast, index.header.getNamesAndTypesList()); + actions = ExpressionAnalyzer(expression_ast, syntax_analyzer_result, context).getActions(true); +} + +bool UniqueCondition::alwaysUnknownOrTrue() const +{ + return useless; +} + +bool UniqueCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const +{ + auto granule = std::dynamic_pointer_cast(idx_granule); + if (!granule) + throw Exception( + "Unique index condition got wrong granule", ErrorCodes::LOGICAL_ERROR); + + if (useless) + return true; + + if (index.max_rows && granule->size() > index.max_rows) + return true; + + Block result = granule->getElementsBlock(); + actions->execute(result); + + + const auto & column = result.getByName(expression_ast->getColumnName()).column; + + for (size_t i = 0; i < column->size(); ++i) + if (column->getBool(i)) + return true; + + return false; +} + +void UniqueCondition::traverseAST(ASTPtr & node) const +{ + if (operatorFromAST(node)) + { + auto * func = typeid_cast(&*node); + auto & args = typeid_cast(*func->arguments).children; + + for (auto & arg : args) + traverseAST(arg); + return; + } + + if (!atomFromAST(node)) + node = std::make_shared(Field(3)); /// can_be_true=1 can_be_false=1 +} + +bool UniqueCondition::atomFromAST(ASTPtr & node) const +{ + /// Function, literal or column + + if (typeid_cast(node.get())) + return true; + + if (const auto * identifier = typeid_cast(node.get())) + return key_columns.count(identifier->getColumnName()) != 0; + + if (auto * func = typeid_cast(node.get())) + { + if (key_columns.count(func->getColumnName())) + { + /// Function is already calculated. + node = std::make_shared(func->getColumnName()); + return true; + } + + ASTs & args = typeid_cast(*func->arguments).children; + + for (auto & arg : args) + if (!atomFromAST(arg)) + return false; + + return true; + } + + return false; +} + +bool UniqueCondition::operatorFromAST(ASTPtr & node) const +{ + /// Functions AND, OR, NOT. Replace with bit*. 
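+ /// Sub-expressions are evaluated over the stored set as UInt8 masks: the last bit means the atom can be true on this granule, the previous bit means it can be false. NOT becomes __bitSwapLastTwo, AND/indexHint become bitAnd, OR becomes bitOr, and atoms that cannot be analysed are replaced by the literal 3 (0b11, may be both).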
+ auto * func = typeid_cast(&*node); + if (!func) + return false; + + const ASTs & args = typeid_cast(*func->arguments).children; + + if (func->name == "not") + { + if (args.size() != 1) + return false; + + func->name = "__bitSwapLastTwo"; + } + else if (func->name == "and" || func->name == "indexHint") + func->name = "bitAnd"; + else if (func->name == "or") + func->name = "bitOr"; + else + return false; + + return true; +} + +bool checkAtomName(const String & name) +{ + static std::set atoms = { + "notEquals", + "equals", + "less", + "greater", + "lessOrEquals", + "greaterOrEquals", + "in", + "notIn", + "like" + }; + return atoms.find(name) != atoms.end(); +} + +bool UniqueCondition::checkASTAlwaysUnknownOrTrue(const ASTPtr & node, bool atomic) const +{ + if (const auto * func = typeid_cast(node.get())) + { + if (key_columns.count(func->getColumnName())) + return false; + + const ASTs & args = typeid_cast(*func->arguments).children; + + if (func->name == "and" || func->name == "indexHint") + return checkASTAlwaysUnknownOrTrue(args[0], atomic) && checkASTAlwaysUnknownOrTrue(args[1], atomic); + else if (func->name == "or") + return checkASTAlwaysUnknownOrTrue(args[0], atomic) || checkASTAlwaysUnknownOrTrue(args[1], atomic); + else if (func->name == "not") + return checkASTAlwaysUnknownOrTrue(args[0], atomic); + else if (!atomic && checkAtomName(func->name)) + return checkASTAlwaysUnknownOrTrue(node, true); + else + return std::any_of(args.begin(), args.end(), + [this, &atomic](const auto & arg) { return checkASTAlwaysUnknownOrTrue(arg, atomic); }); + } + else if (const auto * literal = typeid_cast(node.get())) + return !atomic && literal->value.get(); + else if (const auto * identifier = typeid_cast(node.get())) + return key_columns.find(identifier->getColumnName()) == key_columns.end(); + else + return true; +} + + +MergeTreeIndexGranulePtr MergeTreeUniqueIndex::createIndexGranule() const +{ + return std::make_shared(*this); +} + +IndexConditionPtr MergeTreeUniqueIndex::createIndexCondition( + const SelectQueryInfo & query, const Context & context) const +{ + return std::make_shared(query, context, *this); +} + + +std::unique_ptr MergeTreeUniqueIndexCreator( + const NamesAndTypesList & new_columns, + std::shared_ptr node, + const Context & context) +{ + if (node->name.empty()) + throw Exception("Index must have a unique name", ErrorCodes::INCORRECT_QUERY); + + size_t max_rows = 0; + if (node->type->arguments) + { + if (node->type->arguments->children.size() > 1) + throw Exception("Unique index can have at most one argument", ErrorCodes::INCORRECT_QUERY); + else if (node->type->arguments->children.size() == 1) + max_rows = typeid_cast( + *node->type->arguments->children[0]).value.get(); + } + + + ASTPtr expr_list = MergeTreeData::extractKeyExpressionList(node->expr->clone()); + auto syntax = SyntaxAnalyzer(context, {}).analyze( + expr_list, new_columns); + auto unique_expr = ExpressionAnalyzer(expr_list, syntax, context).getActions(false); + + auto sample = ExpressionAnalyzer(expr_list, syntax, context) + .getActions(true)->getSampleBlock(); + + Block header; + + Names columns; + DataTypes data_types; + + for (size_t i = 0; i < expr_list->children.size(); ++i) + { + const auto & column = sample.getByPosition(i); + + columns.emplace_back(column.name); + data_types.emplace_back(column.type); + + header.insert(ColumnWithTypeAndName(column.type->createColumn(), column.type, column.name)); + } + + return std::make_unique( + node->name, std::move(unique_expr), columns, data_types, header,
node->granularity.get(), max_rows);; +} + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h new file mode 100644 index 00000000000..6b1107dffd6 --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h @@ -0,0 +1,93 @@ +#pragma once + +#include +#include + +#include + +#include +#include + + +namespace DB +{ + +class MergeTreeUniqueIndex; + +struct MergeTreeUniqueGranule : public MergeTreeIndexGranule +{ + explicit MergeTreeUniqueGranule(const MergeTreeUniqueIndex & index); + + void serializeBinary(WriteBuffer & ostr) const override; + void deserializeBinary(ReadBuffer & istr) override; + + String toString() const override; + size_t size() const { return set->getTotalRowCount(); } + bool empty() const override { return !size(); } + + void update(const Block & block, size_t * pos, size_t limit) override; + Block getElementsBlock() const; + + ~MergeTreeUniqueGranule() override = default; + + const MergeTreeUniqueIndex & index; + std::unique_ptr set; +}; + + +class UniqueCondition : public IndexCondition +{ +public: + UniqueCondition( + const SelectQueryInfo & query, + const Context & context, + const MergeTreeUniqueIndex & index); + + bool alwaysUnknownOrTrue() const override; + + bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const override; + + ~UniqueCondition() override = default; +private: + void traverseAST(ASTPtr & node) const; + bool atomFromAST(ASTPtr & node) const; + bool operatorFromAST(ASTPtr & node) const; + + bool checkASTAlwaysUnknownOrTrue(const ASTPtr & node, bool atomic = false) const; + + const MergeTreeUniqueIndex & index; + + bool useless; + std::set key_columns; + ASTPtr expression_ast; + ExpressionActionsPtr actions; +}; + + +class MergeTreeUniqueIndex : public MergeTreeIndex +{ +public: + MergeTreeUniqueIndex( + String name, + ExpressionActionsPtr expr, + const Names & columns, + const DataTypes & data_types, + const Block & header, + size_t granularity, + size_t _max_rows) + : MergeTreeIndex(std::move(name), std::move(expr), columns, data_types, header, granularity), max_rows(_max_rows) {} + + ~MergeTreeUniqueIndex() override = default; + + MergeTreeIndexGranulePtr createIndexGranule() const override; + + IndexConditionPtr createIndexCondition( + const SelectQueryInfo & query, const Context & context) const override; + + size_t max_rows = 0; +}; + +std::unique_ptr MergeTreeUniqueIndexCreator( + const NamesAndTypesList & columns, std::shared_ptr node, const Context & context); + +} diff --git a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp index b64543124b8..47cdd9755f0 100644 --- a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -16,6 +16,7 @@ namespace constexpr auto DATA_FILE_EXTENSION = ".bin"; constexpr auto MARKS_FILE_EXTENSION = ".mrk"; +constexpr auto INDEX_FILE_EXTENSION = ".idx"; } @@ -325,6 +326,18 @@ void MergedBlockOutputStream::writeSuffixAndFinalizePart( } } + /// Finish skip index serialization + for (size_t i = 0; i < storage.skip_indices.size(); ++i) + { + auto & stream = *skip_indices_streams[i]; + if (skip_indices_granules[i] && !skip_indices_granules[i]->empty()) + { + skip_indices_granules[i]->serializeBinary(stream.compressed); + skip_indices_granules[i].reset(); + } + } + + if (!total_column_list) total_column_list = &columns_list; @@ -342,6 +355,16 @@ void MergedBlockOutputStream::writeSuffixAndFinalizePart( 
index_stream = nullptr; } + for (auto & stream : skip_indices_streams) + { + stream->finalize(); + stream->addToChecksums(checksums); + } + + skip_indices_streams.clear(); + skip_indices_granules.clear(); + skip_index_filling.clear(); + for (ColumnStreams::iterator it = column_streams.begin(); it != column_streams.end(); ++it) { it->second->finalize(); @@ -398,6 +421,21 @@ void MergedBlockOutputStream::init() part_path + "primary.idx", DBMS_DEFAULT_BUFFER_SIZE, O_TRUNC | O_CREAT | O_WRONLY); index_stream = std::make_unique(*index_file_stream); } + + for (const auto & index : storage.skip_indices) + { + String stream_name = index->getFileName(); + skip_indices_streams.emplace_back( + std::make_unique( + stream_name, + part_path + stream_name, INDEX_FILE_EXTENSION, + part_path + stream_name, MARKS_FILE_EXTENSION, + codec, max_compress_block_size, + 0, aio_threshold)); + + skip_indices_granules.emplace_back(nullptr); + skip_index_filling.push_back(0); + } } @@ -410,6 +448,9 @@ void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Perm WrittenOffsetColumns offset_columns; auto primary_key_column_names = storage.primary_key_columns; + Names skip_indexes_column_names; + for (const auto & index : storage.skip_indices) + std::copy(index->columns.cbegin(), index->columns.cend(), std::back_inserter(skip_indexes_column_names)); /// Here we will add the columns related to the Primary Key, then write the index. std::vector primary_key_columns(primary_key_column_names.size()); @@ -429,6 +470,21 @@ void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Perm primary_key_columns[i].column = primary_key_columns[i].column->permute(*permutation, 0); } + /// The same for skip indexes columns + std::vector skip_indexes_columns(skip_indexes_column_names.size()); + std::map skip_indexes_column_name_to_position; + + for (size_t i = 0, size = skip_indexes_column_names.size(); i < size; ++i) + { + const auto & name = skip_indexes_column_names[i]; + skip_indexes_column_name_to_position.emplace(name, i); + skip_indexes_columns[i] = block.getByName(name); + + /// Reorder index columns in advance. + if (permutation) + skip_indexes_columns[i].column = skip_indexes_columns[i].column->permute(*permutation, 0); + } + if (index_columns.empty()) { index_columns.resize(primary_key_column_names.size()); @@ -459,11 +515,17 @@ void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Perm if (permutation) { auto primary_column_it = primary_key_column_name_to_position.find(it->name); + auto skip_index_column_it = skip_indexes_column_name_to_position.find(it->name); if (primary_key_column_name_to_position.end() != primary_column_it) { - auto & primary_column = *primary_key_columns[primary_column_it->second].column; + const auto & primary_column = *primary_key_columns[primary_column_it->second].column; writeData(column.name, *column.type, primary_column, offset_columns, false, serialization_states[i]); } + else if (skip_indexes_column_name_to_position.end() != skip_index_column_it) + { + const auto & index_column = *skip_indexes_columns[skip_index_column_it->second].column; + writeData(column.name, *column.type, index_column, offset_columns, false, serialization_states[i]); + } else { /// We rearrange the columns that are not included in the primary key here; Then the result is released - to save RAM. 
@@ -479,6 +541,57 @@ void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Perm rows_count += rows; + { + /// Filling and writing skip indices like in IMergedBlockOutputStream::writeData + for (size_t i = 0; i < storage.skip_indices.size(); ++i) + { + const auto index = storage.skip_indices[i]; + auto & stream = *skip_indices_streams[i]; + size_t prev_pos = 0; + + while (prev_pos < rows) + { + size_t limit = 0; + if (prev_pos == 0 && index_offset != 0) + { + limit = index_offset; + } + else + { + limit = storage.index_granularity; + if (!skip_indices_granules[i]) + { + skip_indices_granules[i] = index->createIndexGranule(); + skip_index_filling[i] = 0; + + if (stream.compressed.offset() >= min_compress_block_size) + stream.compressed.next(); + + writeIntBinary(stream.plain_hashing.count(), stream.marks); + writeIntBinary(stream.compressed.offset(), stream.marks); + } + } + + size_t pos = prev_pos; + skip_indices_granules[i]->update(block, &pos, limit); + + if (pos == prev_pos + limit) + { + ++skip_index_filling[i]; + + /// write index if it is filled + if (skip_index_filling[i] == index->granularity) + { + skip_indices_granules[i]->serializeBinary(stream.compressed); + skip_indices_granules[i].reset(); + skip_index_filling[i] = 0; + } + } + prev_pos = pos; + } + } + } + { /** While filling index (index_columns), disable memory tracker. * Because memory is allocated here (maybe in context of INSERT query), diff --git a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.h b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.h index 0ae8e83aafb..6bc6e90e887 100644 --- a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.h +++ b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.h @@ -149,6 +149,10 @@ private: std::unique_ptr index_file_stream; std::unique_ptr index_stream; MutableColumns index_columns; + + std::vector> skip_indices_streams; + MergeTreeIndexGranules skip_indices_granules; + std::vector skip_index_filling; }; diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp index e8f918448e4..d6295fb130a 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp @@ -145,14 +145,14 @@ void ReplicatedMergeTreeAlterThread::run() parts = storage.data.getDataParts(); const auto columns_for_parts = storage.getColumns().getAllPhysical(); + const auto indices_for_parts = storage.getIndicesDescription(); for (const MergeTreeData::DataPartPtr & part : parts) { /// Update the part and write result to temporary files. /// TODO: You can skip checking for too large changes if ZooKeeper has, for example, /// node /flags/force_alter. 
- auto transaction = storage.data.alterDataPart(part, columns_for_parts, false); - + auto transaction = storage.data.alterDataPart(part, columns_for_parts, indices_for_parts.indices, false); if (!transaction) continue; diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index 2db01943a46..61ebaa710da 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -238,6 +238,7 @@ void ReplicatedMergeTreePartCheckThread::checkPart(const String & part_name) storage.data.index_granularity, true, storage.data.primary_key_data_types, + storage.data.skip_indices, [this] { return need_stop.load(); }); if (need_stop) diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index ae5249d3d16..88b2e6ea1c7 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -44,6 +44,8 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr if (data.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) partition_key = formattedAST(MergeTreeData::extractKeyExpressionList(data.partition_by_ast)); + + skip_indices = data.getIndicesDescription().toString(); } void ReplicatedMergeTreeTableMetadata::write(WriteBuffer & out) const @@ -64,6 +66,9 @@ void ReplicatedMergeTreeTableMetadata::write(WriteBuffer & out) const if (!sorting_key.empty()) out << "sorting key: " << sorting_key << "\n"; + + if (!skip_indices.empty()) + out << "indices: " << skip_indices << "\n"; } String ReplicatedMergeTreeTableMetadata::toString() const @@ -93,6 +98,9 @@ void ReplicatedMergeTreeTableMetadata::read(ReadBuffer & in) if (checkString("sorting key: ", in)) in >> sorting_key >> "\n"; + + if (checkString("indices: ", in)) + in >> skip_indices >> "\n"; } ReplicatedMergeTreeTableMetadata ReplicatedMergeTreeTableMetadata::parse(const String & s) @@ -175,6 +183,21 @@ ReplicatedMergeTreeTableMetadata::checkAndFindDiff(const ReplicatedMergeTreeTabl ErrorCodes::METADATA_MISMATCH); } + if (skip_indices != from_zk.skip_indices) + { + if (allow_alter) + { + diff.skip_indices_changed = true; + diff.new_skip_indices = from_zk.skip_indices; + } + else + throw Exception( + "Existing table metadata in ZooKeeper differs in skip indexes." 
+ " Stored in ZooKeeper: " + from_zk.skip_indices + + ", local: " + skip_indices, + ErrorCodes::METADATA_MISMATCH); + } + return diff; } diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h index b063e226348..e350058473b 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h @@ -25,6 +25,7 @@ struct ReplicatedMergeTreeTableMetadata MergeTreeDataFormatVersion data_format_version; String partition_key; String sorting_key; + String skip_indices; ReplicatedMergeTreeTableMetadata() = default; explicit ReplicatedMergeTreeTableMetadata(const MergeTreeData & data); @@ -40,7 +41,10 @@ struct ReplicatedMergeTreeTableMetadata bool sorting_key_changed = false; String new_sorting_key; - bool empty() const { return !sorting_key_changed; } + bool skip_indices_changed = false; + String new_skip_indices; + + bool empty() const { return !sorting_key_changed && !skip_indices_changed; } }; Diff checkAndFindDiff(const ReplicatedMergeTreeTableMetadata & from_zk, bool allow_alter) const; diff --git a/dbms/src/Storages/MergeTree/checkDataPart.cpp b/dbms/src/Storages/MergeTree/checkDataPart.cpp index e88f027ad75..40ed5021ea7 100644 --- a/dbms/src/Storages/MergeTree/checkDataPart.cpp +++ b/dbms/src/Storages/MergeTree/checkDataPart.cpp @@ -30,12 +30,13 @@ namespace ErrorCodes namespace { -/** To read and checksum single stream (a pair of .bin, .mrk files) for a single column. +/** To read and checksum single stream (a pair of .bin, .mrk files) for a single column or secondary index. */ class Stream { public: String base_name; + String bin_file_ext; String bin_file_path; String mrk_file_path; private: @@ -50,10 +51,11 @@ private: public: HashingReadBuffer mrk_hashing_buf; - Stream(const String & path, const String & base_name) + Stream(const String & path, const String & base_name, const String & bin_file_ext = ".bin") : base_name(base_name), - bin_file_path(path + base_name + ".bin"), + bin_file_ext(bin_file_ext), + bin_file_path(path + base_name + bin_file_ext), mrk_file_path(path + base_name + ".mrk"), file_buf(bin_file_path), compressed_hashing_buf(file_buf), @@ -118,7 +120,7 @@ public: void saveChecksums(MergeTreeData::DataPart::Checksums & checksums) { - checksums.files[base_name + ".bin"] = MergeTreeData::DataPart::Checksums::Checksum( + checksums.files[base_name + bin_file_ext] = MergeTreeData::DataPart::Checksums::Checksum( compressed_hashing_buf.count(), compressed_hashing_buf.getHash(), uncompressed_hashing_buf.count(), uncompressed_hashing_buf.getHash()); @@ -135,6 +137,7 @@ MergeTreeData::DataPart::Checksums checkDataPart( size_t index_granularity, bool require_checksums, const DataTypes & primary_key_data_types, + const MergeTreeIndices & indices, std::function is_cancelled) { Logger * log = &Logger::get("checkDataPart"); @@ -239,6 +242,48 @@ MergeTreeData::DataPart::Checksums checkDataPart( rows = count; } + /// Read and check skip indices. 
+ for (const auto & index : indices) + { + Stream stream(path, index->getFileName(), ".idx"); + size_t mark_num = 0; + + while (!stream.uncompressed_hashing_buf.eof()) + { + if (stream.mrk_hashing_buf.eof()) + throw Exception("Unexpected end of mrk file while reading index " + index->name, + ErrorCodes::CORRUPTED_DATA); + try + { + stream.assertMark(); + } + catch (Exception &e) + { + e.addMessage("Cannot read mark " + toString(mark_num) + + " in file " + stream.mrk_file_path + + ", mrk file offset: " + toString(stream.mrk_hashing_buf.count())); + throw; + } + try + { + index->createIndexGranule()->deserializeBinary(stream.uncompressed_hashing_buf); + } + catch (Exception &e) + { + e.addMessage("Cannot read granule " + toString(mark_num) + + " in file " + stream.bin_file_path + + ", mrk file offset: " + toString(stream.mrk_hashing_buf.count())); + throw; + } + ++mark_num; + if (is_cancelled()) + return {}; + } + + stream.assertEnd(); + stream.saveChecksums(checksums_data); + } + /// Read all columns, calculate checksums and validate marks. for (const NameAndTypePair & name_type : columns) { diff --git a/dbms/src/Storages/MergeTree/checkDataPart.h b/dbms/src/Storages/MergeTree/checkDataPart.h index 584729a0029..5a223fce02c 100644 --- a/dbms/src/Storages/MergeTree/checkDataPart.h +++ b/dbms/src/Storages/MergeTree/checkDataPart.h @@ -17,6 +17,7 @@ MergeTreeData::DataPart::Checksums checkDataPart( size_t index_granularity, bool require_checksums, const DataTypes & primary_key_data_types, /// Check the primary key. If it is not necessary, pass an empty array. + const MergeTreeIndices & indices = {}, /// Check skip indices std::function is_cancelled = []{ return false; }); } diff --git a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp index e0903138220..ed0a859e495 100644 --- a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -1,6 +1,9 @@ #include #include #include +#include +#include +#include #include #include @@ -336,7 +339,8 @@ static StoragePtr create(const StorageFactory::Arguments & args) */ bool is_extended_storage_def = - args.storage_def->partition_by || args.storage_def->primary_key || args.storage_def->order_by || args.storage_def->sample_by || args.storage_def->settings; + args.storage_def->partition_by || args.storage_def->primary_key || args.storage_def->order_by + || args.storage_def->sample_by || (args.query.columns_list->indices && !args.query.columns_list->indices->children.empty()) || args.storage_def->settings; String name_part = args.engine_name.substr(0, args.engine_name.size() - strlen("MergeTree")); @@ -551,6 +555,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) ASTPtr order_by_ast; ASTPtr primary_key_ast; ASTPtr sample_by_ast; + IndicesDescription indices_description; MergeTreeSettings storage_settings = args.context.getMergeTreeSettings(); if (is_extended_storage_def) @@ -571,6 +576,11 @@ static StoragePtr create(const StorageFactory::Arguments & args) if (args.storage_def->sample_by) sample_by_ast = args.storage_def->sample_by->ptr(); + if (args.query.columns_list && args.query.columns_list->indices) + for (const auto & index : args.query.columns_list->indices->children) + indices_description.indices.push_back( + std::dynamic_pointer_cast(index->ptr())); + storage_settings.loadFromQuery(*args.storage_def); } else @@ -603,16 +613,22 @@ static StoragePtr create(const StorageFactory::Arguments & args) if 
(replicated) return StorageReplicatedMergeTree::create( zookeeper_path, replica_name, args.attach, args.data_path, args.database_name, args.table_name, - args.columns, + args.columns, indices_description, args.context, date_column_name, partition_by_ast, order_by_ast, primary_key_ast, - sample_by_ast, merging_params, storage_settings, - args.has_force_restore_data_flag); + sample_by_ast, merging_params, storage_settings, args.has_force_restore_data_flag); else return StorageMergeTree::create( - args.data_path, args.database_name, args.table_name, args.columns, args.attach, - args.context, date_column_name, partition_by_ast, order_by_ast, primary_key_ast, - sample_by_ast, merging_params, storage_settings, - args.has_force_restore_data_flag); + args.data_path, args.database_name, args.table_name, args.columns, indices_description, + args.attach, args.context, date_column_name, partition_by_ast, order_by_ast, + primary_key_ast, sample_by_ast, merging_params, storage_settings, args.has_force_restore_data_flag); +} + + +static void registerMergeTreeSkipIndices() +{ + auto & factory = MergeTreeIndexFactory::instance(); + factory.registerIndex("minmax", MergeTreeMinMaxIndexCreator); + factory.registerIndex("unique", MergeTreeUniqueIndexCreator); } @@ -633,6 +649,8 @@ void registerStorageMergeTree(StorageFactory & factory) factory.registerStorage("ReplicatedSummingMergeTree", create); factory.registerStorage("ReplicatedGraphiteMergeTree", create); factory.registerStorage("ReplicatedVersionedCollapsingMergeTree", create); + + registerMergeTreeSkipIndices(); } } diff --git a/dbms/src/Storages/StorageBuffer.cpp b/dbms/src/Storages/StorageBuffer.cpp index 6f4c07c6eba..9706b1d5562 100644 --- a/dbms/src/Storages/StorageBuffer.cpp +++ b/dbms/src/Storages/StorageBuffer.cpp @@ -684,9 +684,10 @@ void StorageBuffer::alter(const AlterCommands & params, const String & database_ /// So that no blocks of the old structure remain. 
optimize({} /*query*/, {} /*partition_id*/, false /*final*/, false /*deduplicate*/, context); - ColumnsDescription new_columns = getColumns(); + auto new_columns = getColumns(); + auto new_indices = getIndicesDescription(); params.apply(new_columns); - context.getDatabase(database_name)->alterTable(context, table_name, new_columns, {}); + context.getDatabase(database_name)->alterTable(context, table_name, new_columns, new_indices, {}); setColumns(std::move(new_columns)); } diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp index 8f4f31d458c..8ea077b671f 100644 --- a/dbms/src/Storages/StorageDistributed.cpp +++ b/dbms/src/Storages/StorageDistributed.cpp @@ -339,9 +339,10 @@ void StorageDistributed::alter(const AlterCommands & params, const String & data { auto lock = lockStructureForAlter(); - ColumnsDescription new_columns = getColumns(); + auto new_columns = getColumns(); + auto new_indices = getIndicesDescription(); params.apply(new_columns); - context.getDatabase(database_name)->alterTable(context, current_table_name, new_columns, {}); + context.getDatabase(database_name)->alterTable(context, current_table_name, new_columns, new_indices, {}); setColumns(std::move(new_columns)); } diff --git a/dbms/src/Storages/StorageFactory.cpp b/dbms/src/Storages/StorageFactory.cpp index 333933d449f..319258e3adf 100644 --- a/dbms/src/Storages/StorageFactory.cpp +++ b/dbms/src/Storages/StorageFactory.cpp @@ -94,11 +94,12 @@ StoragePtr StorageFactory::get( ErrorCodes::BAD_ARGUMENTS); } - if ((storage_def->partition_by || storage_def->primary_key || storage_def->order_by || storage_def->sample_by) + if ((storage_def->partition_by || storage_def->primary_key || storage_def->order_by || storage_def->sample_by || + (query.columns_list && query.columns_list->indices && !query.columns_list->indices->children.empty())) && !endsWith(name, "MergeTree")) { throw Exception( - "Engine " + name + " doesn't support PARTITION BY, PRIMARY KEY, ORDER BY or SAMPLE BY clauses. " + "Engine " + name + " doesn't support PARTITION BY, PRIMARY KEY, ORDER BY or SAMPLE BY clauses and skipping indices. " "Currently only the MergeTree family of engines supports them", ErrorCodes::BAD_ARGUMENTS); } diff --git a/dbms/src/Storages/StorageMaterializedView.cpp b/dbms/src/Storages/StorageMaterializedView.cpp index e2444862ca8..ae33cac8427 100644 --- a/dbms/src/Storages/StorageMaterializedView.cpp +++ b/dbms/src/Storages/StorageMaterializedView.cpp @@ -139,7 +139,11 @@ StorageMaterializedView::StorageMaterializedView( auto manual_create_query = std::make_shared(); manual_create_query->database = target_database_name; manual_create_query->table = target_table_name; - manual_create_query->set(manual_create_query->columns, query.columns->ptr()); + + auto new_columns_list = std::make_shared(); + new_columns_list->set(new_columns_list->columns, query.columns_list->columns->ptr()); + + manual_create_query->set(manual_create_query->columns_list, new_columns_list); manual_create_query->set(manual_create_query->storage, query.storage->ptr()); /// Execute the query. 
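A quick illustration of the new StorageFactory check above (a sketch only; the table name is made up): declaring a skip index on a non-MergeTree engine is rejected at CREATE time.

```sql
-- Illustrative only: the table name is made up.
CREATE TABLE test.log_with_idx
(
    u64 UInt64,
    INDEX idx u64 TYPE minmax GRANULARITY 1
) ENGINE = Log;
-- Expected to fail with BAD_ARGUMENTS: only the MergeTree family of engines
-- supports skipping indices.
```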
diff --git a/dbms/src/Storages/StorageMerge.cpp b/dbms/src/Storages/StorageMerge.cpp index 754d5e4fdfe..406404c2293 100644 --- a/dbms/src/Storages/StorageMerge.cpp +++ b/dbms/src/Storages/StorageMerge.cpp @@ -398,9 +398,10 @@ void StorageMerge::alter(const AlterCommands & params, const String & database_n { auto lock = lockStructureForAlter(); - ColumnsDescription new_columns = getColumns(); + auto new_columns = getColumns(); + auto new_indices = getIndicesDescription(); params.apply(new_columns); - context.getDatabase(database_name)->alterTable(context, table_name, new_columns, {}); + context.getDatabase(database_name)->alterTable(context, table_name, new_columns, new_indices, {}); setColumns(new_columns); } diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 7a91ee41338..06c21926673 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -51,6 +51,7 @@ StorageMergeTree::StorageMergeTree( const String & database_name_, const String & table_name_, const ColumnsDescription & columns_, + const IndicesDescription & indices_, bool attach, Context & context_, const String & date_column_name, @@ -64,10 +65,9 @@ StorageMergeTree::StorageMergeTree( : path(path_), database_name(database_name_), table_name(table_name_), full_path(path + escapeForFileName(table_name) + '/'), global_context(context_), background_pool(context_.getBackgroundPool()), data(database_name, table_name, - full_path, columns_, + full_path, columns_, indices_, context_, date_column_name, partition_by_ast_, order_by_ast_, primary_key_ast_, - sample_by_ast_, merging_params_, - settings_, false, attach), + sample_by_ast_, merging_params_, settings_, false, attach), reader(data), writer(data), merger_mutator(data, global_context.getBackgroundPool()), log(&Logger::get(database_name_ + "." 
+ table_name + " (StorageMergeTree)")) { @@ -199,8 +199,9 @@ void StorageMergeTree::alter( { auto table_soft_lock = lockStructureForAlter(); auto new_columns = getColumns(); + auto new_indices = getIndicesDescription(); params.apply(new_columns); - context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, {}); + context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, new_indices, {}); setColumns(std::move(new_columns)); return; } @@ -213,16 +214,17 @@ void StorageMergeTree::alter( data.checkAlter(params); auto new_columns = data.getColumns(); + auto new_indices = data.getIndicesDescription(); ASTPtr new_order_by_ast = data.order_by_ast; ASTPtr new_primary_key_ast = data.primary_key_ast; - params.apply(new_columns, new_order_by_ast, new_primary_key_ast); + params.apply(new_columns, new_indices, new_order_by_ast, new_primary_key_ast); auto parts = data.getDataParts({MergeTreeDataPartState::PreCommitted, MergeTreeDataPartState::Committed, MergeTreeDataPartState::Outdated}); auto columns_for_parts = new_columns.getAllPhysical(); std::vector transactions; for (const MergeTreeData::DataPartPtr & part : parts) { - if (auto transaction = data.alterDataPart(part, columns_for_parts, false)) + if (auto transaction = data.alterDataPart(part, columns_for_parts, new_indices.indices, false)) transactions.push_back(std::move(transaction)); } @@ -239,10 +241,10 @@ void StorageMergeTree::alter( storage_ast.set(storage_ast.primary_key, new_primary_key_ast); }; - context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, storage_modifier); + context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, new_indices, storage_modifier); /// Reinitialize primary key because primary key column types might have changed. - data.setPrimaryKeyAndColumns(new_order_by_ast, new_primary_key_ast, new_columns); + data.setPrimaryKeyIndicesAndColumns(new_order_by_ast, new_primary_key_ast, new_columns, new_indices); for (auto & transaction : transactions) transaction->commit(); @@ -698,9 +700,10 @@ void StorageMergeTree::clearColumnInPartition(const ASTPtr & partition, const Fi alter_command.column_name = get(column_name); auto new_columns = getColumns(); + auto new_indices = getIndicesDescription(); ASTPtr ignored_order_by_ast; ASTPtr ignored_primary_key_ast; - alter_command.apply(new_columns, ignored_order_by_ast, ignored_primary_key_ast); + alter_command.apply(new_columns, new_indices, ignored_order_by_ast, ignored_primary_key_ast); auto columns_for_parts = new_columns.getAllPhysical(); for (const auto & part : parts) @@ -708,7 +711,7 @@ void StorageMergeTree::clearColumnInPartition(const ASTPtr & partition, const Fi if (part->info.partition_id != partition_id) throw Exception("Unexpected partition ID " + part->info.partition_id + ". 
This is a bug.", ErrorCodes::LOGICAL_ERROR); - if (auto transaction = data.alterDataPart(part, columns_for_parts, false)) + if (auto transaction = data.alterDataPart(part, columns_for_parts, new_indices.indices, false)) transactions.push_back(std::move(transaction)); LOG_DEBUG(log, "Removing column " << get(column_name) << " from part " << part->name); diff --git a/dbms/src/Storages/StorageMergeTree.h b/dbms/src/Storages/StorageMergeTree.h index aaf68c509f6..0d6b662abc3 100644 --- a/dbms/src/Storages/StorageMergeTree.h +++ b/dbms/src/Storages/StorageMergeTree.h @@ -43,6 +43,9 @@ public: const ColumnsDescription & getColumns() const override { return data.getColumns(); } void setColumns(ColumnsDescription columns_) override { return data.setColumns(std::move(columns_)); } + virtual const IndicesDescription & getIndicesDescription() const override { return data.getIndicesDescription(); } + virtual void setIndicesDescription(IndicesDescription indices_) override { data.setIndicesDescription(std::move(indices_)); } + NameAndTypePair getColumn(const String & column_name) const override { return data.getColumn(column_name); } bool hasColumn(const String & column_name) const override { return data.hasColumn(column_name); } @@ -167,6 +170,7 @@ protected: const String & database_name_, const String & table_name_, const ColumnsDescription & columns_, + const IndicesDescription & indices_, bool attach, Context & context_, const String & date_column_name, diff --git a/dbms/src/Storages/StorageNull.cpp b/dbms/src/Storages/StorageNull.cpp index d23680b1c1a..59c46fcafd2 100644 --- a/dbms/src/Storages/StorageNull.cpp +++ b/dbms/src/Storages/StorageNull.cpp @@ -35,8 +35,9 @@ void StorageNull::alter(const AlterCommands & params, const String & current_dat auto lock = lockStructureForAlter(); ColumnsDescription new_columns = getColumns(); + IndicesDescription new_indices = getIndicesDescription(); params.apply(new_columns); - context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, {}); + context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, new_indices, {}); setColumns(std::move(new_columns)); } diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index fc1820e36be..73842cbbbc2 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -113,6 +113,7 @@ namespace ErrorCodes extern const int KEEPER_EXCEPTION; extern const int ALL_REPLICAS_LOST; extern const int REPLICA_STATUS_CHANGED; + extern const int INCORRECT_QUERY; } namespace ActionLocks @@ -200,6 +201,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( bool attach, const String & path_, const String & database_name_, const String & name_, const ColumnsDescription & columns_, + const IndicesDescription & indices_, Context & context_, const String & date_column_name, const ASTPtr & partition_by_ast_, @@ -215,10 +217,9 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( zookeeper_path(global_context.getMacros()->expand(zookeeper_path_, database_name, table_name)), replica_name(global_context.getMacros()->expand(replica_name_, database_name, table_name)), data(database_name, table_name, - full_path, columns_, + full_path, columns_, indices_, context_, date_column_name, partition_by_ast_, order_by_ast_, primary_key_ast_, - sample_by_ast_, merging_params_, - settings_, true, attach, + sample_by_ast_, merging_params_, settings_, true, attach, 
[this] (const std::string & name) { enqueuePartForCheck(name); }), reader(data), writer(data), merger_mutator(data, global_context.getBackgroundPool()), queue(*this), fetcher(data), @@ -417,27 +418,34 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column { ASTPtr new_primary_key_ast = data.primary_key_ast; ASTPtr new_order_by_ast = data.order_by_ast; + auto new_indices = data.getIndicesDescription(); IDatabase::ASTModifier storage_modifier; if (!metadata_diff.empty()) { - ParserNotEmptyExpressionList parser(false); - auto new_sorting_key_expr_list = parseQuery(parser, metadata_diff.new_sorting_key, 0); - - if (new_sorting_key_expr_list->children.size() == 1) - new_order_by_ast = new_sorting_key_expr_list->children[0]; - else + if (metadata_diff.sorting_key_changed) { - auto tuple = makeASTFunction("tuple"); - tuple->arguments->children = new_sorting_key_expr_list->children; - new_order_by_ast = tuple; + ParserNotEmptyExpressionList parser(false); + auto new_sorting_key_expr_list = parseQuery(parser, metadata_diff.new_sorting_key, 0); + + if (new_sorting_key_expr_list->children.size() == 1) + new_order_by_ast = new_sorting_key_expr_list->children[0]; + else + { + auto tuple = makeASTFunction("tuple"); + tuple->arguments->children = new_sorting_key_expr_list->children; + new_order_by_ast = tuple; + } + + if (!data.primary_key_ast) + { + /// Primary and sorting key become independent after this ALTER so we have to + /// save the old ORDER BY expression as the new primary key. + new_primary_key_ast = data.order_by_ast->clone(); + } } - if (!data.primary_key_ast) - { - /// Primary and sorting key become independent after this ALTER so we have to - /// save the old ORDER BY expression as the new primary key. - new_primary_key_ast = data.order_by_ast->clone(); - } + if (metadata_diff.skip_indices_changed) + new_indices = IndicesDescription::parse(metadata_diff.new_skip_indices); storage_modifier = [&](IAST & ast) { @@ -455,11 +463,11 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column }; } - global_context.getDatabase(database_name)->alterTable(global_context, table_name, new_columns, storage_modifier); + global_context.getDatabase(database_name)->alterTable(global_context, table_name, new_columns, new_indices, storage_modifier); /// Even if the primary/sorting keys didn't change we must reinitialize it /// because primary key column types might have changed. 
- data.setPrimaryKeyAndColumns(new_order_by_ast, new_primary_key_ast, new_columns); + data.setPrimaryKeyIndicesAndColumns(new_order_by_ast, new_primary_key_ast, new_columns, new_indices); } @@ -1527,9 +1535,10 @@ void StorageReplicatedMergeTree::executeClearColumnInPartition(const LogEntry & alter_command.column_name = entry.column_name; auto new_columns = getColumns(); + auto new_indices = getIndicesDescription(); ASTPtr ignored_order_by_ast; ASTPtr ignored_primary_key_ast; - alter_command.apply(new_columns, ignored_order_by_ast, ignored_primary_key_ast); + alter_command.apply(new_columns, new_indices, ignored_order_by_ast, ignored_primary_key_ast); size_t modified_parts = 0; auto parts = data.getDataParts(); @@ -1549,7 +1558,7 @@ void StorageReplicatedMergeTree::executeClearColumnInPartition(const LogEntry & LOG_DEBUG(log, "Clearing column " << entry.column_name << " in part " << part->name); - auto transaction = data.alterDataPart(part, columns_for_parts, false); + auto transaction = data.alterDataPart(part, columns_for_parts, new_indices.indices, false); if (!transaction) continue; @@ -3117,9 +3126,10 @@ void StorageReplicatedMergeTree::alter(const AlterCommands & params, data.checkAlter(params); ColumnsDescription new_columns = data.getColumns(); + IndicesDescription new_indices = data.getIndicesDescription(); ASTPtr new_order_by_ast = data.order_by_ast; ASTPtr new_primary_key_ast = data.primary_key_ast; - params.apply(new_columns, new_order_by_ast, new_primary_key_ast); + params.apply(new_columns, new_indices, new_order_by_ast, new_primary_key_ast); String new_columns_str = new_columns.toString(); if (new_columns_str != data.getColumns().toString()) @@ -3129,6 +3139,10 @@ void StorageReplicatedMergeTree::alter(const AlterCommands & params, if (new_order_by_ast.get() != data.order_by_ast.get()) new_metadata.sorting_key = serializeAST(*MergeTreeData::extractKeyExpressionList(new_order_by_ast)); + String new_indices_str = new_indices.toString(); + if (new_indices_str != data.getIndicesDescription().toString()) + new_metadata.skip_indices = new_indices_str; + String new_metadata_str = new_metadata.toString(); if (new_metadata_str != ReplicatedMergeTreeTableMetadata(data).toString()) changed_nodes.emplace_back(zookeeper_path, "metadata", new_metadata_str); diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h index 10675ac7107..927679dbf8c 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.h +++ b/dbms/src/Storages/StorageReplicatedMergeTree.h @@ -552,6 +552,7 @@ protected: bool attach, const String & path_, const String & database_name_, const String & name_, const ColumnsDescription & columns_, + const IndicesDescription & indices_, Context & context_, const String & date_column_name, const ASTPtr & partition_by_ast_, diff --git a/dbms/tests/queries/0_stateless/00825_minmax_index.reference b/dbms/tests/queries/0_stateless/00825_minmax_index.reference new file mode 100644 index 00000000000..046f6502f65 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00825_minmax_index.reference @@ -0,0 +1,8 @@ +0 5 4.7 6.50 cba b 2014-01-04 +0 5 4.7 6.50 cba b 2014-03-11 +2 5 4.7 6.50 cba b 2014-06-11 +2 5 4.7 6.50 cba b 2015-01-01 +0 5 4.7 6.50 cba b 2014-01-04 +0 5 4.7 6.50 cba b 2014-03-11 +2 5 4.7 6.50 cba b 2014-06-11 +2 5 4.7 6.50 cba b 2015-01-01 diff --git a/dbms/tests/queries/0_stateless/00825_minmax_index.sql b/dbms/tests/queries/0_stateless/00825_minmax_index.sql new file mode 100644 index 00000000000..39271dc18e6 --- /dev/null 
+++ b/dbms/tests/queries/0_stateless/00825_minmax_index.sql @@ -0,0 +1,41 @@ +DROP TABLE IF EXISTS test.minmax_idx; + +CREATE TABLE test.minmax_idx +( + u64 UInt64, + i32 Int32, + f64 Float64, + d Decimal(10, 2), + s String, + e Enum8('a' = 1, 'b' = 2, 'c' = 3), + dt Date, + INDEX idx_all (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 4, + INDEX idx_all2 (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 2, + INDEX idx_2 (u64 + toYear(dt), substring(s, 2, 4)) TYPE minmax GRANULARITY 3 +) ENGINE = MergeTree() +ORDER BY u64 +SETTINGS index_granularity = 2; + + +/* many small inserts => table will make merges */ +INSERT INTO test.minmax_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-01-01'); +INSERT INTO test.minmax_idx VALUES (0, 5, 4.7, 6.5, 'cba', 'b', '2014-01-04'); +INSERT INTO test.minmax_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2017-01-01'); +INSERT INTO test.minmax_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2016-01-01'); +INSERT INTO test.minmax_idx VALUES (2, 5, 4.7, 6.5, 'cba', 'b', '2015-01-01'); +INSERT INTO test.minmax_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-11-11'); + +INSERT INTO test.minmax_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-02-11'); +INSERT INTO test.minmax_idx VALUES (0, 5, 4.7, 6.5, 'cba', 'b', '2014-03-11'); +INSERT INTO test.minmax_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-04-11'); +INSERT INTO test.minmax_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-05-11'); +INSERT INTO test.minmax_idx VALUES (2, 5, 4.7, 6.5, 'cba', 'b', '2014-06-11'); +INSERT INTO test.minmax_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-07-11'); + +/* simple select */ +SELECT * FROM test.minmax_idx WHERE i32 = 5 AND i32 + f64 < 12 AND 3 < d AND d < 7 AND (s = 'bac' OR s = 'cba') ORDER BY dt; + +/* select with hole made by primary key */ +SELECT * FROM test.minmax_idx WHERE u64 != 1 AND e = 'b' ORDER BY dt; + +DROP TABLE test.minmax_idx; \ No newline at end of file diff --git a/dbms/tests/queries/0_stateless/00825_minmax_index_replicated_zookeeper.reference b/dbms/tests/queries/0_stateless/00825_minmax_index_replicated_zookeeper.reference new file mode 100644 index 00000000000..3b0b10a96f1 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00825_minmax_index_replicated_zookeeper.reference @@ -0,0 +1,16 @@ +0 5 4.7 6.50 cba b 2014-01-04 +0 5 4.7 6.50 cba b 2014-03-11 +2 5 4.7 6.50 cba b 2014-06-11 +2 5 4.7 6.50 cba b 2015-01-01 +0 5 4.7 6.50 cba b 2014-01-04 +0 5 4.7 6.50 cba b 2014-03-11 +2 5 4.7 6.50 cba b 2014-06-11 +2 5 4.7 6.50 cba b 2015-01-01 +0 5 4.7 6.50 cba b 2014-01-04 +0 5 4.7 6.50 cba b 2014-03-11 +2 5 4.7 6.50 cba b 2014-06-11 +2 5 4.7 6.50 cba b 2015-01-01 +0 5 4.7 6.50 cba b 2014-01-04 +0 5 4.7 6.50 cba b 2014-03-11 +2 5 4.7 6.50 cba b 2014-06-11 +2 5 4.7 6.50 cba b 2015-01-01 diff --git a/dbms/tests/queries/0_stateless/00825_minmax_index_replicated_zookeeper.sql b/dbms/tests/queries/0_stateless/00825_minmax_index_replicated_zookeeper.sql new file mode 100644 index 00000000000..f46cceb4096 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00825_minmax_index_replicated_zookeeper.sql @@ -0,0 +1,69 @@ +DROP TABLE IF EXISTS test.minmax_idx1; +DROP TABLE IF EXISTS test.minmax_idx2; + +CREATE TABLE test.minmax_idx1 +( + u64 UInt64, + i32 Int32, + f64 Float64, + d Decimal(10, 2), + s String, + e Enum8('a' = 1, 'b' = 2, 'c' = 3), + dt Date, + INDEX + idx_all (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 2, + INDEX + idx_2 (u64 + toYear(dt), substring(s, 2, 4)) TYPE minmax GRANULARITY 3 +) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/minmax', 
'r1') +ORDER BY u64 +SETTINGS index_granularity = 2; + +CREATE TABLE test.minmax_idx2 +( + u64 UInt64, + i32 Int32, + f64 Float64, + d Decimal(10, 2), + s String, + e Enum8('a' = 1, 'b' = 2, 'c' = 3), + dt Date, + INDEX + idx_all (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 2, + INDEX + idx_2 (u64 + toYear(dt), substring(s, 2, 4)) TYPE minmax GRANULARITY 3 +) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/minmax', 'r2') +ORDER BY u64 +SETTINGS index_granularity = 2; + + +/* many small inserts => table will make merges */ +INSERT INTO test.minmax_idx1 VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-01-01'); +INSERT INTO test.minmax_idx1 VALUES (0, 5, 4.7, 6.5, 'cba', 'b', '2014-01-04'); +INSERT INTO test.minmax_idx2 VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2017-01-01'); +INSERT INTO test.minmax_idx2 VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2016-01-01'); +INSERT INTO test.minmax_idx2 VALUES (2, 5, 4.7, 6.5, 'cba', 'b', '2015-01-01'); +INSERT INTO test.minmax_idx1 VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-11-11'); + +SYSTEM SYNC REPLICA test.minmax_idx1; +SYSTEM SYNC REPLICA test.minmax_idx2; + +INSERT INTO test.minmax_idx1 VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-02-11'); +INSERT INTO test.minmax_idx1 VALUES (0, 5, 4.7, 6.5, 'cba', 'b', '2014-03-11'); +INSERT INTO test.minmax_idx1 VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-04-11'); +INSERT INTO test.minmax_idx1 VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-05-11'); +INSERT INTO test.minmax_idx2 VALUES (2, 5, 4.7, 6.5, 'cba', 'b', '2014-06-11'); +INSERT INTO test.minmax_idx2 VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-07-11'); + +SYSTEM SYNC REPLICA test.minmax_idx1; +SYSTEM SYNC REPLICA test.minmax_idx2; + +/* simple select */ +SELECT * FROM test.minmax_idx1 WHERE i32 = 5 AND i32 + f64 < 12 AND 3 < d AND d < 7 AND (s = 'bac' OR s = 'cba') ORDER BY dt; +SELECT * FROM test.minmax_idx2 WHERE i32 = 5 AND i32 + f64 < 12 AND 3 < d AND d < 7 AND (s = 'bac' OR s = 'cba') ORDER BY dt; + +/* select with hole made by primary key */ +SELECT * FROM test.minmax_idx1 WHERE u64 != 1 AND e = 'b' ORDER BY dt; +SELECT * FROM test.minmax_idx2 WHERE u64 != 1 AND e = 'b' ORDER BY dt; + +DROP TABLE test.minmax_idx1; +DROP TABLE test.minmax_idx2; \ No newline at end of file diff --git a/dbms/tests/queries/0_stateless/00826_indices_alter.reference b/dbms/tests/queries/0_stateless/00826_indices_alter.reference new file mode 100644 index 00000000000..43447d944de --- /dev/null +++ b/dbms/tests/queries/0_stateless/00826_indices_alter.reference @@ -0,0 +1,28 @@ +CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32, INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10, INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192 +1 2 +1 2 +1 2 +1 2 +1 2 +1 2 +1 2 +CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32, INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192 +1 2 +1 2 +1 2 +1 2 +1 2 +1 2 +CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32, INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192 +1 2 +1 2 +1 2 +1 2 +1 2 +1 2 +1 2 +1 2 +CREATE TABLE test.minmax_idx2 ( u64 UInt64, i32 Int32) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192 +1 2 +1 2 diff 
--git a/dbms/tests/queries/0_stateless/00826_indices_alter.sql b/dbms/tests/queries/0_stateless/00826_indices_alter.sql new file mode 100644 index 00000000000..6749d69ff28 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00826_indices_alter.sql @@ -0,0 +1,68 @@ +DROP TABLE IF EXISTS test.minmax_idx; +DROP TABLE IF EXISTS test.minmax_idx2; + +CREATE TABLE test.minmax_idx +( + u64 UInt64, + i32 Int32 +) ENGINE = MergeTree() +ORDER BY u64; + +INSERT INTO test.minmax_idx VALUES (1, 2); + +ALTER TABLE test.minmax_idx ADD INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10; +ALTER TABLE test.minmax_idx ADD INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10; +ALTER TABLE test.minmax_idx ADD INDEX idx3 (u64 - i32) TYPE minmax GRANULARITY 10 AFTER idx1; + +SHOW CREATE TABLE test.minmax_idx; + +SELECT * FROM test.minmax_idx WHERE u64 * i32 = 2; + +INSERT INTO test.minmax_idx VALUES (1, 2); +INSERT INTO test.minmax_idx VALUES (1, 2); +INSERT INTO test.minmax_idx VALUES (1, 2); +INSERT INTO test.minmax_idx VALUES (1, 2); +INSERT INTO test.minmax_idx VALUES (1, 2); + +SELECT * FROM test.minmax_idx WHERE u64 * i32 = 2; + +ALTER TABLE test.minmax_idx DROP INDEX idx1; + +SHOW CREATE TABLE test.minmax_idx; + +SELECT * FROM test.minmax_idx WHERE u64 * i32 = 2; + +ALTER TABLE test.minmax_idx DROP INDEX idx2; +ALTER TABLE test.minmax_idx DROP INDEX idx3; + +SHOW CREATE TABLE test.minmax_idx; + +ALTER TABLE test.minmax_idx ADD INDEX idx1 (u64 * i32) TYPE minmax GRANULARITY 10; + +SHOW CREATE TABLE test.minmax_idx; + +SELECT * FROM test.minmax_idx WHERE u64 * i32 = 2; + + +CREATE TABLE test.minmax_idx2 +( + u64 UInt64, + i32 Int32, + INDEX idx1 (u64 + i32) TYPE minmax GRANULARITY 10, + INDEX idx2 u64 * i32 TYPE minmax GRANULARITY 10 +) ENGINE = MergeTree() +ORDER BY u64; + +INSERT INTO test.minmax_idx2 VALUES (1, 2); +INSERT INTO test.minmax_idx2 VALUES (1, 2); + +SELECT * FROM test.minmax_idx2 WHERE u64 * i32 = 2; + +ALTER TABLE test.minmax_idx2 DROP INDEX idx1, DROP INDEX idx2; + +SHOW CREATE TABLE test.minmax_idx2; + +SELECT * FROM test.minmax_idx2 WHERE u64 * i32 = 2; + +DROP TABLE test.minmax_idx; +DROP TABLE test.minmax_idx2; \ No newline at end of file diff --git a/dbms/tests/queries/0_stateless/00826_indices_alter_replicated_zookeeper.reference b/dbms/tests/queries/0_stateless/00826_indices_alter_replicated_zookeeper.reference new file mode 100644 index 00000000000..ce3192788e1 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00826_indices_alter_replicated_zookeeper.reference @@ -0,0 +1,58 @@ +CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32, INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10, INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\') ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx_r ( u64 UInt64, i32 Int32, INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10, INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\') ORDER BY u64 SETTINGS index_granularity = 8192 +1 2 +1 2 +1 2 +1 4 +1 5 +3 2 +19 9 +65 75 +1 2 +1 4 +1 5 +3 2 +19 9 +65 75 +CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32, INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\') ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE 
test.minmax_idx_r ( u64 UInt64, i32 Int32, INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\') ORDER BY u64 SETTINGS index_granularity = 8192 +1 2 +1 4 +1 5 +3 2 +19 9 +65 75 +1 2 +1 4 +1 5 +3 2 +19 9 +65 75 +CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\') ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx_r ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\') ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32, INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\') ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx_r ( u64 UInt64, i32 Int32, INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\') ORDER BY u64 SETTINGS index_granularity = 8192 +1 2 +1 4 +1 5 +3 2 +19 9 +65 75 +1 2 +1 4 +1 5 +3 2 +19 9 +65 75 +CREATE TABLE test.minmax_idx2 ( u64 UInt64, i32 Int32, INDEX idx1 u64 + i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 * i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r1\') ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx2_r ( u64 UInt64, i32 Int32, INDEX idx1 u64 + i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 * i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r2\') ORDER BY u64 SETTINGS index_granularity = 8192 +1 2 +1 3 +1 2 +1 3 +CREATE TABLE test.minmax_idx2 ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r1\') ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx2_r ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r2\') ORDER BY u64 SETTINGS index_granularity = 8192 +1 2 +1 3 +1 2 +1 3 diff --git a/dbms/tests/queries/0_stateless/00826_indices_alter_replicated_zookeeper.sql b/dbms/tests/queries/0_stateless/00826_indices_alter_replicated_zookeeper.sql new file mode 100644 index 00000000000..ccd71beb6ec --- /dev/null +++ b/dbms/tests/queries/0_stateless/00826_indices_alter_replicated_zookeeper.sql @@ -0,0 +1,111 @@ +DROP TABLE IF EXISTS test.minmax_idx; +DROP TABLE IF EXISTS test.minmax_idx_r; +DROP TABLE IF EXISTS test.minmax_idx2; +DROP TABLE IF EXISTS test.minmax_idx2_r; + +CREATE TABLE test.minmax_idx +( + u64 UInt64, + i32 Int32 +) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/indices_alter1', 'r1') +ORDER BY u64; + +CREATE TABLE test.minmax_idx_r +( + u64 UInt64, + i32 Int32 +) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/indices_alter1', 'r2') +ORDER BY u64; + +INSERT INTO test.minmax_idx VALUES (1, 2); + +SYSTEM SYNC REPLICA test.minmax_idx_r; + +ALTER TABLE test.minmax_idx ADD INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10; +ALTER TABLE test.minmax_idx_r ADD INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10; +ALTER TABLE test.minmax_idx ADD INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10 AFTER idx1; + +SHOW CREATE TABLE test.minmax_idx; +SHOW CREATE TABLE test.minmax_idx_r; + +SELECT * FROM test.minmax_idx WHERE u64 * i32 = 2 ORDER BY (u64, i32); +SELECT * FROM test.minmax_idx_r 
WHERE u64 * i32 = 2 ORDER BY (u64, i32); + +INSERT INTO test.minmax_idx VALUES (1, 4); +INSERT INTO test.minmax_idx_r VALUES (3, 2); +INSERT INTO test.minmax_idx VALUES (1, 5); +INSERT INTO test.minmax_idx_r VALUES (65, 75); +INSERT INTO test.minmax_idx VALUES (19, 9); + +SYSTEM SYNC REPLICA test.minmax_idx; +SYSTEM SYNC REPLICA test.minmax_idx_r; + +SELECT * FROM test.minmax_idx WHERE u64 * i32 > 1 ORDER BY (u64, i32); +SELECT * FROM test.minmax_idx_r WHERE u64 * i32 > 1 ORDER BY (u64, i32); + +ALTER TABLE test.minmax_idx DROP INDEX idx1; + +SHOW CREATE TABLE test.minmax_idx; +SHOW CREATE TABLE test.minmax_idx_r; + +SELECT * FROM test.minmax_idx WHERE u64 * i32 > 1 ORDER BY (u64, i32); +SELECT * FROM test.minmax_idx_r WHERE u64 * i32 > 1 ORDER BY (u64, i32); + +ALTER TABLE test.minmax_idx DROP INDEX idx2; +ALTER TABLE test.minmax_idx_r DROP INDEX idx3; + +SHOW CREATE TABLE test.minmax_idx; +SHOW CREATE TABLE test.minmax_idx_r; + +ALTER TABLE test.minmax_idx ADD INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10; + +SHOW CREATE TABLE test.minmax_idx; +SHOW CREATE TABLE test.minmax_idx_r; + +SELECT * FROM test.minmax_idx WHERE u64 * i32 > 1 ORDER BY (u64, i32); +SELECT * FROM test.minmax_idx_r WHERE u64 * i32 > 1 ORDER BY (u64, i32); + + +CREATE TABLE test.minmax_idx2 +( + u64 UInt64, + i32 Int32, + INDEX idx1 u64 + i32 TYPE minmax GRANULARITY 10, + INDEX idx2 u64 * i32 TYPE minmax GRANULARITY 10 +) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/indices_alter2', 'r1') +ORDER BY u64; + +CREATE TABLE test.minmax_idx2_r +( + u64 UInt64, + i32 Int32, + INDEX idx1 u64 + i32 TYPE minmax GRANULARITY 10, + INDEX idx2 u64 * i32 TYPE minmax GRANULARITY 10 +) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/indices_alter2', 'r2') +ORDER BY u64; + + +SHOW CREATE TABLE test.minmax_idx2; +SHOW CREATE TABLE test.minmax_idx2_r; + +INSERT INTO test.minmax_idx2 VALUES (1, 2); +INSERT INTO test.minmax_idx2_r VALUES (1, 3); + +SYSTEM SYNC REPLICA test.minmax_idx2; +SYSTEM SYNC REPLICA test.minmax_idx2_r; + +SELECT * FROM test.minmax_idx2 WHERE u64 * i32 >= 2 ORDER BY (u64, i32); +SELECT * FROM test.minmax_idx2_r WHERE u64 * i32 >= 2 ORDER BY (u64, i32); + +ALTER TABLE test.minmax_idx2_r DROP INDEX idx1, DROP INDEX idx2; + +SHOW CREATE TABLE test.minmax_idx2; +SHOW CREATE TABLE test.minmax_idx2_r; + +SELECT * FROM test.minmax_idx2 WHERE u64 * i32 >= 2 ORDER BY (u64, i32); +SELECT * FROM test.minmax_idx2_r WHERE u64 * i32 >= 2 ORDER BY (u64, i32); + +DROP TABLE test.minmax_idx; +DROP TABLE test.minmax_idx_r; +DROP TABLE test.minmax_idx2; +DROP TABLE test.minmax_idx2_r; \ No newline at end of file diff --git a/dbms/tests/queries/0_stateless/00832_unique_index.reference b/dbms/tests/queries/0_stateless/00832_unique_index.reference new file mode 100644 index 00000000000..046f6502f65 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00832_unique_index.reference @@ -0,0 +1,8 @@ +0 5 4.7 6.50 cba b 2014-01-04 +0 5 4.7 6.50 cba b 2014-03-11 +2 5 4.7 6.50 cba b 2014-06-11 +2 5 4.7 6.50 cba b 2015-01-01 +0 5 4.7 6.50 cba b 2014-01-04 +0 5 4.7 6.50 cba b 2014-03-11 +2 5 4.7 6.50 cba b 2014-06-11 +2 5 4.7 6.50 cba b 2015-01-01 diff --git a/dbms/tests/queries/0_stateless/00832_unique_index.sql b/dbms/tests/queries/0_stateless/00832_unique_index.sql new file mode 100644 index 00000000000..65568255c86 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00832_unique_index.sql @@ -0,0 +1,41 @@ +DROP TABLE IF EXISTS test.unique_idx; + +CREATE TABLE test.unique_idx +( + u64 UInt64, + i32 Int32, + f64 Float64, + d 
Decimal(10, 2), + s String, + e Enum8('a' = 1, 'b' = 2, 'c' = 3), + dt Date, + INDEX idx_all (i32, i32 + f64, d, s, e, dt) TYPE unique GRANULARITY 4, + INDEX idx_all2 (i32, i32 + f64, d, s, e, dt) TYPE unique GRANULARITY 2, + INDEX idx_2 (u64 + toYear(dt), substring(s, 2, 4)) TYPE unique GRANULARITY 3 +) ENGINE = MergeTree() +ORDER BY u64 +SETTINGS index_granularity = 2; + + +/* many small inserts => table will make merges */ +INSERT INTO test.unique_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-01-01'); +INSERT INTO test.unique_idx VALUES (0, 5, 4.7, 6.5, 'cba', 'b', '2014-01-04'); +INSERT INTO test.unique_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2017-01-01'); +INSERT INTO test.unique_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2016-01-01'); +INSERT INTO test.unique_idx VALUES (2, 5, 4.7, 6.5, 'cba', 'b', '2015-01-01'); +INSERT INTO test.unique_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-11-11'); + +INSERT INTO test.unique_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-02-11'); +INSERT INTO test.unique_idx VALUES (0, 5, 4.7, 6.5, 'cba', 'b', '2014-03-11'); +INSERT INTO test.unique_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-04-11'); +INSERT INTO test.unique_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-05-11'); +INSERT INTO test.unique_idx VALUES (2, 5, 4.7, 6.5, 'cba', 'b', '2014-06-11'); +INSERT INTO test.unique_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-07-11'); + +/* simple select */ +SELECT * FROM test.unique_idx WHERE i32 = 5 AND i32 + f64 < 12 AND 3 < d AND d < 7 AND (s = 'bac' OR s = 'cba') ORDER BY dt; + +/* select with hole made by primary key */ +SELECT * FROM test.unique_idx WHERE u64 != 1 AND e = 'b' ORDER BY dt; + +DROP TABLE test.unique_idx; \ No newline at end of file diff --git a/debian/control b/debian/control index 04db4f9ae95..2be5e609315 100644 --- a/debian/control +++ b/debian/control @@ -26,6 +26,7 @@ Description: Client binary for ClickHouse Package: clickhouse-common-static Architecture: any Depends: ${shlibs:Depends}, ${misc:Depends}, tzdata +Suggests: clickhouse-common-static-dbg Replaces: clickhouse-server-base Provides: clickhouse-server-base Description: Common files for ClickHouse diff --git a/docs/en/operations/monitoring.md b/docs/en/operations/monitoring.md index b38a33917b9..cc966e192a4 100644 --- a/docs/en/operations/monitoring.md +++ b/docs/en/operations/monitoring.md @@ -21,9 +21,14 @@ It is highly recommended to set up monitoring for: ClickHouse server has embedded instruments for self-state monitoring. -To monitor server events use server logs. See the [logger](#server_settings-logger) section of the configuration file. +To track server events use server logs. See the [logger](#server_settings-logger) section of the configuration file. -ClickHouse collects different metrics of computational resources usage and common statistics of queries processing. You can find metrics in tables [system.metrics](#system_tables-metrics), [system.events](#system_tables-events) и [system.asynchronous_metrics](#system_tables-asynchronous_metrics). +ClickHouse collects: + +- Different metrics of how the server uses computational resources. +- Common statistics of queries processing. + +You can find metrics in tables [system.metrics](#system_tables-metrics), [system.events](#system_tables-events) и [system.asynchronous_metrics](#system_tables-asynchronous_metrics). You can configure ClickHouse to export metrics to [Graphite](https://github.com/graphite-project). 
See the [Graphite section](server_settings/settings.md#server_settings-graphite) of ClickHouse server configuration file. Before configuring metrics export, you should set up Graphite by following their official guide https://graphite.readthedocs.io/en/latest/install.html. diff --git a/docs/en/operations/requirements.md b/docs/en/operations/requirements.md index 8e8f7434a12..8d358707bce 100644 --- a/docs/en/operations/requirements.md +++ b/docs/en/operations/requirements.md @@ -2,7 +2,7 @@ ## CPU -In case of installation from prebuilt deb-packages use CPU with x86/64 architecture and SSE 4.2 instructions support. If you build ClickHouse from sources, you can use other processors. +In case of installation from prebuilt deb-packages use CPU with x86_64 architecture and SSE 4.2 instructions support. To run ClickHouse with processors than does not support SSE 4.2 or has AArch64 or PowerPC64LE architecture, you should build ClickHouse from sources. ClickHouse implements parallel data processing and uses all the hardware resources available. When choosing a processor, take into account that ClickHouse works more efficient at configurations with a large number of cores but lower clock rate than at configurations with fewer cores and a higher clock rate. For example, 16 cores with 2600 MHz is preferable than 8 cores with 3600 MHz. @@ -15,7 +15,7 @@ We recommend to use 4GB of RAM as minimum to be able to perform non-trivial quer The required volume of RAM depends on: - The complexity of queries. - - Amount of the data in queries. + - Amount of the data, that processed in queries. To calculate the required volume of RAM, you should estimate the size of temporary data for [GROUP BY](../query_language/select.md#select-group-by-clause), [DISTINCT](../query_language/select.md#select-distinct), [JOIN](../query_language/select.md#select-join) and other operations you use. @@ -33,20 +33,22 @@ The volume of storage required for your data should be calculated separately. As - Estimation of a data volume. - You can take the sample of the data and get the size of a row from it. Then multiply the size of the row with a number of rows you plan to store. + You can take the sample of the data and get an average size of a row from it. Then multiply the value with a number of rows you plan to store. - Data compression coefficient. - To estimate the data compression coefficient, load some sample of your data into ClickHouse and compare the actual size of the data with the size of the table stored. For example, the typical compression coefficient for clickstream data lays in a range of 6-10 times. + To estimate the data compression coefficient, load some sample of your data into ClickHouse and compare the actual size of the data with the size of the table stored. For example, clickstream data are usually compressed by 6-10 times. -To calculate the final volume of data to be stored, divide the estimated data volume by the compression coefficient. +To calculate the final volume of data to be stored, apply the compression coefficient to the estimated data volume. If you plan to store data in several replicas, then multiply estimated volume with the number of replicas. ## Network -If possible, use a 10G network. +If possible, use networks of 10G of higher class. A bandwidth of the network is critical for processing of distributed queries with a large amount of intermediate data. Also, network speed affects replication processes. ## Software -ClickHouse is developed for Linux family of operating systems. 
The recommended Linux distribution is Ubuntu. The `tzdata` package should be installed in the system. Name and version of an operating system where ClickHouse runs depend on the method of installation. See details in [Getting started](../getting_started/index.md) section of the documentation. +ClickHouse is developed for Linux family of operating systems. The recommended Linux distribution is Ubuntu. The `tzdata` package should be installed in the system. + +ClickHouse also can work in other families of operating systems. See details in [Getting started](../getting_started/index.md) section of the documentation. diff --git a/docs/en/operations/table_engines/mergetree.md b/docs/en/operations/table_engines/mergetree.md index ec2c05b786f..473f2109e99 100644 --- a/docs/en/operations/table_engines/mergetree.md +++ b/docs/en/operations/table_engines/mergetree.md @@ -34,6 +34,8 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1], name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2], ... + INDEX index_name1 expr1 TYPE type1(...) GRANULARITY value1, + INDEX index_name2 expr2 TYPE type2(...) GRANULARITY value2 ) ENGINE = MergeTree() [PARTITION BY expr] [ORDER BY expr] @@ -225,6 +227,56 @@ To check whether ClickHouse can use the index when running a query, use the sett The key for partitioning by month allows reading only those data blocks which contain dates from the proper range. In this case, the data block may contain data for many dates (up to an entire month). Within a block, data is sorted by primary key, which might not contain the date as the first column. Because of this, using a query with only a date condition that does not specify the primary key prefix will cause more data to be read than for a single date. + +### Data Skipping Indices + +Index declaration in the columns section of the `CREATE` query. +```sql +INDEX index_name expr TYPE type(...) GRANULARITY granularity_value +``` + +For tables from the `*MergeTree` family data skipping indices can be specified. + +These indices aggregate some information about the specified expression on blocks, which consist of `granularity_value` granules, +then these aggregates are used in `SELECT` queries for reducing the amount of data to read from the disk by skipping big blocks of data where `where` query cannot be satisfied. + + +Example +```sql +CREATE TABLE table_name +( + u64 UInt64, + i32 Int32, + s String, + ... + INDEX a (u64 * i32, s) TYPE minmax GRANULARITY 3, + INDEX b (u64 * length(s)) TYPE unique GRANULARITY 4 +) ENGINE = MergeTree() +... +``` + +Indices from the example can be used by ClickHouse to reduce the amount of data to read from disk in following queries. +```sql +SELECT count() FROM table WHERE s < 'z' +SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234 +``` + +#### Available Types of Indices + +* `minmax` +Stores extremes of the specified expression (if the expression is `tuple`, then it stores extremes for each element of `tuple`), uses stored info for skipping blocks of the data like the primary key. + +* `unique(max_rows)` +Stores unique values of the specified expression (no more than `max_rows` rows), use them to check if the `WHERE` expression is not satisfiable on a block of the data. +If `max_rows=0`, then there are no limits for storing values. `unique` without parameters is equal to `unique(0)`. 
+ +```sql +INDEX sample_index (u64 * length(s)) TYPE minmax GRANULARITY 4 +INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE unique GRANULARITY 4 +INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE unique(100) GRANULARITY 4 +``` + + ## Concurrent Data Access For concurrent table access, we use multi-versioning. In other words, when a table is simultaneously read and updated, data is read from a set of parts that is current at the time of the query. There are no lengthy locks. Inserts do not get in the way of read operations. diff --git a/docs/en/operations/troubleshooting.md b/docs/en/operations/troubleshooting.md index fb12820493f..511bbfa2205 100644 --- a/docs/en/operations/troubleshooting.md +++ b/docs/en/operations/troubleshooting.md @@ -1,13 +1,11 @@ # Troubleshooting -Known issues: +- [Installation](#troubleshooting-installation-errors) +- [Connecting to the server](#troubleshooting-accepts-no-connections) +- [Queries processing](#troubleshooting-does-not-process-queries) +- [Efficiency of queries processing](#troubleshooting-too-slow) -- [Installation errors](#troubleshooting-installation-errors). -- [The server does not accept the connections](#troubleshooting-accepts-no-connections). -- [ClickHouse does not process queries](#troubleshooting-does-not-process-queries). -- [ClickHouse processes queries too slow](#troubleshooting-too-slow). - -## Installation Errors {#troubleshooting-installation-errors} +## Installation {#troubleshooting-installation-errors} ### You Can Not Get Deb-packages from ClickHouse Repository With apt-get @@ -15,9 +13,9 @@ Known issues: - If you can not access the repository by any reason, download packages as described in the [Getting started](../getting_started/index.md) article and install them manually with `sudo dpkg -i ` command. Also, you need `tzdata` package. -## Server Does Not Accept the Connections {#troubleshooting-accepts-no-connections} +## Connecting to the Server {#troubleshooting-accepts-no-connections} -Possible reasons: +Possible issues: - The server is not running. - Unexpected or wrong configuration parameters. @@ -122,7 +120,7 @@ Check: You may use the wrong user name or password for it. -## ClickHouse Does Not Process Queries {#troubleshooting-does-not-process-queries} +## Queries Processing {#troubleshooting-does-not-process-queries} If ClickHouse can not process the query, it sends the description of an error to the client. In the `clickhouse-client` you get a description of an error in console. If you use HTTP interface, ClickHouse sends error description in response body. For example, @@ -135,7 +133,7 @@ If you start `clickhouse-client` with `stack-trace` parameter, ClickHouse return It is possible that you see the message of connection broken. In this case, you can repeat query. If connection brakes any time you perform the query you should check the server logs for errors. -## ClickHouse Processes Queries Not Fast Enough {#troubleshooting-too-slow} +## Efficiency of Queries Processing {#troubleshooting-too-slow} If you see that ClickHouse works too slow, you need to profile the load of the server resources and network for your queries. 
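Returning to the data skipping indices section of mergetree.md above, a small sketch (made-up table name and numbers) of how `GRANULARITY` relates to `index_granularity`: one index block aggregates `GRANULARITY` granules of `index_granularity` rows each.

```sql
-- Illustrative sketch: table name and numbers are arbitrary.
CREATE TABLE test.hits_sample
(
    dt Date,
    url String,
    size UInt64,
    INDEX size_minmax size TYPE minmax GRANULARITY 4
) ENGINE = MergeTree()
ORDER BY dt
SETTINGS index_granularity = 8192;

-- Each index block aggregates 4 granules of 8192 rows, i.e. 32768 consecutive rows.
-- A query like the one below can skip such a block when its stored [min, max]
-- range for `size` cannot contain a matching value.
SELECT count() FROM test.hits_sample WHERE size > 1000000;
```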
diff --git a/docs/en/query_language/alter.md b/docs/en/query_language/alter.md index 7b8808fca6c..5a2229cb305 100644 --- a/docs/en/query_language/alter.md +++ b/docs/en/query_language/alter.md @@ -83,6 +83,19 @@ rows are ordered by the sorting key expression you cannot add expressions contai to the sorting key (only columns added by the `ADD COLUMN` command in the same `ALTER` query). +### Manipulations With Data Skipping Indices + +It only works for tables in the [`*MergeTree`](../operations/table_engines/mergetree.md) family (including +[replicated](../operations/table_engines/replication.md) tables). The following operations +are available: + +* `ALTER ADD INDEX name expression TYPE type GRANULARITY value AFTER name [AFTER name2]` - Adds index description to tables metadata. + +* `ALTER DROP INDEX name` - Removes index description from tables metadata and deletes index files from disk. + +These commands are lightweight in a sense that they only change metadata or remove files. +Also, they are replicated (syncing indices metadata through ZooKeeper). + ### Manipulations With Partitions and Parts It only works for tables in the [`MergeTree`](../operations/table_engines/mergetree.md) family (including diff --git a/docs/ru/operations/monitoring.md b/docs/ru/operations/monitoring.md new file mode 100644 index 00000000000..3fe59c92573 --- /dev/null +++ b/docs/ru/operations/monitoring.md @@ -0,0 +1,37 @@ +# Мониторинг + +Вы можете отслеживать: + +- Использование аппаратных ресурсов. +- Метрики сервера ClickHouse. + +## Использование ресурсов + +ClickHouse не отслеживает состояние аппаратных ресурсов самостоятельно. + +Рекомендуем контролировать: + +- Загрузку и температуру процессоров. + + Можно использовать [dmesg](https://en.wikipedia.org/wiki/Dmesg), [turbostat](https://www.linux.org/docs/man8/turbostat.html) или другие инструменты. + +- Использование системы хранения, оперативной памяти и сети. + +## Метрики сервера ClickHouse. + +Сервер ClickHouse имеет встроенные инструменты мониторинга. + +Для отслеживания событий на сервере используйте логи. Подробнее смотрите в разделе конфигурационного файла [logger](#server_settings-logger). + +ClickHouse собирает: + +- Различные метрики того, как сервер использует вычислительные ресурсы. +- Общую статистику обработки запросов. + +Метрики находятся в таблицах [system.metrics](#system_tables-metrics), [system.events](#system_tables-events) и [system.asynchronous_metrics](#system_tables-asynchronous_metrics). + +Можно настроить экспорт метрик из ClickHouse в [Graphite](https://github.com/graphite-project). Смотрите секцию [graphite](server_settings/settings.md#server_settings-graphite) конфигурационного файла ClickHouse. Перед настройкой экспорта метрик необходимо настроить Graphite, как указано в [официальном руководстве](https://graphite.readthedocs.io/en/latest/install.html). + +Также, можно отслеживать доступность сервера через HTTP API. Отправьте `HTTP GET` к ресурсу `/`. Если сервер доступен, он отвечает `200 OK`. + +Для мониторинга серверов в кластерной конфигурации необходимо установить параметр [max_replica_delay_for_distributed_queries](settings/settings.md#settings-max_replica_delay_for_distributed_queries) и использовать HTTP ресурс `/replicas-delay`. Если реплика доступна и не отстаёт от других реплик, то запрос к `/replicas-delay` возвращает `200 OK`. Если реплика отстаёт, то она возвращает информацию о размере отставания. 
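To make the ALTER commands described in alter.md above concrete, a minimal sketch (illustrative table and index names): both operations change only metadata and, for Replicated* tables, are propagated through ZooKeeper.

```sql
-- Illustrative table and index names; metadata-only operations.
ALTER TABLE test.minmax_idx ADD INDEX idx_mul (u64 * i32) TYPE minmax GRANULARITY 10;
ALTER TABLE test.minmax_idx DROP INDEX idx_mul;
```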
diff --git a/docs/ru/operations/requirements.md b/docs/ru/operations/requirements.md new file mode 100644 index 00000000000..175e01b9932 --- /dev/null +++ b/docs/ru/operations/requirements.md @@ -0,0 +1,54 @@
+# Требования
+
+## Процессор
+
+В случае установки из готовых deb-пакетов используйте процессоры с архитектурой x86_64 и поддержкой инструкций SSE 4.2. Для запуска ClickHouse на процессорах без поддержки SSE 4.2 или на процессорах с архитектурой AArch64 и PowerPC64LE необходимо собирать ClickHouse из исходников.
+
+ClickHouse реализует параллельную обработку данных и использует все доступные аппаратные ресурсы. При выборе процессора учитывайте, что ClickHouse работает более эффективно в конфигурациях с большим количеством ядер, но с более низкой тактовой частотой, чем в конфигурациях с меньшим количеством ядер и более высокой тактовой частотой. Например, 16 ядер с частотой 2600 MHz предпочтительнее, чем 8 ядер с частотой 3600 MHz.
+
+Рекомендуется использовать технологии **Turbo Boost** и **hyper-threading**. Их использование существенно улучшает производительность при типичной нагрузке.
+
+## RAM
+
+Мы рекомендуем использовать как минимум 4 ГБ оперативной памяти, чтобы иметь возможность выполнять нетривиальные запросы. Сервер ClickHouse может работать и с гораздо меньшим объёмом RAM, однако память требуется для обработки запросов.
+
+Необходимый объём RAM зависит от:
+
+- Сложности запросов.
+- Объёма данных, обрабатываемых в запросах.
+
+Для расчёта объёма RAM необходимо оценить размер промежуточных данных для операций [GROUP BY](../query_language/select.md#select-group-by-clause), [DISTINCT](../query_language/select.md#select-distinct), [JOIN](../query_language/select.md#select-join), а также других операций, которыми вы пользуетесь.
+
+ClickHouse может использовать внешнюю память для промежуточных данных. Подробнее смотрите в разделе [GROUP BY во внешней памяти](../query_language/select.md#select-group-by-in-external-memory).
+
+## Файл подкачки
+
+Отключайте файл подкачки в производственных средах.
+
+## Подсистема хранения
+
+Для установки ClickHouse необходимо 2 ГБ свободного места на диске.
+
+Объём дискового пространства, необходимый для хранения ваших данных, следует рассчитывать отдельно. Расчёт должен включать:
+
+- Приблизительную оценку объёма данных.
+
+    Можно взять образец данных и получить из него средний размер строки. Затем умножьте полученное значение на количество строк, которое вы планируете хранить.
+
+- Оценку коэффициента сжатия данных.
+
+    Чтобы оценить коэффициент сжатия данных, загрузите некоторую выборку данных в ClickHouse и сравните действительный размер данных с размером сохранённой таблицы. Например, данные типа clickstream обычно сжимаются в 6-10 раз. Пример запроса для такой оценки приведён ниже.
+
+Для оценки объёма хранилища примените коэффициент сжатия к размеру данных. Если вы планируете хранить данные в нескольких репликах, то полученный объём необходимо умножить на количество реплик.
+
+## Сеть
+
+По возможности используйте сети 10G и более высокого класса.
+
+Пропускная способность сети критически важна для обработки распределённых запросов с большим количеством промежуточных данных. Также скорость сети влияет на задержки в процессах репликации.
+
+## Программное обеспечение
+
+ClickHouse разработан для семейства операционных систем Linux. Рекомендуемый дистрибутив Linux — Ubuntu. В системе должен быть установлен пакет `tzdata`.
+
+ClickHouse может работать и в других семействах операционных систем. Подробнее смотрите в разделе документации [Начало работы](../getting_started/index.md).
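Примерный набросок запроса для упомянутой выше оценки коэффициента сжатия уже загруженных данных. Предполагается, что данные загружены в таблицы семейства MergeTree; используется системная таблица `system.parts`.

```sql
-- Сравнение несжатого и сжатого размера активных кусков по таблицам
SELECT
    database,
    table,
    formatReadableSize(sum(data_uncompressed_bytes)) AS uncompressed,
    formatReadableSize(sum(data_compressed_bytes)) AS compressed,
    round(sum(data_uncompressed_bytes) / sum(data_compressed_bytes), 2) AS ratio
FROM system.parts
WHERE active
GROUP BY database, table
ORDER BY sum(data_uncompressed_bytes) DESC
```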
diff --git a/docs/ru/operations/table_engines/mergetree.md b/docs/ru/operations/table_engines/mergetree.md index d0410ccea58..7318e4d8145 100644 --- a/docs/ru/operations/table_engines/mergetree.md +++ b/docs/ru/operations/table_engines/mergetree.md @@ -32,6 +32,8 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
     name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
     name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
     ...
+    INDEX index_name1 expr1 TYPE type1(...) GRANULARITY value1,
+    INDEX index_name2 expr2 TYPE type2(...) GRANULARITY value2
 ) ENGINE = MergeTree()
 [PARTITION BY expr]
 [ORDER BY expr]
@@ -224,6 +226,50 @@ SELECT count() FROM table WHERE CounterID = 34 OR URL LIKE '%upyachka%'
 Ключ партиционирования по месяцам обеспечивает чтение только тех блоков данных, которые содержат даты из нужного диапазона. При этом блок данных может содержать данные за многие даты (до целого месяца). В пределах одного блока данные упорядочены по первичному ключу, который может не содержать дату в качестве первого столбца. В связи с этим, при использовании запроса с указанием условия только на дату, но не на префикс первичного ключа, будет читаться данных больше, чем за одну дату.
+
+### Дополнительные индексы
+
+Для таблиц семейства `*MergeTree` можно задать дополнительные индексы в секции столбцов.
+
+Индексы агрегируют некоторые данные для заданного выражения, а затем при выполнении запроса `SELECT` используются для пропуска блоков данных (пропускаемый блок состоит из гранул данных в количестве, равном гранулярности данного индекса), на которых секция `WHERE` не может быть выполнена, тем самым уменьшая объём данных, читаемых с диска. Например, при `index_granularity = 8192` и `GRANULARITY 4` один блок индекса охватывает 4 гранулы, то есть 32768 строк.
+
+Пример:
+```sql
+CREATE TABLE table_name
+(
+    u64 UInt64,
+    i32 Int32,
+    s String,
+    ...
+    INDEX a (u64 * i32, s) TYPE minmax GRANULARITY 3,
+    INDEX b (u64 * length(s), i32) TYPE unique GRANULARITY 4
+) ENGINE = MergeTree()
+...
+```
+
+Эти индексы смогут использоваться для оптимизации следующих запросов:
+```sql
+SELECT count() FROM table WHERE s < 'z'
+SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234
+```
+
+#### Доступные индексы
+
+* `minmax`
+Хранит минимум и максимум выражения (если выражение - `tuple`, то для каждого элемента `tuple`), используя их для пропуска блоков аналогично первичному ключу.
+
+* `unique(max_rows)`
+Хранит не более `max_rows` уникальных значений выражения для блока, используя их для пропуска блоков путём оценки выполнимости выражения `WHERE` на хранимых данных.
+Если `max_rows=0`, то хранит значения выражения без ограничений. Если параметр не передан, то подразумевается `max_rows=0`.
+
+Примеры:
+```sql
+INDEX b1 (u64 * length(str), i32 + f64 * 100, date, str) TYPE minmax GRANULARITY 4
+INDEX b2 (u64 * length(str), i32 + f64 * 100, date, str) TYPE unique GRANULARITY 4
+INDEX b3 (u64 * length(str), i32 + f64 * 100, date, str) TYPE unique(100) GRANULARITY 4
+```
+
 ## Конкурентный доступ к данным
 
 Для конкурентного доступа к таблице используется мультиверсионность. То есть, при одновременном чтении и обновлении таблицы, данные будут читаться из набора кусочков, актуального на момент запроса. Длинных блокировок нет. Вставки никак не мешают чтениям.
diff --git a/docs/ru/operations/troubleshooting.md b/docs/ru/operations/troubleshooting.md new file mode 100644 index 00000000000..1eaaf26934d --- /dev/null +++ b/docs/ru/operations/troubleshooting.md @@ -0,0 +1,139 @@
+# Устранение неисправностей
+
+- [Установка дистрибутива](#troubleshooting-installation-errors)
+- [Соединение с сервером](#troubleshooting-accepts-no-connections)
+- [Обработка запросов](#troubleshooting-does-not-process-queries)
+- [Скорость обработки запросов](#troubleshooting-too-slow)
+
+## Установка дистрибутива {#troubleshooting-installation-errors}
+
+### Не получается скачать deb-пакеты из репозитория ClickHouse с помощью apt-get
+
+- Проверьте настройки брандмауэра.
+- Если по какой-либо причине вы не можете получить доступ к репозиторию, скачайте пакеты, как описано в разделе [Начало работы](../getting_started/index.md), и установите их вручную командой `sudo dpkg -i `. Также необходим пакет `tzdata`.
+
+## Соединение с сервером {#troubleshooting-accepts-no-connections}
+
+Возможные проблемы:
+
+- Сервер не запущен.
+- Неожиданные или неправильные параметры конфигурации.
+
+### Сервер не запущен
+
+**Проверьте, запущен ли сервер**
+
+Команда:
+
+```
+sudo service clickhouse-server status
+```
+
+Если сервер не запущен, запустите его с помощью команды:
+
+```
+sudo service clickhouse-server start
+```
+
+**Проверьте журналы**
+
+Основной лог `clickhouse-server` по умолчанию — `/var/log/clickhouse-server/clickhouse-server.log`.
+
+В случае успешного запуска вы должны увидеть строки, содержащие:
+
+- `<Information> Application: starting up.` — сервер запускается.
+- `<Information> Application: Ready for connections.` — сервер запущен и готов принимать соединения.
+
+Если `clickhouse-server` не запустился из-за ошибки конфигурации, вы увидите строку `<Error>` с описанием ошибки. Например:
+
+```
+2019.01.11 15:23:25.549505 [ 45 ] {} <Error> ExternalDictionaries: Failed reloading 'event2id' external dictionary: Poco::Exception. Code: 1000, e.code() = 111, e.displayText() = Connection refused, e.what() = Connection refused
+```
+
+Если вы не видите ошибки в конце файла, просмотрите весь файл, начиная со строки:
+
+```
+<Information> Application: starting up.
+```
+
+При попытке запустить второй экземпляр `clickhouse-server` журнал выглядит следующим образом:
+
+```
+2019.01.11 15:25:11.151730 [ 1 ] {} : Starting ClickHouse 19.1.0 with revision 54413
+2019.01.11 15:25:11.154578 [ 1 ] {} Application: starting up
+2019.01.11 15:25:11.156361 [ 1 ] {} StatusFile: Status file ./status already exists - unclean restart. Contents:
+PID: 8510
+Started at: 2019-01-11 15:24:23
+Revision: 54413
+
+2019.01.11 15:25:11.156673 [ 1 ] {} Application: DB::Exception: Cannot lock file ./status. Another server instance in same directory is already running.
+2019.01.11 15:25:11.156682 [ 1 ] {} Application: shutting down
+2019.01.11 15:25:11.156686 [ 1 ] {} Application: Uninitializing subsystem: Logging Subsystem
+2019.01.11 15:25:11.156716 [ 2 ] {} BaseDaemon: Stop SignalListener thread
+```
+
+**Проверьте логи systemd**
+
+Если из логов `clickhouse-server` вы не получили необходимой информации или логов нет, то вы можете посмотреть логи `systemd` командой:
+
+```
+sudo journalctl -u clickhouse-server
+```
+
+**Запустите `clickhouse-server` в интерактивном режиме**
+
+```
+sudo -u clickhouse /usr/bin/clickhouse-server --config-file /etc/clickhouse-server/config.xml
+```
+
+Эта команда запускает сервер как интерактивное приложение со стандартными параметрами скрипта автозапуска. В этом режиме `clickhouse-server` выводит сообщения в консоль.
+
+### Параметры конфигурации
+
+Проверьте:
+
+- Настройки Docker.
+
+    При запуске ClickHouse в Docker в сети IPv6 убедитесь, что установлено `network=host`.
+
+- Параметры endpoint.
+
+    Проверьте настройки [listen_host](server_settings/settings.md#server_settings-listen_host) и [tcp_port](server_settings/settings.md#server_settings-tcp_port).
+
+    По умолчанию сервер ClickHouse принимает только локальные подключения.
+
+- Настройки протокола HTTP.
+
+    Проверьте настройки протокола для HTTP API.
+
+- Параметры безопасного подключения.
+
+    Проверьте:
+
+    - Настройку `tcp_port_secure`.
+    - Параметры для SSL-сертификатов.
+
+    Используйте правильные параметры при подключении. Например, используйте параметр `port_secure` при использовании `clickhouse-client`.
+
+- Настройки пользователей.
+
+    Возможно, вы используете неверное имя пользователя или пароль.
+
+## Обработка запросов {#troubleshooting-does-not-process-queries}
+
+Если ClickHouse не может обработать запрос, он отправляет клиенту описание ошибки. В `clickhouse-client` вы получаете описание ошибки в консоли. При использовании интерфейса HTTP ClickHouse отправляет описание ошибки в теле ответа. Например:
+
+```bash
+$ curl 'http://localhost:8123/' --data-binary "SELECT a"
+Code: 47, e.displayText() = DB::Exception: Unknown identifier: a. Note that there is no tables (FROM clause) in your query, context: required_names: 'a' source_tables: table_aliases: private_aliases: column_aliases: public_columns: 'a' masked_columns: array_join_columns: source_columns: , e.what() = DB::Exception
+```
+
+Если вы запускаете `clickhouse-client` с параметром `stack-trace`, то ClickHouse возвращает описание ошибки и соответствующий стек вызовов функций на сервере.
+
+Может появиться сообщение о разрыве соединения. В этом случае необходимо повторить запрос. Если соединение прерывается каждый раз при выполнении запроса, следует проверить журналы сервера на наличие ошибок.
+
+## Скорость обработки запросов {#troubleshooting-too-slow}
+
+Если вы видите, что ClickHouse работает слишком медленно, необходимо профилировать загрузку ресурсов сервера и сети для ваших запросов.
+
+Для профилирования запросов можно использовать утилиту `clickhouse-benchmark`. Она показывает количество запросов, обработанных за секунду, количество строк, обработанных за секунду, и перцентили времени обработки запросов.

diff --git a/docs/ru/query_language/alter.md b/docs/ru/query_language/alter.md index 37c497bc6c6..2f4e94b56eb 100644 --- a/docs/ru/query_language/alter.md +++ b/docs/ru/query_language/alter.md @@ -79,6 +79,20 @@ MODIFY ORDER BY new_expression
 сортировки, разрешено добавлять в ключ только новые столбцы (т.е. столбцы, добавляемые командой `ADD COLUMN` в том же запросе `ALTER`), у которых нет выражения по умолчанию.
+
+### Манипуляции с индексами
+
+Добавить или удалить индекс можно с помощью операций:
+```
+ALTER ADD INDEX name expression TYPE type GRANULARITY value [AFTER name]
+ALTER DROP INDEX name
+```
+Поддерживается только таблицами семейства `*MergeTree`.
+
+Команда `ALTER ADD INDEX` добавляет описание индекса в метаданные таблицы, а `ALTER DROP INDEX` удаляет индекс из метаданных и стирает файлы индекса с диска, поэтому эти операции легковесны и выполняются мгновенно.
+
+Добавленный в метаданные индекс начнёт вычисляться при последующих слияниях и вставках в таблицу, а не сразу после выполнения операции `ALTER`.
+
+Запрос на изменение индексов реплицируется: новые метаданные сохраняются в ZooKeeper, и изменения применяются на всех репликах.
+
 ### Манипуляции с партициями и кусками
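Для наглядности ниже приведён примерный вид описанных выше команд манипуляции с индексами в составе запроса `ALTER TABLE` (набросок; имена таблицы, столбца и индекса условные):

```sql
-- Добавление индекса пропуска данных к существующей таблице семейства MergeTree
ALTER TABLE hits ADD INDEX url_len_idx (length(URL)) TYPE minmax GRANULARITY 4;

-- Удаление индекса: описание удаляется из метаданных, файлы индекса стираются с диска
ALTER TABLE hits DROP INDEX url_len_idx;
```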