From 8f7c2b58218db084ed38f53ee0de52777146c56c Mon Sep 17 00:00:00 2001 From: Sabyanin Maxim Date: Mon, 1 Oct 2018 23:16:50 +0300 Subject: [PATCH 001/145] first try --- dbms/src/Parsers/ASTColumnDeclaration.h | 11 ++++++++++ dbms/src/Parsers/ParserCreateQuery.h | 22 +++++++++++++++++-- dbms/src/Parsers/ParserTablePropertiesQuery.h | 1 + 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/dbms/src/Parsers/ASTColumnDeclaration.h b/dbms/src/Parsers/ASTColumnDeclaration.h index 308e9b66526..845fe175155 100644 --- a/dbms/src/Parsers/ASTColumnDeclaration.h +++ b/dbms/src/Parsers/ASTColumnDeclaration.h @@ -15,6 +15,7 @@ public: ASTPtr type; String default_specifier; ASTPtr default_expression; + ASTPtr comment_expression; String getID() const override { return "ColumnDeclaration_" + name; } @@ -35,6 +36,11 @@ public: res->children.push_back(res->default_expression); } + if (comment_expression) { + res->comment_expression = comment_expression->clone(); + res->children.push_back(res->comment_expression); // TODO: понять, зачем это нужно. + } + return res; } @@ -56,6 +62,11 @@ protected: settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << default_specifier << (settings.hilite ? hilite_none : "") << ' '; default_expression->formatImpl(settings, state, frame); } + + // TODO: понять, почему не отрицание + if (comment_expression) { + comment_expression->formatImpl(settings, state, frame); + } } }; diff --git a/dbms/src/Parsers/ParserCreateQuery.h b/dbms/src/Parsers/ParserCreateQuery.h index 75ce5b80537..72286c155e4 100644 --- a/dbms/src/Parsers/ParserCreateQuery.h +++ b/dbms/src/Parsers/ParserCreateQuery.h @@ -111,6 +111,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ParserKeyword s_default{"DEFAULT"}; ParserKeyword s_materialized{"MATERIALIZED"}; ParserKeyword s_alias{"ALIAS"}; + ParserKeyword s_comment{"COMMENT"}; ParserTernaryOperatorExpression expr_parser; /// mandatory column name @@ -119,13 +120,14 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E return false; /** column name should be followed by type name if it - * is not immediately followed by {DEFAULT, MATERIALIZED, ALIAS} + * is not immediately followed by {DEFAULT, MATERIALIZED, ALIAS, COMMENT} */ ASTPtr type; const auto fallback_pos = pos; if (!s_default.check(pos, expected) && !s_materialized.check(pos, expected) && - !s_alias.check(pos, expected)) + !s_alias.check(pos, expected) && + !s_comment.check(pos, expected)) { type_parser.parse(pos, type, expected); } @@ -149,6 +151,17 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E else if (!type) return false; /// reject sole column name without type + String comment_specifier; + ASTPtr comment_expression; + pos_before_specifier = pos; + if (s_comment.ignore(pos, expected)) + { + comment_specifier = Poco::toUpper(std::string{pos_before_specifier->begin, pos_specifier->end}); + if (!expr_parser.parse(pos, comment_expression, expected)) { + return false; + } + } + const auto column_declaration = std::make_shared(); node = column_declaration; column_declaration->name = typeid_cast(*name).name; @@ -165,6 +178,11 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E column_declaration->children.push_back(std::move(default_expression)); } + if (comment_expression) { + column_declaration->comment_expression = comment_expression; + column_declaration->children.push_back(std::move(comment_expression)); + } + return true; } diff --git a/dbms/src/Parsers/ParserTablePropertiesQuery.h b/dbms/src/Parsers/ParserTablePropertiesQuery.h index 1bc4a06f161..c5321dd472b 100644 --- a/dbms/src/Parsers/ParserTablePropertiesQuery.h +++ b/dbms/src/Parsers/ParserTablePropertiesQuery.h @@ -8,6 +8,7 @@ namespace DB { +// TODO: возможно тут тоже надо разобраться /** Query (EXISTS | SHOW CREATE) [TABLE] [db.]name [FORMAT format] */ class ParserTablePropertiesQuery : public IParserBase From ab050c8466f9104cce0e08f697570cdb9de39b93 Mon Sep 17 00:00:00 2001 From: Sabyanin Maxim Date: Sun, 14 Oct 2018 18:30:06 +0300 Subject: [PATCH 002/145] add comment section in column declaration. add comment column type in alter method. add comment section in system.columns. --- dbms/programs/server/TCPHandler.cpp | 2 +- .../Interpreters/InterpreterCreateQuery.cpp | 29 ++++- .../src/Interpreters/InterpreterCreateQuery.h | 2 +- dbms/src/Parsers/ASTAlterQuery.cpp | 7 + dbms/src/Parsers/ASTAlterQuery.h | 5 + dbms/src/Parsers/ASTColumnDeclaration.h | 7 +- dbms/src/Parsers/ParserAlterQuery.cpp | 11 ++ dbms/src/Parsers/ParserAlterQuery.h | 1 + dbms/src/Parsers/ParserCreateQuery.h | 13 +- dbms/src/Storages/AlterCommands.cpp | 23 ++++ dbms/src/Storages/AlterCommands.h | 6 +- dbms/src/Storages/ColumnComment.cpp | 7 + dbms/src/Storages/ColumnComment.h | 19 +++ dbms/src/Storages/ColumnsDescription.cpp | 121 +++++++++++++----- dbms/src/Storages/ColumnsDescription.h | 9 +- dbms/src/Storages/IStorage.h | 16 ++- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 5 + .../Storages/System/StorageSystemColumns.cpp | 18 +++ .../00725_comment_columns.reference | 28 ++++ .../0_stateless/00725_comment_columns.sql | 60 +++++++++ 20 files changed, 331 insertions(+), 58 deletions(-) create mode 100644 dbms/src/Storages/ColumnComment.cpp create mode 100644 dbms/src/Storages/ColumnComment.h create mode 100644 dbms/tests/queries/0_stateless/00725_comment_columns.reference create mode 100644 dbms/tests/queries/0_stateless/00725_comment_columns.sql diff --git a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp index da2a5bbea2b..0005d1433ab 100644 --- a/dbms/programs/server/TCPHandler.cpp +++ b/dbms/programs/server/TCPHandler.cpp @@ -716,7 +716,7 @@ bool TCPHandler::receiveData() { NamesAndTypesList columns = block.getNamesAndTypesList(); storage = StorageMemory::create(external_table_name, - ColumnsDescription{columns, NamesAndTypesList{}, NamesAndTypesList{}, ColumnDefaults{}}); + ColumnsDescription{columns, NamesAndTypesList{}, NamesAndTypesList{}, ColumnDefaults{}, ColumnComments{}}); storage->startup(); query_context.addExternalTable(external_table_name, storage); } diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index 337ad39abef..2f8716b87a9 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -166,13 +166,15 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) using ColumnsAndDefaults = std::pair; +using ParsedColumns = std::tuple; /// AST to the list of columns with types. Columns of Nested type are expanded into a list of real columns. -static ColumnsAndDefaults parseColumns(const ASTExpressionList & column_list_ast, const Context & context) +static ParsedColumns parseColumns(const ASTExpressionList & column_list_ast, const Context & context) { /// list of table columns in correct order NamesAndTypesList columns{}; ColumnDefaults defaults{}; + ColumnComments comments{}; /// Columns requiring type-deduction or default_expression type-check std::vector> defaulted_columns{}; @@ -216,6 +218,11 @@ static ColumnsAndDefaults parseColumns(const ASTExpressionList & column_list_ast else default_expr_list->children.emplace_back(setAlias(col_decl.default_expression->clone(), col_decl.name)); } + + if (col_decl.comment_expression) + { + comments.emplace(col_decl.name, ColumnComment{col_decl.comment_expression}); + } } /// set missing types and wrap default_expression's in a conversion-function if necessary @@ -261,7 +268,7 @@ static ColumnsAndDefaults parseColumns(const ASTExpressionList & column_list_ast } } - return {Nested::flatten(columns), defaults}; + return {Nested::flatten(columns), defaults, comments}; } @@ -329,11 +336,17 @@ ASTPtr InterpreterCreateQuery::formatColumns(const ColumnsDescription & columns) column_declaration->type = parseQuery(storage_p, pos, end, "data type", 0); column_declaration->type->owned_string = type_name; - const auto it = columns.defaults.find(column.name); - if (it != std::end(columns.defaults)) + const auto defaults_it = columns.defaults.find(column.name); + if (defaults_it != std::end(columns.defaults)) { - column_declaration->default_specifier = toString(it->second.kind); - column_declaration->default_expression = it->second.expression->clone(); + column_declaration->default_specifier = toString(defaults_it->second.kind); + column_declaration->default_expression = defaults_it->second.expression->clone(); + } + + const auto comments_it = columns.comments.find(column.name); + if (comments_it != std::end(columns.comments)) + { + column_declaration->comment_expression = comments_it->second.expression->clone(); } columns_list->children.push_back(column_declaration_ptr); @@ -347,11 +360,13 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(const ASTExpres { ColumnsDescription res; - auto && columns_and_defaults = parseColumns(columns, context); + auto && parsed_columns = parseColumns(columns, context); + auto columns_and_defaults = std::make_pair(std::move(std::get<0>(parsed_columns)), std::move(std::get<1>(parsed_columns))); res.materialized = removeAndReturnColumns(columns_and_defaults, ColumnDefaultKind::Materialized); res.aliases = removeAndReturnColumns(columns_and_defaults, ColumnDefaultKind::Alias); res.ordinary = std::move(columns_and_defaults.first); res.defaults = std::move(columns_and_defaults.second); + res.comments = std::move(std::get<2>(parsed_columns)); if (res.ordinary.size() + res.materialized.size() == 0) throw Exception{"Cannot CREATE table without physical columns", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED}; diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.h b/dbms/src/Interpreters/InterpreterCreateQuery.h index 2f186764866..e450ae0728e 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.h +++ b/dbms/src/Interpreters/InterpreterCreateQuery.h @@ -44,7 +44,7 @@ public: internal = internal_; } - /// Obtain information about columns, their types and default values, for case when columns in CREATE query is specified explicitly. + /// Obtain information about columns, their types, default values and column comments, for case when columns in CREATE query is specified explicitly. static ColumnsDescription getColumnsDescription(const ASTExpressionList & columns, const Context & context); /// Check that column types are allowed for usage in table according to settings. static void checkSupportedTypes(const ColumnsDescription & columns, const Context & context); diff --git a/dbms/src/Parsers/ASTAlterQuery.cpp b/dbms/src/Parsers/ASTAlterQuery.cpp index 7081b512247..827906a4294 100644 --- a/dbms/src/Parsers/ASTAlterQuery.cpp +++ b/dbms/src/Parsers/ASTAlterQuery.cpp @@ -141,6 +141,13 @@ void ASTAlterCommand::formatImpl( settings.ostr << (settings.hilite ? hilite_keyword : "") << " WHERE " << (settings.hilite ? hilite_none : ""); predicate->formatImpl(settings, state, frame); } + else if (type == ASTAlterCommand::COMMENT_COLUMN) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "COMMENT COLUMN " << (settings.hilite ? hilite_none : ""); + column->formatImpl(settings, state, frame); + settings.ostr << " "; + comment->formatImpl(settings, state, frame); + } else throw Exception("Unexpected type of ALTER", ErrorCodes::UNEXPECTED_AST_STRUCTURE); } diff --git a/dbms/src/Parsers/ASTAlterQuery.h b/dbms/src/Parsers/ASTAlterQuery.h index c79f9ba8b2f..949d4ecf851 100644 --- a/dbms/src/Parsers/ASTAlterQuery.h +++ b/dbms/src/Parsers/ASTAlterQuery.h @@ -14,6 +14,7 @@ namespace DB * DROP COLUMN col_drop [FROM PARTITION partition], * MODIFY COLUMN col_name type, * DROP PARTITION partition, + * COMMENT_COLUMN col_name 'comment', */ class ASTAlterCommand : public IAST @@ -25,6 +26,7 @@ public: DROP_COLUMN, MODIFY_COLUMN, MODIFY_PRIMARY_KEY, + COMMENT_COLUMN, DROP_PARTITION, ATTACH_PARTITION, @@ -66,6 +68,9 @@ public: /// A list of expressions of the form `column = expr` for the UPDATE command. ASTPtr update_assignments; + /// A column comment + ASTPtr comment; + bool detach = false; /// true for DETACH PARTITION bool part = false; /// true for ATTACH PART diff --git a/dbms/src/Parsers/ASTColumnDeclaration.h b/dbms/src/Parsers/ASTColumnDeclaration.h index 845fe175155..ea078ad0052 100644 --- a/dbms/src/Parsers/ASTColumnDeclaration.h +++ b/dbms/src/Parsers/ASTColumnDeclaration.h @@ -5,7 +5,7 @@ namespace DB { -/** Name, type, default-specifier, default-expression. +/** Name, type, default-specifier, default-expression, comment-expression. * The type is optional if default-expression is specified. */ class ASTColumnDeclaration : public IAST @@ -38,13 +38,12 @@ public: if (comment_expression) { res->comment_expression = comment_expression->clone(); - res->children.push_back(res->comment_expression); // TODO: понять, зачем это нужно. + res->children.push_back(res->comment_expression); } return res; } -protected: void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override { frame.need_parens = false; @@ -63,8 +62,8 @@ protected: default_expression->formatImpl(settings, state, frame); } - // TODO: понять, почему не отрицание if (comment_expression) { + settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "COMMENT" << (settings.hilite ? hilite_none : "") << ' '; comment_expression->formatImpl(settings, state, frame); } } diff --git a/dbms/src/Parsers/ParserAlterQuery.cpp b/dbms/src/Parsers/ParserAlterQuery.cpp index 56eaddb38ee..65446e5a4db 100644 --- a/dbms/src/Parsers/ParserAlterQuery.cpp +++ b/dbms/src/Parsers/ParserAlterQuery.cpp @@ -24,6 +24,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserKeyword s_clear_column("CLEAR COLUMN"); ParserKeyword s_modify_column("MODIFY COLUMN"); ParserKeyword s_modify_primary_key("MODIFY PRIMARY KEY"); + ParserKeyword s_comment_column("COMMENT COLUMN"); ParserKeyword s_attach_partition("ATTACH PARTITION"); ParserKeyword s_detach_partition("DETACH PARTITION"); @@ -220,6 +221,16 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected command->type = ASTAlterCommand::UPDATE; } + else if (s_comment_column.ignore(pos, expected)) + { + if (!parser_name.parse(pos, command->column, expected)) + return false; + + if (!parser_string_literal.parse(pos, command->comment, expected)) + return false; + + command->type = ASTAlterCommand::COMMENT_COLUMN; + } else return false; diff --git a/dbms/src/Parsers/ParserAlterQuery.h b/dbms/src/Parsers/ParserAlterQuery.h index 46908ae135d..c7b21ca15d3 100644 --- a/dbms/src/Parsers/ParserAlterQuery.h +++ b/dbms/src/Parsers/ParserAlterQuery.h @@ -13,6 +13,7 @@ namespace DB * [CLEAR COLUMN col_to_clear [IN PARTITION partition],] * [MODIFY COLUMN col_to_modify type, ...] * [MODIFY PRIMARY KEY (a, b, c...)] + * [COMMENT COLUMN col_name string] * [DROP|DETACH|ATTACH PARTITION|PART partition, ...] * [FETCH PARTITION partition FROM ...] * [FREEZE PARTITION] diff --git a/dbms/src/Parsers/ParserCreateQuery.h b/dbms/src/Parsers/ParserCreateQuery.h index 72286c155e4..3931b228c29 100644 --- a/dbms/src/Parsers/ParserCreateQuery.h +++ b/dbms/src/Parsers/ParserCreateQuery.h @@ -113,6 +113,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ParserKeyword s_alias{"ALIAS"}; ParserKeyword s_comment{"COMMENT"}; ParserTernaryOperatorExpression expr_parser; + ParserStringLiteral string_literal_parser; /// mandatory column name ASTPtr name; @@ -120,14 +121,13 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E return false; /** column name should be followed by type name if it - * is not immediately followed by {DEFAULT, MATERIALIZED, ALIAS, COMMENT} + * is not immediately followed by {DEFAULT, MATERIALIZED, ALIAS} */ ASTPtr type; const auto fallback_pos = pos; if (!s_default.check(pos, expected) && !s_materialized.check(pos, expected) && - !s_alias.check(pos, expected) && - !s_comment.check(pos, expected)) + !s_alias.check(pos, expected)) { type_parser.parse(pos, type, expected); } @@ -151,15 +151,10 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E else if (!type) return false; /// reject sole column name without type - String comment_specifier; ASTPtr comment_expression; - pos_before_specifier = pos; if (s_comment.ignore(pos, expected)) { - comment_specifier = Poco::toUpper(std::string{pos_before_specifier->begin, pos_specifier->end}); - if (!expr_parser.parse(pos, comment_expression, expected)) { - return false; - } + string_literal_parser.parse(pos, comment_expression, expected); } const auto column_declaration = std::make_shared(); diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index 9e6d525f685..5b9b6f3b0db 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -90,6 +90,15 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ command.primary_key = command_ast->primary_key; return command; } + else if (command_ast->type == ASTAlterCommand::COMMENT_COLUMN) + { + AlterCommand command; + command.type = COMMENT_COLUMN; + const auto & ast_identifier = typeid_cast(*command_ast->column); + command.column_name = ast_identifier.name; + command.comment_expression = command_ast->comment; + return command; + } else return {}; } @@ -237,6 +246,11 @@ void AlterCommand::apply(ColumnsDescription & columns_description) const /// This have no relation to changing the list of columns. /// TODO Check that all columns exist, that only columns with constant defaults are added. } + else if (type == COMMENT_COLUMN) + { + + columns_description.comments[column_name].expression = comment_expression; + } else throw Exception("Wrong parameter type in ALTER query", ErrorCodes::LOGICAL_ERROR); } @@ -353,6 +367,15 @@ void AlterCommands::validate(const IStorage & table, const Context & context) throw Exception("Wrong column name. Cannot find column " + command.column_name + " to drop", ErrorCodes::ILLEGAL_COLUMN); } + else if (command.type == AlterCommand::COMMENT_COLUMN) + { + const auto column_it = std::find_if(std::begin(all_columns), std::end(all_columns), + std::bind(namesEqual, std::cref(command.column_name), std::placeholders::_1)); + if (column_it == std::end(all_columns)) + { + throw Exception{"Wrong column name. Cannot find column " + command.column_name + " to comment", ErrorCodes::ILLEGAL_COLUMN}; + } + } } /** Existing defaulted columns may require default expression extensions with a type conversion, diff --git a/dbms/src/Storages/AlterCommands.h b/dbms/src/Storages/AlterCommands.h index 888bd64f03e..1bbf3ddbee0 100644 --- a/dbms/src/Storages/AlterCommands.h +++ b/dbms/src/Storages/AlterCommands.h @@ -21,6 +21,7 @@ struct AlterCommand DROP_COLUMN, MODIFY_COLUMN, MODIFY_PRIMARY_KEY, + COMMENT_COLUMN, }; Type type; @@ -35,6 +36,7 @@ struct AlterCommand ColumnDefaultKind default_kind{}; ASTPtr default_expression{}; + ASTPtr comment_expression; /// For ADD - after which column to add a new one. If an empty string, add to the end. To add to the beginning now it is impossible. String after_column; @@ -45,9 +47,9 @@ struct AlterCommand AlterCommand() = default; AlterCommand(const Type type, const String & column_name, const DataTypePtr & data_type, const ColumnDefaultKind default_kind, const ASTPtr & default_expression, - const String & after_column = String{}) + const String & after_column = String{}, const ASTPtr & comment_expression = nullptr) : type{type}, column_name{column_name}, data_type{data_type}, default_kind{default_kind}, - default_expression{default_expression}, after_column{after_column} + default_expression{default_expression}, comment_expression(comment_expression), after_column{after_column} {} static std::optional parse(const ASTAlterCommand * command); diff --git a/dbms/src/Storages/ColumnComment.cpp b/dbms/src/Storages/ColumnComment.cpp new file mode 100644 index 00000000000..076ec1e19df --- /dev/null +++ b/dbms/src/Storages/ColumnComment.cpp @@ -0,0 +1,7 @@ +#include +#include + +bool DB::operator== (const DB::ColumnComment& lhs, const DB::ColumnComment& rhs) +{ + return queryToString(lhs.expression) == queryToString(rhs.expression); +} diff --git a/dbms/src/Storages/ColumnComment.h b/dbms/src/Storages/ColumnComment.h new file mode 100644 index 00000000000..33e44fb5188 --- /dev/null +++ b/dbms/src/Storages/ColumnComment.h @@ -0,0 +1,19 @@ +#pragma once + +#include +#include + +#include + +namespace DB +{ + +struct ColumnComment { + ASTPtr expression; +}; + +bool operator== (const ColumnComment& lhs, const ColumnComment& rhs); + +using ColumnComments = std::unordered_map; + +} diff --git a/dbms/src/Storages/ColumnsDescription.cpp b/dbms/src/Storages/ColumnsDescription.cpp index cb67d01a4ea..e3e76f6012a 100644 --- a/dbms/src/Storages/ColumnsDescription.cpp +++ b/dbms/src/Storages/ColumnsDescription.cpp @@ -75,22 +75,32 @@ String ColumnsDescription::toString() const { for (const auto & column : columns) { - const auto it = defaults.find(column.name); + const auto defaults_it = defaults.find(column.name); + const auto comments_it = comments.find(column.name); writeBackQuotedString(column.name, buf); writeChar(' ', buf); writeText(column.type->getName(), buf); - if (it == std::end(defaults)) - { - writeChar('\n', buf); - continue; - } - else - writeChar('\t', buf); - writeText(DB::toString(it->second.kind), buf); - writeChar('\t', buf); - writeText(queryToString(it->second.expression), buf); + const bool exist_comment = comments_it != std::end(comments) && !comments_it->second.expression; + if (defaults_it != std::end(defaults)) + { + writeChar('\t', buf); + writeText(DB::toString(defaults_it->second.kind), buf); + writeChar('\t', buf); + writeText(queryToString(defaults_it->second.expression), buf); + } + else if (exist_comment) + { + writeChar('\t', buf); + } + + if (exist_comment) + { + writeChar('\t', buf); + writeText(queryToString(comments_it->second.expression), buf); + } + writeChar('\n', buf); } }; @@ -102,6 +112,55 @@ String ColumnsDescription::toString() const return buf.str(); } +struct ParsedDefaultInfo +{ + ColumnDefaultKind default_kind; + ASTPtr default_expr_str; +}; + +std::optional parseDefaulfInfo(ReadBufferFromString & buf) +{ + if (*buf.position() == '\n') + { + return {}; + } + + assertChar('\t', buf); + if (*buf.position() == '\t') + { + assertChar('\t', buf); + return {}; + } + + String default_kind_str; + readText(default_kind_str, buf); + const auto default_kind = columnDefaultKindFromString(default_kind_str); + assertChar('\t', buf); + + ParserExpression expr_parser; + String default_expr_str; + readText(default_expr_str, buf); + const char * begin = default_expr_str.data(); + const auto end = begin + default_expr_str.size(); + ASTPtr default_expr = parseQuery(expr_parser, begin, end, "default_expression", 0); + return ParsedDefaultInfo{default_kind, std::move(default_expr)}; +} + +ASTPtr parseCommentExpr(ReadBufferFromString& buf) +{ + if (*buf.position() == '\n') + { + return {}; + } + + ParserExpression parser_expr; + String comment_expr_str; + readText(comment_expr_str, buf); + const char * begin = comment_expr_str.data(); + const auto end = begin + comment_expr_str.size(); + ASTPtr comment_expr = parseQuery(parser_expr, begin, end, "comment_expression", 0); + return comment_expr; +} ColumnsDescription ColumnsDescription::parse(const String & str) { @@ -132,29 +191,31 @@ ColumnsDescription ColumnsDescription::parse(const String & str) result.ordinary.emplace_back(column_name, std::move(type)); continue; } + assertChar('\t', buf); - String default_kind_str; - readText(default_kind_str, buf); - const auto default_kind = columnDefaultKindFromString(default_kind_str); - assertChar('\t', buf); + const auto default_info = parseDefaulfInfo(buf); + if (default_info) + { + const auto & default_kind = default_info->default_kind; + const auto & default_expr = default_info->default_expr_str; + if (ColumnDefaultKind::Default == default_kind) + result.ordinary.emplace_back(column_name, std::move(type)); + else if (ColumnDefaultKind::Materialized == default_kind) + result.materialized.emplace_back(column_name, std::move(type)); + else if (ColumnDefaultKind::Alias == default_kind) + result.aliases.emplace_back(column_name, std::move(type)); + + result.defaults.emplace(column_name, ColumnDefault{default_kind, default_expr}); + } + + const auto comment_expr = parseCommentExpr(buf); + if (comment_expr) + { + result.comments.emplace(column_name, ColumnComment{comment_expr}); + } - String default_expr_str; - readText(default_expr_str, buf); assertChar('\n', buf); - - const char * begin = default_expr_str.data(); - const auto end = begin + default_expr_str.size(); - ASTPtr default_expr = parseQuery(expr_parser, begin, end, "default expression", 0); - - if (ColumnDefaultKind::Default == default_kind) - result.ordinary.emplace_back(column_name, std::move(type)); - else if (ColumnDefaultKind::Materialized == default_kind) - result.materialized.emplace_back(column_name, std::move(type)); - else if (ColumnDefaultKind::Alias == default_kind) - result.aliases.emplace_back(column_name, std::move(type)); - - result.defaults.emplace(column_name, ColumnDefault{default_kind, default_expr}); } assertEOF(buf); diff --git a/dbms/src/Storages/ColumnsDescription.h b/dbms/src/Storages/ColumnsDescription.h index 288d2712b3b..69738d83861 100644 --- a/dbms/src/Storages/ColumnsDescription.h +++ b/dbms/src/Storages/ColumnsDescription.h @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -15,6 +16,7 @@ struct ColumnsDescription NamesAndTypesList materialized; NamesAndTypesList aliases; ColumnDefaults defaults; + ColumnComments comments; ColumnsDescription() = default; @@ -22,11 +24,13 @@ struct ColumnsDescription NamesAndTypesList ordinary_, NamesAndTypesList materialized_, NamesAndTypesList aliases_, - ColumnDefaults defaults_) + ColumnDefaults defaults_, + ColumnComments comments_ = {}) : ordinary(std::move(ordinary_)) , materialized(std::move(materialized_)) , aliases(std::move(aliases_)) , defaults(std::move(defaults_)) + , comments(std::move(comments_)) {} explicit ColumnsDescription(NamesAndTypesList ordinary_) : ordinary(std::move(ordinary_)) {} @@ -36,7 +40,8 @@ struct ColumnsDescription return ordinary == other.ordinary && materialized == other.materialized && aliases == other.aliases - && defaults == other.defaults; + && defaults == other.defaults + && comments == other.comments; } bool operator!=(const ColumnsDescription & other) const { return !(*this == other); } diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h index d256a5628f7..d889190ce45 100644 --- a/dbms/src/Storages/IStorage.h +++ b/dbms/src/Storages/IStorage.h @@ -4,6 +4,8 @@ #include #include #include +#include +#include #include #include #include @@ -233,9 +235,19 @@ public: * This method must fully execute the ALTER query, taking care of the locks itself. * To update the table metadata on disk, this method should call InterpreterAlterQuery::updateMetadata. */ - virtual void alter(const AlterCommands & /*params*/, const String & /*database_name*/, const String & /*table_name*/, const Context & /*context*/) + virtual void alter(const AlterCommands & params, const String & database_name, const String & table_name, const Context & context) { - throw Exception("Method alter is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); + for (const auto & param : params) + { + if (param.type != AlterCommand::Type::COMMENT_COLUMN) + throw Exception("Method alter only supports change comment of column for storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); + } + + auto lock = lockStructureForAlter(__PRETTY_FUNCTION__); + auto new_columns = getColumns(); + params.apply(new_columns); + context.getDatabase(database_name)->alterTable(context, table_name, new_columns, {}); + setColumns(std::move(new_columns)); } /** Execute CLEAR COLUMN ... IN PARTITION query which removes column from given partition. */ diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 0a55bfa97e5..f8c21a2d675 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -909,6 +909,11 @@ void MergeTreeData::checkAlter(const AlterCommands & commands) for (const AlterCommand & command : commands) { + if (command.type == AlterCommand::COMMENT_COLUMN) + { + continue; + } + if (columns_alter_forbidden.count(command.column_name)) throw Exception("trying to ALTER key column " + command.column_name, ErrorCodes::ILLEGAL_COLUMN); diff --git a/dbms/src/Storages/System/StorageSystemColumns.cpp b/dbms/src/Storages/System/StorageSystemColumns.cpp index 40802f16466..b546a275f0b 100644 --- a/dbms/src/Storages/System/StorageSystemColumns.cpp +++ b/dbms/src/Storages/System/StorageSystemColumns.cpp @@ -37,6 +37,7 @@ StorageSystemColumns::StorageSystemColumns(const std::string & name_) { "data_compressed_bytes", std::make_shared() }, { "data_uncompressed_bytes", std::make_shared() }, { "marks_bytes", std::make_shared() }, + { "comment", std::make_shared() }, })); } @@ -81,6 +82,7 @@ protected: NamesAndTypesList columns; ColumnDefaults column_defaults; + ColumnComments column_comments; MergeTreeData::ColumnSizeByName column_sizes; { @@ -106,6 +108,7 @@ protected: columns = storage->getColumns().getAll(); column_defaults = storage->getColumns().defaults; + column_comments = storage->getColumns().comments; /** Info about sizes of columns for tables of MergeTree family. * NOTE: It is possible to add getter for this info to IStorage interface. @@ -174,6 +177,21 @@ protected: } } + { + const auto it = column_comments.find(column.name); + if (it == std::end(column_comments)) + { + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + } + else + { + const auto & literal = typeid_cast(it->second.expression.get()); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(literal->value.get()); + } + } + ++rows_count; } } diff --git a/dbms/tests/queries/0_stateless/00725_comment_columns.reference b/dbms/tests/queries/0_stateless/00725_comment_columns.reference new file mode 100644 index 00000000000..8d7837d8a31 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00725_comment_columns.reference @@ -0,0 +1,28 @@ +CREATE TABLE default.check_query_comment_column ( first_column UInt8 DEFAULT 1 COMMENT \'first comment\', fourth_column UInt8 COMMENT \'fourth comment\', fifth_column UInt8, second_column UInt8 MATERIALIZED first_column COMMENT \'second comment\', third_column UInt8 ALIAS second_column COMMENT \'third comment\') ENGINE = TinyLog +┌─table──────────────────────┬─name──────────┬─comment────────┐ +│ check_query_comment_column │ first_column │ first comment │ +│ check_query_comment_column │ fourth_column │ fourth comment │ +│ check_query_comment_column │ fifth_column │ │ +│ check_query_comment_column │ second_column │ second comment │ +│ check_query_comment_column │ third_column │ third comment │ +└────────────────────────────┴───────────────┴────────────────┘ +CREATE TABLE default.check_query_comment_column ( first_column UInt8 DEFAULT 1 COMMENT \'another first column\', fourth_column UInt8 COMMENT \'another fourth column\', fifth_column UInt8 COMMENT \'another fifth column\', second_column UInt8 MATERIALIZED first_column COMMENT \'another second column\', third_column UInt8 ALIAS second_column COMMENT \'another third column\') ENGINE = TinyLog +┌─table──────────────────────┬─name──────────┬─comment───────────────┐ +│ check_query_comment_column │ first_column │ another first column │ +│ check_query_comment_column │ fourth_column │ another fourth column │ +│ check_query_comment_column │ fifth_column │ another fifth column │ +│ check_query_comment_column │ second_column │ another second column │ +│ check_query_comment_column │ third_column │ another third column │ +└────────────────────────────┴───────────────┴───────────────────────┘ +CREATE TABLE default.check_query_comment_column ( first_column Date COMMENT \'first comment\', second_column UInt8 COMMENT \'second comment\', third_column UInt8 COMMENT \'third comment\') ENGINE = MergeTree(first_column, (second_column, second_column), 8192) +┌─table──────────────────────┬─name──────────┬─comment────────┐ +│ check_query_comment_column │ first_column │ first comment │ +│ check_query_comment_column │ second_column │ second comment │ +│ check_query_comment_column │ third_column │ third comment │ +└────────────────────────────┴───────────────┴────────────────┘ +CREATE TABLE default.check_query_comment_column ( first_column Date COMMENT \'another first comment\', second_column UInt8 COMMENT \'another second comment\', third_column UInt8 COMMENT \'another third comment\') ENGINE = MergeTree(first_column, (second_column, second_column), 8192) +┌─table──────────────────────┬─name──────────┬─comment────────────────┐ +│ check_query_comment_column │ first_column │ another first comment │ +│ check_query_comment_column │ second_column │ another second comment │ +│ check_query_comment_column │ third_column │ another third comment │ +└────────────────────────────┴───────────────┴────────────────────────┘ diff --git a/dbms/tests/queries/0_stateless/00725_comment_columns.sql b/dbms/tests/queries/0_stateless/00725_comment_columns.sql new file mode 100644 index 00000000000..6c2bd2305b1 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00725_comment_columns.sql @@ -0,0 +1,60 @@ +DROP TABLE IF EXISTS check_query_comment_column; + +CREATE TABLE check_query_comment_column + ( + first_column UInt8 DEFAULT 1 COMMENT 'first comment', + second_column UInt8 MATERIALIZED first_column COMMENT 'second comment', + third_column UInt8 ALIAS second_column COMMENT 'third comment', + fourth_column UInt8 COMMENT 'fourth comment', + fifth_column UInt8 + ) ENGINE = TinyLog; + +SHOW CREATE TABLE check_query_comment_column; + +SELECT table, name, comment +FROM system.columns +WHERE table = 'check_query_comment_column' +FORMAT PrettyCompactNoEscapes; + +ALTER TABLE check_query_comment_column + COMMENT COLUMN first_column 'another first column', + COMMENT COLUMN second_column 'another second column', + COMMENT COLUMN third_column 'another third column', + COMMENT COLUMN fourth_column 'another fourth column', + COMMENT COLUMN fifth_column 'another fifth column'; + +SHOW CREATE TABLE check_query_comment_column; + +SELECT table, name, comment +FROM system.columns +WHERE table = 'check_query_comment_column' +FORMAT PrettyCompactNoEscapes; + +DROP TABLE IF EXISTS check_query_comment_column; + + +CREATE TABLE check_query_comment_column + ( + first_column Date COMMENT 'first comment', + second_column UInt8 COMMENT 'second comment', + third_column UInt8 COMMENT 'third comment' + ) ENGINE = MergeTree(first_column, (second_column, second_column), 8192); + +SHOW CREATE TABLE check_query_comment_column; + +SELECT table, name, comment +FROM system.columns +WHERE table = 'check_query_comment_column' +FORMAT PrettyCompactNoEscapes; + +ALTER TABLE check_query_comment_column + COMMENT COLUMN first_column 'another first comment', + COMMENT COLUMN second_column 'another second comment', + COMMENT COLUMN third_column 'another third comment'; + +SHOW CREATE TABLE check_query_comment_column; + +SELECT table, name, comment +FROM system.columns +WHERE table = 'check_query_comment_column' +FORMAT PrettyCompactNoEscapes; \ No newline at end of file From 6d783ef013cdd1b919f5f30626f0eb2cc35cbbd7 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Wed, 17 Oct 2018 14:45:14 +0300 Subject: [PATCH 003/145] Move-away "uniqCombined" as a separate aggregated function with HLL precision --- dbms/scripts/test_uniq_functions.sh | 2 +- .../AggregateFunctionUniq.cpp | 3 - .../AggregateFunctionUniq.h | 88 ---- .../AggregateFunctionUniqCombined.cpp | 90 ++++ .../AggregateFunctionUniqCombined.h | 429 ++++++++++++++++++ .../registerAggregateFunctions.cpp | 2 + .../tests/performance/test_hits/test_hits.xml | 4 +- .../00212_shard_aggregate_function_uniq.sql | 26 +- .../0_stateless/00264_uniq_many_args.sql | 10 +- .../0_stateless/00700_decimal_aggregates.sql | 2 +- .../00146_aggregate_function_uniq.sql | 2 +- .../query_language/agg_functions/reference.md | 2 +- .../query_language/agg_functions/reference.md | 2 +- 13 files changed, 546 insertions(+), 116 deletions(-) create mode 100644 dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp create mode 100644 dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h diff --git a/dbms/scripts/test_uniq_functions.sh b/dbms/scripts/test_uniq_functions.sh index 9a4b6f20433..f7e2083610b 100755 --- a/dbms/scripts/test_uniq_functions.sh +++ b/dbms/scripts/test_uniq_functions.sh @@ -6,6 +6,6 @@ do do n=$(( 10**p * i )) echo -n "$n " - clickhouse-client -q "select uniqHLL12(number), uniq(number), uniqCombined(number) from numbers($n);" + clickhouse-client -q "select uniqHLL12(number), uniq(number), uniqCombined(15)(number) from numbers($n);" done done diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniq.cpp b/dbms/src/AggregateFunctions/AggregateFunctionUniq.cpp index 77b6c9cfb97..6b63a719b8f 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionUniq.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionUniq.cpp @@ -130,9 +130,6 @@ void registerAggregateFunctionsUniq(AggregateFunctionFactory & factory) factory.registerFunction("uniqExact", createAggregateFunctionUniq>); - - factory.registerFunction("uniqCombined", - createAggregateFunctionUniq>); } } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniq.h b/dbms/src/AggregateFunctions/AggregateFunctionUniq.h index 140928959a3..980d62b40ec 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionUniq.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionUniq.h @@ -22,7 +22,6 @@ #include #include -#include #include @@ -124,46 +123,6 @@ struct AggregateFunctionUniqExactData static String getName() { return "uniqExact"; } }; -template -struct AggregateFunctionUniqCombinedData -{ - using Key = UInt32; - using Set = CombinedCardinalityEstimator< - Key, - HashSet>, - 16, - 14, - 17, - TrivialHash, - UInt32, - HyperLogLogBiasEstimator, - HyperLogLogMode::FullFeatured>; - - Set set; - - static String getName() { return "uniqCombined"; } -}; - -template <> -struct AggregateFunctionUniqCombinedData -{ - using Key = UInt64; - using Set = CombinedCardinalityEstimator< - Key, - HashSet>, - 16, - 14, - 17, - TrivialHash, - UInt64, - HyperLogLogBiasEstimator, - HyperLogLogMode::FullFeatured>; - - Set set; - - static String getName() { return "uniqCombined"; } -}; - namespace detail { @@ -199,39 +158,6 @@ template <> struct AggregateFunctionUniqTraits } }; -/** Hash function for uniqCombined. - */ -template struct AggregateFunctionUniqCombinedTraits -{ - static UInt32 hash(T x) { return static_cast(intHash64(x)); } -}; - -template <> struct AggregateFunctionUniqCombinedTraits -{ - static UInt32 hash(UInt128 x) - { - return sipHash64(x); - } -}; - -template <> struct AggregateFunctionUniqCombinedTraits -{ - static UInt32 hash(Float32 x) - { - UInt64 res = ext::bit_cast(x); - return static_cast(intHash64(res)); - } -}; - -template <> struct AggregateFunctionUniqCombinedTraits -{ - static UInt32 hash(Float64 x) - { - UInt64 res = ext::bit_cast(x); - return static_cast(intHash64(res)); - } -}; - /** The structure for the delegation work to add one element to the `uniq` aggregate functions. * Used for partial specialization to add strings. @@ -255,19 +181,6 @@ struct OneAdder data.set.insert(CityHash_v1_0_2::CityHash64(value.data, value.size)); } } - else if constexpr (std::is_same_v>) - { - if constexpr (!std::is_same_v) - { - const auto & value = static_cast &>(column).getData()[row_num]; - data.set.insert(AggregateFunctionUniqCombinedTraits::hash(value)); - } - else - { - StringRef value = column.getDataAt(row_num); - data.set.insert(CityHash_v1_0_2::CityHash64(value.data, value.size)); - } - } else if constexpr (std::is_same_v>) { if constexpr (!std::is_same_v) @@ -387,5 +300,4 @@ public: const char * getHeaderFilePath() const override { return __FILE__; } }; - } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp new file mode 100644 index 00000000000..cdaf5b90c70 --- /dev/null +++ b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp @@ -0,0 +1,90 @@ +#include + +#include +#include + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ARGUMENT_OUT_OF_BOUND; +} + +namespace +{ + +AggregateFunctionPtr createAggregateFunctionUniqCombined( + const std::string & name, const DataTypes & argument_types, const Array & params) +{ + UInt8 precision = 17; /// default value - must correlate with default ctor of |AggregateFunctionUniqCombinedData| + + if (!params.empty()) + { + if (params.size() != 1) + throw Exception( + "Aggregate function " + name + " requires one parameter or less.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + UInt64 precision_param = applyVisitor(FieldVisitorConvertToNumber(), params[0]); + + // This range is hardcoded into |AggregateFunctionUniqCombinedData| + if (precision_param > 20 || precision_param < 12) + throw Exception( + "Parameter for aggregate function " + name + "is out or range: [12, 20].", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + + precision = precision_param; + } + + if (argument_types.empty()) + throw Exception("Incorrect number of arguments for aggregate function " + name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + /// We use exact hash function if the user wants it; + /// or if the arguments are not contiguous in memory, because only exact hash function have support for this case. + bool use_exact_hash_function = !isAllArgumentsContiguousInMemory(argument_types); + + if (argument_types.size() == 1) + { + const IDataType & argument_type = *argument_types[0]; + + AggregateFunctionPtr res(createWithNumericType(*argument_types[0], precision)); + + WhichDataType which(argument_type); + if (res) + return res; + else if (which.isDate()) + return std::make_shared>(precision); + else if (which.isDateTime()) + return std::make_shared>(precision); + else if (which.isStringOrFixedString()) + return std::make_shared>(precision); + else if (which.isUUID()) + return std::make_shared>(precision); + else if (which.isTuple()) + { + if (use_exact_hash_function) + return std::make_shared>(argument_types, precision); + else + return std::make_shared>(argument_types, precision); + } + } + + /// "Variadic" method also works as a fallback generic case for single argument. + if (use_exact_hash_function) + return std::make_shared>(argument_types, precision); + else + return std::make_shared>(argument_types, precision); +} + +} // namespace + +void registerAggregateFunctionUniqCombined(AggregateFunctionFactory & factory) +{ + factory.registerFunction("uniqCombined", createAggregateFunctionUniqCombined); +} + +} // namespace DB diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h new file mode 100644 index 00000000000..8d8a7c6745d --- /dev/null +++ b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h @@ -0,0 +1,429 @@ +#pragma once + +#include + +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include + +namespace DB +{ +namespace detail +{ + /** Hash function for uniqCombined. + */ + template + struct AggregateFunctionUniqCombinedTraits + { + static UInt32 hash(T x) + { + return static_cast(intHash64(x)); + } + }; + + template <> + struct AggregateFunctionUniqCombinedTraits + { + static UInt32 hash(UInt128 x) + { + return sipHash64(x); + } + }; + + template <> + struct AggregateFunctionUniqCombinedTraits + { + static UInt32 hash(Float32 x) + { + UInt64 res = ext::bit_cast(x); + return static_cast(intHash64(res)); + } + }; + + template <> + struct AggregateFunctionUniqCombinedTraits + { + static UInt32 hash(Float64 x) + { + UInt64 res = ext::bit_cast(x); + return static_cast(intHash64(res)); + } + }; + +} // namespace detail + + +template +struct __attribute__((__packed__)) AggregateFunctionUniqCombinedDataWithKey +{ + template + using Set = CombinedCardinalityEstimator>, + 16, + K - 3, + K, + TrivialHash, + Key, + HyperLogLogBiasEstimator, + HyperLogLogMode::FullFeatured>; + + mutable UInt8 inited = 0; + union + { + Set<12> set_12; + Set<13> set_13; + Set<14> set_14; + Set<15> set_15; + Set<16> set_16; + Set<17> set_17; + Set<18> set_18; + Set<19> set_19; + Set<20> set_20; + }; + + AggregateFunctionUniqCombinedDataWithKey() : set_17() {} + + ~AggregateFunctionUniqCombinedDataWithKey() + { + switch (inited) + { + case 12: + set_12.~CombinedCardinalityEstimator(); + break; + case 13: + set_13.~CombinedCardinalityEstimator(); + break; + case 14: + set_14.~CombinedCardinalityEstimator(); + break; + case 15: + set_15.~CombinedCardinalityEstimator(); + break; + case 16: + set_16.~CombinedCardinalityEstimator(); + break; + case 0: + case 17: + set_17.~CombinedCardinalityEstimator(); + break; + case 18: + set_18.~CombinedCardinalityEstimator(); + break; + case 19: + set_19.~CombinedCardinalityEstimator(); + break; + case 20: + set_20.~CombinedCardinalityEstimator(); + break; + } + } + + void init(UInt8 precision) const + { + if (inited || precision == 17) + return; + + // TODO: assert "inited == precision" + + set_17.~CombinedCardinalityEstimator(); + + switch (precision) + { + case 12: + new (&set_12) Set<12>; + break; + case 13: + new (&set_13) Set<13>; + break; + case 14: + new (&set_14) Set<14>; + break; + case 15: + new (&set_15) Set<15>; + break; + case 16: + new (&set_16) Set<16>; + break; + case 18: + new (&set_18) Set<18>; + break; + case 19: + new (&set_19) Set<19>; + break; + case 20: + new (&set_20) Set<20>; + break; + } + inited = precision; + } + +#define SET_METHOD(method) \ + switch (inited) \ + { \ + case 12: \ + set_12.method; \ + break; \ + case 13: \ + set_13.method; \ + break; \ + case 14: \ + set_14.method; \ + break; \ + case 15: \ + set_15.method; \ + break; \ + case 16: \ + set_16.method; \ + break; \ + case 17: \ + set_17.method; \ + break; \ + case 18: \ + set_18.method; \ + break; \ + case 19: \ + set_19.method; \ + break; \ + case 20: \ + set_20.method; \ + break; \ + } + +#define SET_RETURN_METHOD(method) \ + switch (inited) \ + { \ + case 12: \ + return set_12.method; \ + case 13: \ + return set_13.method; \ + case 14: \ + return set_14.method; \ + case 15: \ + return set_15.method; \ + case 16: \ + return set_16.method; \ + case 18: \ + return set_18.method; \ + case 19: \ + return set_19.method; \ + case 20: \ + return set_20.method; \ + case 17: \ + default: \ + return set_17.method; \ + } + + void insert(Key value, UInt8 precision) + { + init(precision); + SET_METHOD(insert(value)); + } + + void merge(const AggregateFunctionUniqCombinedDataWithKey & rhs, UInt8 precision) + { + init(precision); + switch (inited) + { + case 12: + set_12.merge(rhs.set_12); + break; + case 13: + set_13.merge(rhs.set_13); + break; + case 14: + set_14.merge(rhs.set_14); + break; + case 15: + set_15.merge(rhs.set_15); + break; + case 16: + set_16.merge(rhs.set_16); + break; + case 17: + set_17.merge(rhs.set_17); + break; + case 18: + set_18.merge(rhs.set_18); + break; + case 19: + set_19.merge(rhs.set_19); + break; + case 20: + set_20.merge(rhs.set_20); + break; + } + } + + void write(DB::WriteBuffer & out, UInt8 precision) const + { + init(precision); + SET_METHOD(write(out)); + } + + void read(DB::ReadBuffer & in, UInt8 precision) + { + init(precision); + SET_METHOD(read(in)); + } + + UInt32 size(UInt8 precision) const + { + init(precision); + SET_RETURN_METHOD(size()); + } + +#undef SET_METHOD +#undef SET_RETURN_METHOD +}; + + +template +struct __attribute__((__packed__)) AggregateFunctionUniqCombinedData : public AggregateFunctionUniqCombinedDataWithKey +{ +}; + + +template <> +struct __attribute__((__packed__)) AggregateFunctionUniqCombinedData : public AggregateFunctionUniqCombinedDataWithKey +{ +}; + + +template +class AggregateFunctionUniqCombined final + : public IAggregateFunctionDataHelper, AggregateFunctionUniqCombined> +{ +private: + const UInt8 precision; + +public: + explicit AggregateFunctionUniqCombined(UInt8 precision) : precision(precision) {} + + String getName() const override + { + return "uniqCombined"; + } + + DataTypePtr getReturnType() const override + { + return std::make_shared(); + } + + void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override + { + if constexpr (!std::is_same_v) + { + const auto & value = static_cast &>(*columns[0]).getData()[row_num]; + this->data(place).insert(detail::AggregateFunctionUniqCombinedTraits::hash(value), precision); + } + else + { + StringRef value = columns[0]->getDataAt(row_num); + this->data(place).insert(CityHash_v1_0_2::CityHash64(value.data, value.size), precision); + } + } + + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override + { + this->data(place).merge(this->data(rhs), precision); + } + + void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override + { + this->data(place).write(buf, precision); + } + + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override + { + this->data(place).read(buf, precision); + } + + void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override + { + static_cast(to).getData().push_back(this->data(place).size(precision)); + } + + const char * getHeaderFilePath() const override + { + return __FILE__; + } +}; + +/** For multiple arguments. To compute, hashes them. + * You can pass multiple arguments as is; You can also pass one argument - a tuple. + * But (for the possibility of efficient implementation), you can not pass several arguments, among which there are tuples. + */ +template +class AggregateFunctionUniqCombinedVariadic final : public IAggregateFunctionDataHelper, + AggregateFunctionUniqCombinedVariadic> +{ +private: + size_t num_args = 0; + UInt8 precision; + +public: + AggregateFunctionUniqCombinedVariadic(const DataTypes & arguments, UInt8 precision) : precision(precision) + { + if (argument_is_tuple) + num_args = typeid_cast(*arguments[0]).getElements().size(); + else + num_args = arguments.size(); + } + + String getName() const override + { + return "uniqCombined"; + } + + DataTypePtr getReturnType() const override + { + return std::make_shared(); + } + + void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override + { + this->data(place).insert(typename AggregateFunctionUniqCombinedData::Set<12>::value_type( + UniqVariadicHash::apply(num_args, columns, row_num)), + precision); + } + + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override + { + this->data(place).merge(this->data(rhs), precision); + } + + void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override + { + this->data(place).write(buf, precision); + } + + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override + { + this->data(place).read(buf, precision); + } + + void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override + { + static_cast(to).getData().push_back(this->data(place).size(precision)); + } + + const char * getHeaderFilePath() const override + { + return __FILE__; + } +}; + +} // namespace DB diff --git a/dbms/src/AggregateFunctions/registerAggregateFunctions.cpp b/dbms/src/AggregateFunctions/registerAggregateFunctions.cpp index 3517ad57a73..800beda1d53 100644 --- a/dbms/src/AggregateFunctions/registerAggregateFunctions.cpp +++ b/dbms/src/AggregateFunctions/registerAggregateFunctions.cpp @@ -21,6 +21,7 @@ void registerAggregateFunctionsStatisticsSimple(AggregateFunctionFactory &); void registerAggregateFunctionSum(AggregateFunctionFactory &); void registerAggregateFunctionSumMap(AggregateFunctionFactory &); void registerAggregateFunctionsUniq(AggregateFunctionFactory &); +void registerAggregateFunctionUniqCombined(AggregateFunctionFactory &); void registerAggregateFunctionUniqUpTo(AggregateFunctionFactory &); void registerAggregateFunctionTopK(AggregateFunctionFactory &); void registerAggregateFunctionsBitwise(AggregateFunctionFactory &); @@ -55,6 +56,7 @@ void registerAggregateFunctions() registerAggregateFunctionSum(factory); registerAggregateFunctionSumMap(factory); registerAggregateFunctionsUniq(factory); + registerAggregateFunctionUniqCombined(factory); registerAggregateFunctionUniqUpTo(factory); registerAggregateFunctionTopK(factory); registerAggregateFunctionsBitwise(factory); diff --git a/dbms/tests/performance/test_hits/test_hits.xml b/dbms/tests/performance/test_hits/test_hits.xml index c9e30227ff0..eea308fdd64 100644 --- a/dbms/tests/performance/test_hits/test_hits.xml +++ b/dbms/tests/performance/test_hits/test_hits.xml @@ -86,8 +86,8 @@ PageCharset тоже почти всегда непуст, но его сред SELECT uniq(UserID) FROM test.hits SETTINGS max_threads = 1 SELECT uniq(UserID) FROM test.hits -SELECT uniqCombined(UserID) FROM test.hits SETTINGS max_threads = 1 -SELECT uniqCombined(UserID) FROM test.hits +SELECT uniqCombined(15)(UserID) FROM test.hits SETTINGS max_threads = 1 +SELECT uniqCombined(15)(UserID) FROM test.hits SELECT uniqExact(UserID) FROM test.hits SETTINGS max_threads = 1 SELECT uniqExact(UserID) FROM test.hits diff --git a/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.sql b/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.sql index 53b5ec0001b..b3d82b71685 100644 --- a/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.sql +++ b/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.sql @@ -20,20 +20,20 @@ SELECT uniqHLL12(dummy) FROM remote('127.0.0.{2,3}', system.one); /* uniqCombined */ -SELECT Y, uniqCombined(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; -SELECT Y, uniqCombined(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; -SELECT Y, uniqCombined(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT Y, uniqCombined(15)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(15)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(15)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; -SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; -SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; -SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT Y, uniqCombined(15)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(15)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(15)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; -SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; -SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; -SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT Y, uniqCombined(15)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(15)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(15)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; -SELECT Y, uniqCombined(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; -SELECT Y, uniqCombined(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; -SELECT Y, uniqCombined(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT Y, uniqCombined(15)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(15)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(15)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; -SELECT uniqCombined(dummy) FROM remote('127.0.0.{2,3}', system.one); +SELECT uniqCombined(15)(dummy) FROM remote('127.0.0.{2,3}', system.one); diff --git a/dbms/tests/queries/0_stateless/00264_uniq_many_args.sql b/dbms/tests/queries/0_stateless/00264_uniq_many_args.sql index 2b24e68910c..a94ec6e8f0a 100644 --- a/dbms/tests/queries/0_stateless/00264_uniq_many_args.sql +++ b/dbms/tests/queries/0_stateless/00264_uniq_many_args.sql @@ -1,12 +1,12 @@ -SELECT +SELECT uniq(x), uniq((x)), uniq(x, y), uniq((x, y)), uniq(x, y, z), uniq((x, y, z)), - uniqCombined(x), uniqCombined((x)), uniqCombined(x, y), uniqCombined((x, y)), uniqCombined(x, y, z), uniqCombined((x, y, z)), + uniqCombined(15)(x), uniqCombined(15)((x)), uniqCombined(15)(x, y), uniqCombined(15)((x, y)), uniqCombined(15)(x, y, z), uniqCombined(15)((x, y, z)), uniqHLL12(x), uniqHLL12((x)), uniqHLL12(x, y), uniqHLL12((x, y)), uniqHLL12(x, y, z), uniqHLL12((x, y, z)), uniqExact(x), uniqExact((x)), uniqExact(x, y), uniqExact((x, y)), uniqExact(x, y, z), uniqExact((x, y, z)), uniqUpTo(5)(x), uniqUpTo(5)((x)), uniqUpTo(5)(x, y), uniqUpTo(5)((x, y)), uniqUpTo(5)(x, y, z), uniqUpTo(5)((x, y, z)) FROM ( - SELECT + SELECT number % 10 AS x, intDiv(number, 10) % 10 AS y, toString(intDiv(number, 100) % 10) AS z @@ -16,14 +16,14 @@ FROM SELECT k, uniq(x), uniq((x)), uniq(x, y), uniq((x, y)), uniq(x, y, z), uniq((x, y, z)), - uniqCombined(x), uniqCombined((x)), uniqCombined(x, y), uniqCombined((x, y)), uniqCombined(x, y, z), uniqCombined((x, y, z)), + uniqCombined(15)(x), uniqCombined(15)((x)), uniqCombined(15)(x, y), uniqCombined(15)((x, y)), uniqCombined(15)(x, y, z), uniqCombined(15)((x, y, z)), uniqHLL12(x), uniqHLL12((x)), uniqHLL12(x, y), uniqHLL12((x, y)), uniqHLL12(x, y, z), uniqHLL12((x, y, z)), uniqExact(x), uniqExact((x)), uniqExact(x, y), uniqExact((x, y)), uniqExact(x, y, z), uniqExact((x, y, z)), uniqUpTo(5)(x), uniqUpTo(5)((x)), uniqUpTo(5)(x, y), uniqUpTo(5)((x, y)), uniqUpTo(5)(x, y, z), uniqUpTo(5)((x, y, z)), count() AS c FROM ( - SELECT + SELECT (number + 0x8ffcbd8257219a26) * 0x66bb3430c06d2353 % 131 AS k, number % 10 AS x, intDiv(number, 10) % 10 AS y, diff --git a/dbms/tests/queries/0_stateless/00700_decimal_aggregates.sql b/dbms/tests/queries/0_stateless/00700_decimal_aggregates.sql index e10f665ad02..ba097ada98b 100644 --- a/dbms/tests/queries/0_stateless/00700_decimal_aggregates.sql +++ b/dbms/tests/queries/0_stateless/00700_decimal_aggregates.sql @@ -28,7 +28,7 @@ SELECT avg(a), avg(b), avg(c) FROM test.decimal WHERE a > 0; SELECT avg(a), avg(b), avg(c) FROM test.decimal WHERE a < 0; SELECT (uniq(a), uniq(b), uniq(c)), - (uniqCombined(a), uniqCombined(b), uniqCombined(c)), + (uniqCombined(15)(a), uniqCombined(15)(b), uniqCombined(15)(c)), (uniqExact(a), uniqExact(b), uniqExact(c)), (uniqHLL12(a), uniqHLL12(b), uniqHLL12(c)) FROM (SELECT * FROM test.decimal ORDER BY a); diff --git a/dbms/tests/queries/1_stateful/00146_aggregate_function_uniq.sql b/dbms/tests/queries/1_stateful/00146_aggregate_function_uniq.sql index fd3fde7636d..0f860948a3a 100644 --- a/dbms/tests/queries/1_stateful/00146_aggregate_function_uniq.sql +++ b/dbms/tests/queries/1_stateful/00146_aggregate_function_uniq.sql @@ -1,3 +1,3 @@ SELECT RegionID, uniqHLL12(WatchID) AS X FROM remote('127.0.0.{1,2}', test, hits) GROUP BY RegionID HAVING X > 100000 ORDER BY RegionID ASC; -SELECT RegionID, uniqCombined(WatchID) AS X FROM remote('127.0.0.{1,2}', test, hits) GROUP BY RegionID HAVING X > 100000 ORDER BY RegionID ASC; +SELECT RegionID, uniqCombined(15)(WatchID) AS X FROM remote('127.0.0.{1,2}', test, hits) GROUP BY RegionID HAVING X > 100000 ORDER BY RegionID ASC; SELECT abs(uniq(WatchID) - uniqExact(WatchID)) FROM test.hits; diff --git a/docs/en/query_language/agg_functions/reference.md b/docs/en/query_language/agg_functions/reference.md index fd0bb213933..491a710fe5b 100644 --- a/docs/en/query_language/agg_functions/reference.md +++ b/docs/en/query_language/agg_functions/reference.md @@ -146,7 +146,7 @@ The result is determinate (it doesn't depend on the order of query processing). This function provides excellent accuracy even for data sets with extremely high cardinality (over 10 billion elements). It is recommended for default use. -## uniqCombined(x) +## uniqCombined(HLL_precision)(x) Calculates the approximate number of different values of the argument. Works for numbers, strings, dates, date-with-time, and for multiple arguments and tuple arguments. diff --git a/docs/ru/query_language/agg_functions/reference.md b/docs/ru/query_language/agg_functions/reference.md index 1c700851ef0..d42f216c6ea 100644 --- a/docs/ru/query_language/agg_functions/reference.md +++ b/docs/ru/query_language/agg_functions/reference.md @@ -156,7 +156,7 @@ GROUP BY timeslot Данная функция обеспечивает отличную точность даже для множеств огромной кардинальности (10B+ элементов) и рекомендуется к использованию по умолчанию. -## uniqCombined(x) +## uniqCombined(HLL_precision)(x) Приближённо вычисляет количество различных значений аргумента. Работает для чисел, строк, дат, дат-с-временем, для нескольких аргументов и аргументов-кортежей. From e16081d4d4cc23aea65ce56b4d9f4ad3d110c465 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Thu, 18 Oct 2018 14:15:47 +0300 Subject: [PATCH 004/145] Fix initialization of precision. --- .../src/AggregateFunctions/AggregateFunctionUniqCombined.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h index 8d8a7c6745d..7d8bd487345 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h @@ -133,9 +133,14 @@ struct __attribute__((__packed__)) AggregateFunctionUniqCombinedDataWithKey void init(UInt8 precision) const { - if (inited || precision == 17) + if (inited) return; + if (precision == 17) { + inited = precision; + return; + } + // TODO: assert "inited == precision" set_17.~CombinedCardinalityEstimator(); From d588120677264a4849b9fe8b88a004eaaa46b4b4 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Thu, 18 Oct 2018 17:47:21 +0300 Subject: [PATCH 005/145] Keep default value for HLL precision in tests - to not break them. --- dbms/scripts/test_uniq_functions.sh | 2 +- .../tests/performance/test_hits/test_hits.xml | 4 +-- .../00212_shard_aggregate_function_uniq.sql | 26 +++++++++---------- .../0_stateless/00264_uniq_many_args.sql | 4 +-- .../0_stateless/00700_decimal_aggregates.sql | 2 +- .../00146_aggregate_function_uniq.sql | 2 +- 6 files changed, 20 insertions(+), 20 deletions(-) diff --git a/dbms/scripts/test_uniq_functions.sh b/dbms/scripts/test_uniq_functions.sh index f7e2083610b..2a3becbcfa3 100755 --- a/dbms/scripts/test_uniq_functions.sh +++ b/dbms/scripts/test_uniq_functions.sh @@ -6,6 +6,6 @@ do do n=$(( 10**p * i )) echo -n "$n " - clickhouse-client -q "select uniqHLL12(number), uniq(number), uniqCombined(15)(number) from numbers($n);" + clickhouse-client -q "select uniqHLL12(number), uniq(number), uniqCombined(17)(number) from numbers($n);" done done diff --git a/dbms/tests/performance/test_hits/test_hits.xml b/dbms/tests/performance/test_hits/test_hits.xml index eea308fdd64..e98c6c8e625 100644 --- a/dbms/tests/performance/test_hits/test_hits.xml +++ b/dbms/tests/performance/test_hits/test_hits.xml @@ -86,8 +86,8 @@ PageCharset тоже почти всегда непуст, но его сред SELECT uniq(UserID) FROM test.hits SETTINGS max_threads = 1 SELECT uniq(UserID) FROM test.hits -SELECT uniqCombined(15)(UserID) FROM test.hits SETTINGS max_threads = 1 -SELECT uniqCombined(15)(UserID) FROM test.hits +SELECT uniqCombined(17)(UserID) FROM test.hits SETTINGS max_threads = 1 +SELECT uniqCombined(17)(UserID) FROM test.hits SELECT uniqExact(UserID) FROM test.hits SETTINGS max_threads = 1 SELECT uniqExact(UserID) FROM test.hits diff --git a/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.sql b/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.sql index b3d82b71685..596b7e816b0 100644 --- a/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.sql +++ b/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.sql @@ -20,20 +20,20 @@ SELECT uniqHLL12(dummy) FROM remote('127.0.0.{2,3}', system.one); /* uniqCombined */ -SELECT Y, uniqCombined(15)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; -SELECT Y, uniqCombined(15)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; -SELECT Y, uniqCombined(15)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; -SELECT Y, uniqCombined(15)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; -SELECT Y, uniqCombined(15)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; -SELECT Y, uniqCombined(15)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; -SELECT Y, uniqCombined(15)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; -SELECT Y, uniqCombined(15)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; -SELECT Y, uniqCombined(15)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; -SELECT Y, uniqCombined(15)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; -SELECT Y, uniqCombined(15)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; -SELECT Y, uniqCombined(15)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT Y, uniqCombined(17)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(17)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(17)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; -SELECT uniqCombined(15)(dummy) FROM remote('127.0.0.{2,3}', system.one); +SELECT uniqCombined(17)(dummy) FROM remote('127.0.0.{2,3}', system.one); diff --git a/dbms/tests/queries/0_stateless/00264_uniq_many_args.sql b/dbms/tests/queries/0_stateless/00264_uniq_many_args.sql index a94ec6e8f0a..0a813bab006 100644 --- a/dbms/tests/queries/0_stateless/00264_uniq_many_args.sql +++ b/dbms/tests/queries/0_stateless/00264_uniq_many_args.sql @@ -1,6 +1,6 @@ SELECT uniq(x), uniq((x)), uniq(x, y), uniq((x, y)), uniq(x, y, z), uniq((x, y, z)), - uniqCombined(15)(x), uniqCombined(15)((x)), uniqCombined(15)(x, y), uniqCombined(15)((x, y)), uniqCombined(15)(x, y, z), uniqCombined(15)((x, y, z)), + uniqCombined(17)(x), uniqCombined(17)((x)), uniqCombined(17)(x, y), uniqCombined(17)((x, y)), uniqCombined(17)(x, y, z), uniqCombined(17)((x, y, z)), uniqHLL12(x), uniqHLL12((x)), uniqHLL12(x, y), uniqHLL12((x, y)), uniqHLL12(x, y, z), uniqHLL12((x, y, z)), uniqExact(x), uniqExact((x)), uniqExact(x, y), uniqExact((x, y)), uniqExact(x, y, z), uniqExact((x, y, z)), uniqUpTo(5)(x), uniqUpTo(5)((x)), uniqUpTo(5)(x, y), uniqUpTo(5)((x, y)), uniqUpTo(5)(x, y, z), uniqUpTo(5)((x, y, z)) @@ -16,7 +16,7 @@ FROM SELECT k, uniq(x), uniq((x)), uniq(x, y), uniq((x, y)), uniq(x, y, z), uniq((x, y, z)), - uniqCombined(15)(x), uniqCombined(15)((x)), uniqCombined(15)(x, y), uniqCombined(15)((x, y)), uniqCombined(15)(x, y, z), uniqCombined(15)((x, y, z)), + uniqCombined(17)(x), uniqCombined(17)((x)), uniqCombined(17)(x, y), uniqCombined(17)((x, y)), uniqCombined(17)(x, y, z), uniqCombined(17)((x, y, z)), uniqHLL12(x), uniqHLL12((x)), uniqHLL12(x, y), uniqHLL12((x, y)), uniqHLL12(x, y, z), uniqHLL12((x, y, z)), uniqExact(x), uniqExact((x)), uniqExact(x, y), uniqExact((x, y)), uniqExact(x, y, z), uniqExact((x, y, z)), uniqUpTo(5)(x), uniqUpTo(5)((x)), uniqUpTo(5)(x, y), uniqUpTo(5)((x, y)), uniqUpTo(5)(x, y, z), uniqUpTo(5)((x, y, z)), diff --git a/dbms/tests/queries/0_stateless/00700_decimal_aggregates.sql b/dbms/tests/queries/0_stateless/00700_decimal_aggregates.sql index ba097ada98b..1795398babb 100644 --- a/dbms/tests/queries/0_stateless/00700_decimal_aggregates.sql +++ b/dbms/tests/queries/0_stateless/00700_decimal_aggregates.sql @@ -28,7 +28,7 @@ SELECT avg(a), avg(b), avg(c) FROM test.decimal WHERE a > 0; SELECT avg(a), avg(b), avg(c) FROM test.decimal WHERE a < 0; SELECT (uniq(a), uniq(b), uniq(c)), - (uniqCombined(15)(a), uniqCombined(15)(b), uniqCombined(15)(c)), + (uniqCombined(17)(a), uniqCombined(17)(b), uniqCombined(17)(c)), (uniqExact(a), uniqExact(b), uniqExact(c)), (uniqHLL12(a), uniqHLL12(b), uniqHLL12(c)) FROM (SELECT * FROM test.decimal ORDER BY a); diff --git a/dbms/tests/queries/1_stateful/00146_aggregate_function_uniq.sql b/dbms/tests/queries/1_stateful/00146_aggregate_function_uniq.sql index 0f860948a3a..1200e312652 100644 --- a/dbms/tests/queries/1_stateful/00146_aggregate_function_uniq.sql +++ b/dbms/tests/queries/1_stateful/00146_aggregate_function_uniq.sql @@ -1,3 +1,3 @@ SELECT RegionID, uniqHLL12(WatchID) AS X FROM remote('127.0.0.{1,2}', test, hits) GROUP BY RegionID HAVING X > 100000 ORDER BY RegionID ASC; -SELECT RegionID, uniqCombined(15)(WatchID) AS X FROM remote('127.0.0.{1,2}', test, hits) GROUP BY RegionID HAVING X > 100000 ORDER BY RegionID ASC; +SELECT RegionID, uniqCombined(17)(WatchID) AS X FROM remote('127.0.0.{1,2}', test, hits) GROUP BY RegionID HAVING X > 100000 ORDER BY RegionID ASC; SELECT abs(uniq(WatchID) - uniqExact(WatchID)) FROM test.hits; From 45e6fd8cd25f7328590d69428138ed3095ef8e8f Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Thu, 18 Oct 2018 18:23:42 +0300 Subject: [PATCH 006/145] Fix compilation with gcc and code style --- .../AggregateFunctionUniqCombined.h | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h index 7d8bd487345..d655ab3626e 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h @@ -67,7 +67,7 @@ namespace detail template -struct __attribute__((__packed__)) AggregateFunctionUniqCombinedDataWithKey +struct AggregateFunctionUniqCombinedDataWithKey { template using Set = CombinedCardinalityEstimator set_12; - Set<13> set_13; - Set<14> set_14; - Set<15> set_15; - Set<16> set_16; - Set<17> set_17; - Set<18> set_18; - Set<19> set_19; - Set<20> set_20; + mutable Set<12> set_12; + mutable Set<13> set_13; + mutable Set<14> set_14; + mutable Set<15> set_15; + mutable Set<16> set_16; + mutable Set<17> set_17; + mutable Set<18> set_18; + mutable Set<19> set_19; + mutable Set<20> set_20; }; AggregateFunctionUniqCombinedDataWithKey() : set_17() {} @@ -136,7 +136,8 @@ struct __attribute__((__packed__)) AggregateFunctionUniqCombinedDataWithKey if (inited) return; - if (precision == 17) { + if (precision == 17) + { inited = precision; return; } From 28ea773b9ee17eaa3030e2bac0fd6ad5fd77a870 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Thu, 18 Oct 2018 21:38:45 +0300 Subject: [PATCH 007/145] Use a single "place" for a default precision constant. Also update stateless tests. --- dbms/scripts/test_uniq_functions.sh | 2 +- .../AggregateFunctionUniqCombined.cpp | 2 +- .../AggregateFunctionUniqCombined.h | 72 +++--- ...12_shard_aggregate_function_uniq.reference | 209 ++++++++++++++++++ .../00212_shard_aggregate_function_uniq.sql | 18 ++ .../00264_uniq_many_args.reference | 22 +- .../0_stateless/00264_uniq_many_args.sql | 2 + .../00700_decimal_aggregates.reference | 2 +- .../0_stateless/00700_decimal_aggregates.sql | 1 + .../00146_aggregate_function_uniq.sql | 2 +- 10 files changed, 290 insertions(+), 42 deletions(-) diff --git a/dbms/scripts/test_uniq_functions.sh b/dbms/scripts/test_uniq_functions.sh index 2a3becbcfa3..9a4b6f20433 100755 --- a/dbms/scripts/test_uniq_functions.sh +++ b/dbms/scripts/test_uniq_functions.sh @@ -6,6 +6,6 @@ do do n=$(( 10**p * i )) echo -n "$n " - clickhouse-client -q "select uniqHLL12(number), uniq(number), uniqCombined(17)(number) from numbers($n);" + clickhouse-client -q "select uniqHLL12(number), uniq(number), uniqCombined(number) from numbers($n);" done done diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp index cdaf5b90c70..e148c9157cd 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp @@ -22,7 +22,7 @@ namespace AggregateFunctionPtr createAggregateFunctionUniqCombined( const std::string & name, const DataTypes & argument_types, const Array & params) { - UInt8 precision = 17; /// default value - must correlate with default ctor of |AggregateFunctionUniqCombinedData| + UInt8 precision = detail::UNIQ_COMBINED_DEFAULT_PRECISION; if (!params.empty()) { diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h index d655ab3626e..c4e1a571aee 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h @@ -19,12 +19,16 @@ #include +#define DEFAULT_HLL_PRECISION 17 + namespace DB { namespace detail { + const UInt8 UNIQ_COMBINED_DEFAULT_PRECISION = DEFAULT_HLL_PRECISION; + /** Hash function for uniqCombined. - */ + */ template struct AggregateFunctionUniqCombinedTraits { @@ -94,12 +98,19 @@ struct AggregateFunctionUniqCombinedDataWithKey mutable Set<20> set_20; }; - AggregateFunctionUniqCombinedDataWithKey() : set_17() {} +#define PASTE(x, y) x##y +#define EVAL(x, y) PASTE(x, y) +#define DEFAULT_SET EVAL(set_, DEFAULT_HLL_PRECISION) + + AggregateFunctionUniqCombinedDataWithKey() : DEFAULT_SET() {} ~AggregateFunctionUniqCombinedDataWithKey() { switch (inited) { + case 0: + DEFAULT_SET.~CombinedCardinalityEstimator(); + break; case 12: set_12.~CombinedCardinalityEstimator(); break; @@ -115,7 +126,6 @@ struct AggregateFunctionUniqCombinedDataWithKey case 16: set_16.~CombinedCardinalityEstimator(); break; - case 0: case 17: set_17.~CombinedCardinalityEstimator(); break; @@ -136,7 +146,7 @@ struct AggregateFunctionUniqCombinedDataWithKey if (inited) return; - if (precision == 17) + if (precision == DEFAULT_HLL_PRECISION) { inited = precision; return; @@ -144,7 +154,7 @@ struct AggregateFunctionUniqCombinedDataWithKey // TODO: assert "inited == precision" - set_17.~CombinedCardinalityEstimator(); + DEFAULT_SET.~CombinedCardinalityEstimator(); switch (precision) { @@ -163,6 +173,9 @@ struct AggregateFunctionUniqCombinedDataWithKey case 16: new (&set_16) Set<16>; break; + case 17: + new (&set_17) Set<17>; + break; case 18: new (&set_18) Set<18>; break; @@ -208,28 +221,29 @@ struct AggregateFunctionUniqCombinedDataWithKey break; \ } -#define SET_RETURN_METHOD(method) \ - switch (inited) \ - { \ - case 12: \ - return set_12.method; \ - case 13: \ - return set_13.method; \ - case 14: \ - return set_14.method; \ - case 15: \ - return set_15.method; \ - case 16: \ - return set_16.method; \ - case 18: \ - return set_18.method; \ - case 19: \ - return set_19.method; \ - case 20: \ - return set_20.method; \ - case 17: \ - default: \ - return set_17.method; \ +#define SET_RETURN_METHOD(method) \ + switch (inited) \ + { \ + case 12: \ + return set_12.method; \ + case 13: \ + return set_13.method; \ + case 14: \ + return set_14.method; \ + case 15: \ + return set_15.method; \ + case 16: \ + return set_16.method; \ + case 17: \ + return set_17.method; \ + case 18: \ + return set_18.method; \ + case 19: \ + return set_19.method; \ + case 20: \ + return set_20.method; \ + default: \ + return DEFAULT_SET.method; \ } void insert(Key value, UInt8 precision) @@ -293,6 +307,10 @@ struct AggregateFunctionUniqCombinedDataWithKey #undef SET_METHOD #undef SET_RETURN_METHOD +#undef PASTE +#undef EVAL +#undef DEFAULT_SET +#undef DEFAULT_HLL_PRECISION }; diff --git a/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.reference b/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.reference index 8c94b8a5d58..c7c4f8ebacd 100644 --- a/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.reference +++ b/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.reference @@ -259,6 +259,58 @@ 31 53948 35 53931 36 53982 +1 1 +3 1 +6 1 +7 1 +9 1 +11 1 +14 1 +17 1 +19 1 +20 2 +26 1 +31 1 +35 1 +36 1 +0 162 +1 162 +3 162 +6 162 +7 163 +9 163 +10 81 +11 163 +13 162 +14 162 +17 162 +19 162 +20 162 +21 162 +22 162 +26 162 +31 162 +35 162 +36 162 +0 53901 +1 54056 +3 53999 +6 54129 +7 54213 +9 53853 +10 26975 +11 54064 +13 53963 +14 53997 +17 54129 +19 53923 +20 53958 +21 54117 +22 54150 +26 54047 +31 53948 +35 53931 +36 53982 0.125 1 0.5 1 0.05 1 @@ -311,6 +363,110 @@ 0.043 54150 0.037 54047 0.071 53963 +0.125 1 +0.5 1 +0.05 1 +0.143 1 +0.056 1 +0.048 2 +0.083 1 +0.25 1 +0.1 1 +0.028 1 +0.027 1 +0.031 1 +0.067 1 +0.037 1 +0.045 162 +0.125 163 +0.5 162 +0.05 162 +0.143 162 +0.091 81 +0.056 162 +0.048 162 +0.083 163 +0.25 162 +1 162 +0.1 163 +0.028 162 +0.027 162 +0.031 162 +0.067 162 +0.043 162 +0.037 162 +0.071 162 +0.045 54117 +0.125 54213 +0.5 54056 +0.05 53923 +0.143 54129 +0.091 26975 +0.056 54129 +0.048 53958 +0.083 54064 +0.25 53999 +1 53901 +0.1 53853 +0.028 53931 +0.027 53982 +0.031 53948 +0.067 53997 +0.043 54150 +0.037 54047 +0.071 53963 +0.5 1 +0.05 1 +0.25 1 +0.048 2 +0.083 1 +0.125 1 +0.031 1 +0.143 1 +0.028 1 +0.067 1 +0.027 1 +0.056 1 +0.037 1 +0.1 1 +0.5 162 +0.05 162 +0.25 162 +0.048 162 +0.091 81 +0.043 162 +0.071 162 +0.083 163 +0.125 163 +0.031 162 +0.143 162 +0.028 162 +0.067 162 +0.045 162 +0.027 162 +0.056 162 +0.037 162 +0.1 163 +1 162 +0.5 54056 +0.05 53923 +0.25 53999 +0.048 53958 +0.091 26975 +0.043 54150 +0.071 53963 +0.083 54064 +0.125 54213 +0.031 53948 +0.143 54129 +0.028 53931 +0.067 53997 +0.045 54117 +0.027 53982 +0.056 54129 +0.037 54047 +0.1 53853 +1 53901 0.5 1 0.05 1 0.25 1 @@ -415,4 +571,57 @@ 31 54074 35 54153 36 53999 +1 1 +3 1 +6 1 +7 1 +9 1 +11 1 +14 1 +17 1 +19 1 +20 2 +26 1 +31 1 +35 1 +36 1 +0 162 +1 162 +3 162 +6 162 +7 163 +9 163 +10 81 +11 163 +13 162 +14 162 +17 162 +19 162 +20 162 +21 162 +22 162 +26 162 +31 162 +35 162 +36 162 +0 54195 +1 54086 +3 54127 +6 54173 +7 53969 +9 54257 +10 26985 +11 53880 +13 54105 +14 54043 +17 54176 +19 53913 +20 54088 +21 53991 +22 54112 +26 54136 +31 54074 +35 54153 +36 53999 +1 1 diff --git a/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.sql b/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.sql index 596b7e816b0..a2ab15775c5 100644 --- a/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.sql +++ b/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.sql @@ -20,20 +20,38 @@ SELECT uniqHLL12(dummy) FROM remote('127.0.0.{2,3}', system.one); /* uniqCombined */ +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT Y, uniqCombined(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + SELECT Y, uniqCombined(17)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(17)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(17)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT uniqCombined(dummy) FROM remote('127.0.0.{2,3}', system.one); + SELECT uniqCombined(17)(dummy) FROM remote('127.0.0.{2,3}', system.one); diff --git a/dbms/tests/queries/0_stateless/00264_uniq_many_args.reference b/dbms/tests/queries/0_stateless/00264_uniq_many_args.reference index 758b4d6b8e2..12aeed56e8b 100644 --- a/dbms/tests/queries/0_stateless/00264_uniq_many_args.reference +++ b/dbms/tests/queries/0_stateless/00264_uniq_many_args.reference @@ -1,11 +1,11 @@ -10 10 100 100 1000 1000 10 10 100 100 1000 1000 10 10 101 101 1006 1006 10 10 100 100 1000 1000 6 6 6 6 6 6 -17 10 10 100 100 610 610 10 10 100 100 610 610 10 10 101 101 616 616 10 10 100 100 610 610 6 6 6 6 6 6 766 -52 10 10 100 100 608 608 10 10 100 100 608 608 10 10 101 101 611 611 10 10 100 100 608 608 6 6 6 6 6 6 766 -5 10 10 100 100 609 609 10 10 100 100 609 609 10 10 101 101 608 608 10 10 100 100 609 609 6 6 6 6 6 6 765 -9 10 10 100 100 608 608 10 10 100 100 608 608 10 10 101 101 618 618 10 10 100 100 608 608 6 6 6 6 6 6 765 -13 10 10 100 100 607 607 10 10 100 100 607 607 10 10 101 101 610 610 10 10 100 100 607 607 6 6 6 6 6 6 765 -46 10 10 100 100 607 607 10 10 100 100 607 607 10 10 101 101 611 611 10 10 100 100 607 607 6 6 6 6 6 6 765 -48 10 10 100 100 609 609 10 10 100 100 609 609 10 10 101 101 617 617 10 10 100 100 609 609 6 6 6 6 6 6 765 -50 10 10 100 100 608 608 10 10 100 100 608 608 10 10 101 101 614 614 10 10 100 100 608 608 6 6 6 6 6 6 765 -54 10 10 100 100 609 609 10 10 100 100 609 609 10 10 101 101 615 615 10 10 100 100 609 609 6 6 6 6 6 6 765 -56 10 10 100 100 608 608 10 10 100 100 608 608 10 10 101 101 614 614 10 10 100 100 608 608 6 6 6 6 6 6 765 +10 10 100 100 1000 1000 10 10 100 100 1000 1000 10 10 100 100 1000 1000 10 10 101 101 1006 1006 10 10 100 100 1000 1000 6 6 6 6 6 6 +17 10 10 100 100 610 610 10 10 100 100 610 610 10 10 100 100 610 610 10 10 101 101 616 616 10 10 100 100 610 610 6 6 6 6 6 6 766 +52 10 10 100 100 608 608 10 10 100 100 608 608 10 10 100 100 608 608 10 10 101 101 611 611 10 10 100 100 608 608 6 6 6 6 6 6 766 +5 10 10 100 100 609 609 10 10 100 100 609 609 10 10 100 100 609 609 10 10 101 101 608 608 10 10 100 100 609 609 6 6 6 6 6 6 765 +9 10 10 100 100 608 608 10 10 100 100 608 608 10 10 100 100 608 608 10 10 101 101 618 618 10 10 100 100 608 608 6 6 6 6 6 6 765 +13 10 10 100 100 607 607 10 10 100 100 607 607 10 10 100 100 607 607 10 10 101 101 610 610 10 10 100 100 607 607 6 6 6 6 6 6 765 +46 10 10 100 100 607 607 10 10 100 100 607 607 10 10 100 100 607 607 10 10 101 101 611 611 10 10 100 100 607 607 6 6 6 6 6 6 765 +48 10 10 100 100 609 609 10 10 100 100 609 609 10 10 100 100 609 609 10 10 101 101 617 617 10 10 100 100 609 609 6 6 6 6 6 6 765 +50 10 10 100 100 608 608 10 10 100 100 608 608 10 10 100 100 608 608 10 10 101 101 614 614 10 10 100 100 608 608 6 6 6 6 6 6 765 +54 10 10 100 100 609 609 10 10 100 100 609 609 10 10 100 100 609 609 10 10 101 101 615 615 10 10 100 100 609 609 6 6 6 6 6 6 765 +56 10 10 100 100 608 608 10 10 100 100 608 608 10 10 100 100 608 608 10 10 101 101 614 614 10 10 100 100 608 608 6 6 6 6 6 6 765 diff --git a/dbms/tests/queries/0_stateless/00264_uniq_many_args.sql b/dbms/tests/queries/0_stateless/00264_uniq_many_args.sql index 0a813bab006..847d753a36b 100644 --- a/dbms/tests/queries/0_stateless/00264_uniq_many_args.sql +++ b/dbms/tests/queries/0_stateless/00264_uniq_many_args.sql @@ -1,5 +1,6 @@ SELECT uniq(x), uniq((x)), uniq(x, y), uniq((x, y)), uniq(x, y, z), uniq((x, y, z)), + uniqCombined(x), uniqCombined((x)), uniqCombined(x, y), uniqCombined((x, y)), uniqCombined(x, y, z), uniqCombined((x, y, z)), uniqCombined(17)(x), uniqCombined(17)((x)), uniqCombined(17)(x, y), uniqCombined(17)((x, y)), uniqCombined(17)(x, y, z), uniqCombined(17)((x, y, z)), uniqHLL12(x), uniqHLL12((x)), uniqHLL12(x, y), uniqHLL12((x, y)), uniqHLL12(x, y, z), uniqHLL12((x, y, z)), uniqExact(x), uniqExact((x)), uniqExact(x, y), uniqExact((x, y)), uniqExact(x, y, z), uniqExact((x, y, z)), @@ -16,6 +17,7 @@ FROM SELECT k, uniq(x), uniq((x)), uniq(x, y), uniq((x, y)), uniq(x, y, z), uniq((x, y, z)), + uniqCombined(x), uniqCombined((x)), uniqCombined(x, y), uniqCombined((x, y)), uniqCombined(x, y, z), uniqCombined((x, y, z)), uniqCombined(17)(x), uniqCombined(17)((x)), uniqCombined(17)(x, y), uniqCombined(17)((x, y)), uniqCombined(17)(x, y, z), uniqCombined(17)((x, y, z)), uniqHLL12(x), uniqHLL12((x)), uniqHLL12(x, y), uniqHLL12((x, y)), uniqHLL12(x, y, z), uniqHLL12((x, y, z)), uniqExact(x), uniqExact((x)), uniqExact(x, y), uniqExact((x, y)), uniqExact(x, y, z), uniqExact((x, y, z)), diff --git a/dbms/tests/queries/0_stateless/00700_decimal_aggregates.reference b/dbms/tests/queries/0_stateless/00700_decimal_aggregates.reference index 3b1c6f9099d..1a7c7fbdbb0 100644 --- a/dbms/tests/queries/0_stateless/00700_decimal_aggregates.reference +++ b/dbms/tests/queries/0_stateless/00700_decimal_aggregates.reference @@ -8,7 +8,7 @@ 0.0000 0.00000000 0.00000000 25.5000 8.49999999 5.10000000 -25.5000 -8.49999999 -5.10000000 -(101,101,101) (101,101,101) (101,101,101) (102,100,101) +(101,101,101) (101,101,101) (101,101,101) (101,101,101) (102,100,101) 5 5 5 10 10 10 -50.0000 -50.0000 -16.66666666 -16.66666666 -10.00000000 -10.00000000 diff --git a/dbms/tests/queries/0_stateless/00700_decimal_aggregates.sql b/dbms/tests/queries/0_stateless/00700_decimal_aggregates.sql index 1795398babb..951e1384e93 100644 --- a/dbms/tests/queries/0_stateless/00700_decimal_aggregates.sql +++ b/dbms/tests/queries/0_stateless/00700_decimal_aggregates.sql @@ -28,6 +28,7 @@ SELECT avg(a), avg(b), avg(c) FROM test.decimal WHERE a > 0; SELECT avg(a), avg(b), avg(c) FROM test.decimal WHERE a < 0; SELECT (uniq(a), uniq(b), uniq(c)), + (uniqCombined(a), uniqCombined(b), uniqCombined(c)), (uniqCombined(17)(a), uniqCombined(17)(b), uniqCombined(17)(c)), (uniqExact(a), uniqExact(b), uniqExact(c)), (uniqHLL12(a), uniqHLL12(b), uniqHLL12(c)) diff --git a/dbms/tests/queries/1_stateful/00146_aggregate_function_uniq.sql b/dbms/tests/queries/1_stateful/00146_aggregate_function_uniq.sql index 1200e312652..fd3fde7636d 100644 --- a/dbms/tests/queries/1_stateful/00146_aggregate_function_uniq.sql +++ b/dbms/tests/queries/1_stateful/00146_aggregate_function_uniq.sql @@ -1,3 +1,3 @@ SELECT RegionID, uniqHLL12(WatchID) AS X FROM remote('127.0.0.{1,2}', test, hits) GROUP BY RegionID HAVING X > 100000 ORDER BY RegionID ASC; -SELECT RegionID, uniqCombined(17)(WatchID) AS X FROM remote('127.0.0.{1,2}', test, hits) GROUP BY RegionID HAVING X > 100000 ORDER BY RegionID ASC; +SELECT RegionID, uniqCombined(WatchID) AS X FROM remote('127.0.0.{1,2}', test, hits) GROUP BY RegionID HAVING X > 100000 ORDER BY RegionID ASC; SELECT abs(uniq(WatchID) - uniqExact(WatchID)) FROM test.hits; From 19ed7b6ae9d099d091ab01f5b8ca0a6a92b923d8 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Mon, 22 Oct 2018 13:00:37 +0300 Subject: [PATCH 008/145] Minor optimizations --- .../AggregateFunctionUniqCombined.h | 129 +++++++++--------- 1 file changed, 61 insertions(+), 68 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h index c4e1a571aee..234f24719c5 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h @@ -73,6 +73,8 @@ namespace detail template struct AggregateFunctionUniqCombinedDataWithKey { + using value_type = Key; + template using Set = CombinedCardinalityEstimator>, @@ -87,6 +89,7 @@ struct AggregateFunctionUniqCombinedDataWithKey mutable UInt8 inited = 0; union { + mutable char stub[sizeof(Set<20>)]; mutable Set<12> set_12; mutable Set<13> set_13; mutable Set<14> set_14; @@ -98,19 +101,12 @@ struct AggregateFunctionUniqCombinedDataWithKey mutable Set<20> set_20; }; -#define PASTE(x, y) x##y -#define EVAL(x, y) PASTE(x, y) -#define DEFAULT_SET EVAL(set_, DEFAULT_HLL_PRECISION) - - AggregateFunctionUniqCombinedDataWithKey() : DEFAULT_SET() {} + AggregateFunctionUniqCombinedDataWithKey() : stub() {} ~AggregateFunctionUniqCombinedDataWithKey() { switch (inited) { - case 0: - DEFAULT_SET.~CombinedCardinalityEstimator(); - break; case 12: set_12.~CombinedCardinalityEstimator(); break; @@ -141,21 +137,13 @@ struct AggregateFunctionUniqCombinedDataWithKey } } - void init(UInt8 precision) const + void ALWAYS_INLINE init(UInt8 precision) const { if (inited) return; - if (precision == DEFAULT_HLL_PRECISION) - { - inited = precision; - return; - } - // TODO: assert "inited == precision" - DEFAULT_SET.~CombinedCardinalityEstimator(); - switch (precision) { case 12: @@ -221,40 +209,39 @@ struct AggregateFunctionUniqCombinedDataWithKey break; \ } -#define SET_RETURN_METHOD(method) \ - switch (inited) \ - { \ - case 12: \ - return set_12.method; \ - case 13: \ - return set_13.method; \ - case 14: \ - return set_14.method; \ - case 15: \ - return set_15.method; \ - case 16: \ - return set_16.method; \ - case 17: \ - return set_17.method; \ - case 18: \ - return set_18.method; \ - case 19: \ - return set_19.method; \ - case 20: \ - return set_20.method; \ - default: \ - return DEFAULT_SET.method; \ +#define SET_RETURN_METHOD(method) \ + switch (inited) \ + { \ + case 12: \ + return set_12.method; \ + case 13: \ + return set_13.method; \ + case 14: \ + return set_14.method; \ + case 15: \ + return set_15.method; \ + case 16: \ + return set_16.method; \ + case 17: \ + return set_17.method; \ + case 18: \ + return set_18.method; \ + case 19: \ + return set_19.method; \ + case 20: \ + return set_20.method; \ + default: \ + /* TODO: UNREACHABLE! */ \ + return set_12.method; \ } - void insert(Key value, UInt8 precision) + void ALWAYS_INLINE insert(Key value) { - init(precision); SET_METHOD(insert(value)); } - void merge(const AggregateFunctionUniqCombinedDataWithKey & rhs, UInt8 precision) + void ALWAYS_INLINE merge(const AggregateFunctionUniqCombinedDataWithKey & rhs) { - init(precision); switch (inited) { case 12: @@ -287,41 +274,35 @@ struct AggregateFunctionUniqCombinedDataWithKey } } - void write(DB::WriteBuffer & out, UInt8 precision) const + void ALWAYS_INLINE write(DB::WriteBuffer & out) const { - init(precision); SET_METHOD(write(out)); } - void read(DB::ReadBuffer & in, UInt8 precision) + void ALWAYS_INLINE read(DB::ReadBuffer & in) { - init(precision); SET_METHOD(read(in)); } - UInt32 size(UInt8 precision) const + UInt32 ALWAYS_INLINE size() const { - init(precision); SET_RETURN_METHOD(size()); } #undef SET_METHOD #undef SET_RETURN_METHOD -#undef PASTE -#undef EVAL -#undef DEFAULT_SET #undef DEFAULT_HLL_PRECISION }; template -struct __attribute__((__packed__)) AggregateFunctionUniqCombinedData : public AggregateFunctionUniqCombinedDataWithKey +struct AggregateFunctionUniqCombinedData : public AggregateFunctionUniqCombinedDataWithKey { }; template <> -struct __attribute__((__packed__)) AggregateFunctionUniqCombinedData : public AggregateFunctionUniqCombinedDataWithKey +struct AggregateFunctionUniqCombinedData : public AggregateFunctionUniqCombinedDataWithKey { }; @@ -346,38 +327,44 @@ public: return std::make_shared(); } + void create(AggregateDataPtr place) const override + { + IAggregateFunctionDataHelper, AggregateFunctionUniqCombined>::create(place); + this->data(place).init(precision); + } + void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override { if constexpr (!std::is_same_v) { const auto & value = static_cast &>(*columns[0]).getData()[row_num]; - this->data(place).insert(detail::AggregateFunctionUniqCombinedTraits::hash(value), precision); + this->data(place).insert(detail::AggregateFunctionUniqCombinedTraits::hash(value)); } else { StringRef value = columns[0]->getDataAt(row_num); - this->data(place).insert(CityHash_v1_0_2::CityHash64(value.data, value.size), precision); + this->data(place).insert(CityHash_v1_0_2::CityHash64(value.data, value.size)); } } void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override { - this->data(place).merge(this->data(rhs), precision); + this->data(place).merge(this->data(rhs)); } void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override { - this->data(place).write(buf, precision); + this->data(place).write(buf); } void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { - this->data(place).read(buf, precision); + this->data(place).read(buf); } void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override { - static_cast(to).getData().push_back(this->data(place).size(precision)); + static_cast(to).getData().push_back(this->data(place).size()); } const char * getHeaderFilePath() const override @@ -417,31 +404,37 @@ public: return std::make_shared(); } + void create(AggregateDataPtr place) const override + { + IAggregateFunctionDataHelper, + AggregateFunctionUniqCombinedVariadic>::create(place); + this->data(place).init(precision); + } + void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override { - this->data(place).insert(typename AggregateFunctionUniqCombinedData::Set<12>::value_type( - UniqVariadicHash::apply(num_args, columns, row_num)), - precision); + this->data(place).insert(typename AggregateFunctionUniqCombinedData::value_type( + UniqVariadicHash::apply(num_args, columns, row_num))); } void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override { - this->data(place).merge(this->data(rhs), precision); + this->data(place).merge(this->data(rhs)); } void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override { - this->data(place).write(buf, precision); + this->data(place).write(buf); } void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { - this->data(place).read(buf, precision); + this->data(place).read(buf); } void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override { - static_cast(to).getData().push_back(this->data(place).size(precision)); + static_cast(to).getData().push_back(this->data(place).size()); } const char * getHeaderFilePath() const override From 2a53716043f4f32b972ea18e5a46a9e8c48bae53 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Mon, 22 Oct 2018 20:18:08 +0300 Subject: [PATCH 009/145] Move the precision template argument one level up. --- .../AggregateFunctionUniqCombined.cpp | 143 +++++---- .../AggregateFunctionUniqCombined.h | 277 ++---------------- 2 files changed, 113 insertions(+), 307 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp index e148c9157cd..1ad628588d6 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp @@ -8,7 +8,6 @@ namespace DB { - namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; @@ -18,67 +17,105 @@ namespace ErrorCodes namespace { - -AggregateFunctionPtr createAggregateFunctionUniqCombined( - const std::string & name, const DataTypes & argument_types, const Array & params) -{ - UInt8 precision = detail::UNIQ_COMBINED_DEFAULT_PRECISION; - - if (!params.empty()) + template + struct WithK { - if (params.size() != 1) - throw Exception( - "Aggregate function " + name + " requires one parameter or less.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + template + using AggregateFunction = AggregateFunctionUniqCombined; - UInt64 precision_param = applyVisitor(FieldVisitorConvertToNumber(), params[0]); + template + using AggregateFunctionVariadic = AggregateFunctionUniqCombinedVariadic; + }; - // This range is hardcoded into |AggregateFunctionUniqCombinedData| - if (precision_param > 20 || precision_param < 12) - throw Exception( - "Parameter for aggregate function " + name + "is out or range: [12, 20].", ErrorCodes::ARGUMENT_OUT_OF_BOUND); - - precision = precision_param; - } - - if (argument_types.empty()) - throw Exception("Incorrect number of arguments for aggregate function " + name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - /// We use exact hash function if the user wants it; - /// or if the arguments are not contiguous in memory, because only exact hash function have support for this case. - bool use_exact_hash_function = !isAllArgumentsContiguousInMemory(argument_types); - - if (argument_types.size() == 1) + template + AggregateFunctionPtr createAggregateFunctionWithK(const DataTypes & argument_types) { - const IDataType & argument_type = *argument_types[0]; + /// We use exact hash function if the user wants it; + /// or if the arguments are not contiguous in memory, because only exact hash function have support for this case. + bool use_exact_hash_function = !isAllArgumentsContiguousInMemory(argument_types); - AggregateFunctionPtr res(createWithNumericType(*argument_types[0], precision)); - - WhichDataType which(argument_type); - if (res) - return res; - else if (which.isDate()) - return std::make_shared>(precision); - else if (which.isDateTime()) - return std::make_shared>(precision); - else if (which.isStringOrFixedString()) - return std::make_shared>(precision); - else if (which.isUUID()) - return std::make_shared>(precision); - else if (which.isTuple()) + if (argument_types.size() == 1) { - if (use_exact_hash_function) - return std::make_shared>(argument_types, precision); - else - return std::make_shared>(argument_types, precision); + const IDataType & argument_type = *argument_types[0]; + + AggregateFunctionPtr res(createWithNumericType::template AggregateFunction>(*argument_types[0])); + + WhichDataType which(argument_type); + if (res) + return res; + else if (which.isDate()) + return std::make_shared::template AggregateFunction>(); + else if (which.isDateTime()) + return std::make_shared::template AggregateFunction>(); + else if (which.isStringOrFixedString()) + return std::make_shared::template AggregateFunction>(); + else if (which.isUUID()) + return std::make_shared::template AggregateFunction>(); + else if (which.isTuple()) + { + if (use_exact_hash_function) + return std::make_shared::template AggregateFunctionVariadic>(argument_types); + else + return std::make_shared::template AggregateFunctionVariadic>(argument_types); + } } + + /// "Variadic" method also works as a fallback generic case for a single argument. + if (use_exact_hash_function) + return std::make_shared::template AggregateFunctionVariadic>(argument_types); + else + return std::make_shared::template AggregateFunctionVariadic>(argument_types); } - /// "Variadic" method also works as a fallback generic case for single argument. - if (use_exact_hash_function) - return std::make_shared>(argument_types, precision); - else - return std::make_shared>(argument_types, precision); -} + AggregateFunctionPtr createAggregateFunctionUniqCombined( + const std::string & name, const DataTypes & argument_types, const Array & params) + { + UInt8 precision = 17; + + if (!params.empty()) + { + if (params.size() != 1) + throw Exception( + "Aggregate function " + name + " requires one parameter or less.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + UInt64 precision_param = applyVisitor(FieldVisitorConvertToNumber(), params[0]); + + // This range is hardcoded below + if (precision_param > 20 || precision_param < 12) + throw Exception( + "Parameter for aggregate function " + name + "is out or range: [12, 20].", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + + precision = precision_param; + } + + if (argument_types.empty()) + throw Exception("Incorrect number of arguments for aggregate function " + name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + switch (precision) + { + case 12: + return createAggregateFunctionWithK<12>(argument_types); + case 13: + return createAggregateFunctionWithK<13>(argument_types); + case 14: + return createAggregateFunctionWithK<14>(argument_types); + case 15: + return createAggregateFunctionWithK<15>(argument_types); + case 16: + return createAggregateFunctionWithK<16>(argument_types); + case 17: + return createAggregateFunctionWithK<17>(argument_types); + case 18: + return createAggregateFunctionWithK<18>(argument_types); + case 19: + return createAggregateFunctionWithK<19>(argument_types); + case 20: + return createAggregateFunctionWithK<20>(argument_types); + } + + // TODO: not reached! + return {}; + } } // namespace diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h index 234f24719c5..1e855cb8f5f 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h @@ -19,14 +19,10 @@ #include -#define DEFAULT_HLL_PRECISION 17 - namespace DB { namespace detail { - const UInt8 UNIQ_COMBINED_DEFAULT_PRECISION = DEFAULT_HLL_PRECISION; - /** Hash function for uniqCombined. */ template @@ -70,12 +66,9 @@ namespace detail } // namespace detail -template +template struct AggregateFunctionUniqCombinedDataWithKey { - using value_type = Key; - - template using Set = CombinedCardinalityEstimator>, 16, @@ -86,237 +79,27 @@ struct AggregateFunctionUniqCombinedDataWithKey HyperLogLogBiasEstimator, HyperLogLogMode::FullFeatured>; - mutable UInt8 inited = 0; - union - { - mutable char stub[sizeof(Set<20>)]; - mutable Set<12> set_12; - mutable Set<13> set_13; - mutable Set<14> set_14; - mutable Set<15> set_15; - mutable Set<16> set_16; - mutable Set<17> set_17; - mutable Set<18> set_18; - mutable Set<19> set_19; - mutable Set<20> set_20; - }; - - AggregateFunctionUniqCombinedDataWithKey() : stub() {} - - ~AggregateFunctionUniqCombinedDataWithKey() - { - switch (inited) - { - case 12: - set_12.~CombinedCardinalityEstimator(); - break; - case 13: - set_13.~CombinedCardinalityEstimator(); - break; - case 14: - set_14.~CombinedCardinalityEstimator(); - break; - case 15: - set_15.~CombinedCardinalityEstimator(); - break; - case 16: - set_16.~CombinedCardinalityEstimator(); - break; - case 17: - set_17.~CombinedCardinalityEstimator(); - break; - case 18: - set_18.~CombinedCardinalityEstimator(); - break; - case 19: - set_19.~CombinedCardinalityEstimator(); - break; - case 20: - set_20.~CombinedCardinalityEstimator(); - break; - } - } - - void ALWAYS_INLINE init(UInt8 precision) const - { - if (inited) - return; - - // TODO: assert "inited == precision" - - switch (precision) - { - case 12: - new (&set_12) Set<12>; - break; - case 13: - new (&set_13) Set<13>; - break; - case 14: - new (&set_14) Set<14>; - break; - case 15: - new (&set_15) Set<15>; - break; - case 16: - new (&set_16) Set<16>; - break; - case 17: - new (&set_17) Set<17>; - break; - case 18: - new (&set_18) Set<18>; - break; - case 19: - new (&set_19) Set<19>; - break; - case 20: - new (&set_20) Set<20>; - break; - } - inited = precision; - } - -#define SET_METHOD(method) \ - switch (inited) \ - { \ - case 12: \ - set_12.method; \ - break; \ - case 13: \ - set_13.method; \ - break; \ - case 14: \ - set_14.method; \ - break; \ - case 15: \ - set_15.method; \ - break; \ - case 16: \ - set_16.method; \ - break; \ - case 17: \ - set_17.method; \ - break; \ - case 18: \ - set_18.method; \ - break; \ - case 19: \ - set_19.method; \ - break; \ - case 20: \ - set_20.method; \ - break; \ - } - -#define SET_RETURN_METHOD(method) \ - switch (inited) \ - { \ - case 12: \ - return set_12.method; \ - case 13: \ - return set_13.method; \ - case 14: \ - return set_14.method; \ - case 15: \ - return set_15.method; \ - case 16: \ - return set_16.method; \ - case 17: \ - return set_17.method; \ - case 18: \ - return set_18.method; \ - case 19: \ - return set_19.method; \ - case 20: \ - return set_20.method; \ - default: \ - /* TODO: UNREACHABLE! */ \ - return set_12.method; \ - } - - void ALWAYS_INLINE insert(Key value) - { - SET_METHOD(insert(value)); - } - - void ALWAYS_INLINE merge(const AggregateFunctionUniqCombinedDataWithKey & rhs) - { - switch (inited) - { - case 12: - set_12.merge(rhs.set_12); - break; - case 13: - set_13.merge(rhs.set_13); - break; - case 14: - set_14.merge(rhs.set_14); - break; - case 15: - set_15.merge(rhs.set_15); - break; - case 16: - set_16.merge(rhs.set_16); - break; - case 17: - set_17.merge(rhs.set_17); - break; - case 18: - set_18.merge(rhs.set_18); - break; - case 19: - set_19.merge(rhs.set_19); - break; - case 20: - set_20.merge(rhs.set_20); - break; - } - } - - void ALWAYS_INLINE write(DB::WriteBuffer & out) const - { - SET_METHOD(write(out)); - } - - void ALWAYS_INLINE read(DB::ReadBuffer & in) - { - SET_METHOD(read(in)); - } - - UInt32 ALWAYS_INLINE size() const - { - SET_RETURN_METHOD(size()); - } - -#undef SET_METHOD -#undef SET_RETURN_METHOD -#undef DEFAULT_HLL_PRECISION + Set set; }; -template -struct AggregateFunctionUniqCombinedData : public AggregateFunctionUniqCombinedDataWithKey +template +struct AggregateFunctionUniqCombinedData : public AggregateFunctionUniqCombinedDataWithKey { }; -template <> -struct AggregateFunctionUniqCombinedData : public AggregateFunctionUniqCombinedDataWithKey +template +struct AggregateFunctionUniqCombinedData : public AggregateFunctionUniqCombinedDataWithKey { }; -template +template class AggregateFunctionUniqCombined final - : public IAggregateFunctionDataHelper, AggregateFunctionUniqCombined> + : public IAggregateFunctionDataHelper, AggregateFunctionUniqCombined> { -private: - const UInt8 precision; - public: - explicit AggregateFunctionUniqCombined(UInt8 precision) : precision(precision) {} - String getName() const override { return "uniqCombined"; @@ -327,44 +110,38 @@ public: return std::make_shared(); } - void create(AggregateDataPtr place) const override - { - IAggregateFunctionDataHelper, AggregateFunctionUniqCombined>::create(place); - this->data(place).init(precision); - } - void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override { if constexpr (!std::is_same_v) { const auto & value = static_cast &>(*columns[0]).getData()[row_num]; - this->data(place).insert(detail::AggregateFunctionUniqCombinedTraits::hash(value)); + this->data(place).set.insert(detail::AggregateFunctionUniqCombinedTraits::hash(value)); } else { StringRef value = columns[0]->getDataAt(row_num); - this->data(place).insert(CityHash_v1_0_2::CityHash64(value.data, value.size)); + this->data(place).set.insert(CityHash_v1_0_2::CityHash64(value.data, value.size)); } } void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override { - this->data(place).merge(this->data(rhs)); + this->data(place).set.merge(this->data(rhs).set); } void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override { - this->data(place).write(buf); + this->data(place).set.write(buf); } void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { - this->data(place).read(buf); + this->data(place).set.read(buf); } void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override { - static_cast(to).getData().push_back(this->data(place).size()); + static_cast(to).getData().push_back(this->data(place).set.size()); } const char * getHeaderFilePath() const override @@ -377,16 +154,15 @@ public: * You can pass multiple arguments as is; You can also pass one argument - a tuple. * But (for the possibility of efficient implementation), you can not pass several arguments, among which there are tuples. */ -template -class AggregateFunctionUniqCombinedVariadic final : public IAggregateFunctionDataHelper, - AggregateFunctionUniqCombinedVariadic> +template +class AggregateFunctionUniqCombinedVariadic final : public IAggregateFunctionDataHelper, + AggregateFunctionUniqCombinedVariadic> { private: size_t num_args = 0; - UInt8 precision; public: - AggregateFunctionUniqCombinedVariadic(const DataTypes & arguments, UInt8 precision) : precision(precision) + explicit AggregateFunctionUniqCombinedVariadic(const DataTypes & arguments) { if (argument_is_tuple) num_args = typeid_cast(*arguments[0]).getElements().size(); @@ -404,37 +180,30 @@ public: return std::make_shared(); } - void create(AggregateDataPtr place) const override - { - IAggregateFunctionDataHelper, - AggregateFunctionUniqCombinedVariadic>::create(place); - this->data(place).init(precision); - } - void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override { - this->data(place).insert(typename AggregateFunctionUniqCombinedData::value_type( + this->data(place).set.insert(typename AggregateFunctionUniqCombinedData::Set::value_type( UniqVariadicHash::apply(num_args, columns, row_num))); } void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override { - this->data(place).merge(this->data(rhs)); + this->data(place).set.merge(this->data(rhs).set); } void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override { - this->data(place).write(buf); + this->data(place).set.write(buf); } void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { - this->data(place).read(buf); + this->data(place).set.read(buf); } void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override { - static_cast(to).getData().push_back(this->data(place).size()); + static_cast(to).getData().push_back(this->data(place).set.size()); } const char * getHeaderFilePath() const override From c8868cffaa6214584af13d5b335afc413dd603f9 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Mon, 22 Oct 2018 22:34:39 +0300 Subject: [PATCH 010/145] Update AggregateFunctionUniqCombined.cpp --- dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp index 1ad628588d6..3c747d94e63 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp @@ -30,8 +30,7 @@ namespace template AggregateFunctionPtr createAggregateFunctionWithK(const DataTypes & argument_types) { - /// We use exact hash function if the user wants it; - /// or if the arguments are not contiguous in memory, because only exact hash function have support for this case. + /// We use exact hash function if the arguments are not contiguous in memory, because only exact hash function has support for this case. bool use_exact_hash_function = !isAllArgumentsContiguousInMemory(argument_types); if (argument_types.size() == 1) From f90783d885a040aef48aed0725d86b2bba9a0092 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Mon, 22 Oct 2018 22:36:32 +0300 Subject: [PATCH 011/145] Update AggregateFunctionUniqCombined.cpp --- dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp index 3c747d94e63..0430c8872f1 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp @@ -69,6 +69,7 @@ namespace AggregateFunctionPtr createAggregateFunctionUniqCombined( const std::string & name, const DataTypes & argument_types, const Array & params) { + /// Reasonable default value, selected to be comparable in quality with "uniq" aggregate function. UInt8 precision = 17; if (!params.empty()) From bd148bb082c99a622b9ccdc85ec676c23ffea139 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Mon, 22 Oct 2018 22:37:55 +0300 Subject: [PATCH 012/145] Update AggregateFunctionUniqCombined.cpp --- dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp index 0430c8872f1..0ad4a093ed5 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp @@ -69,6 +69,7 @@ namespace AggregateFunctionPtr createAggregateFunctionUniqCombined( const std::string & name, const DataTypes & argument_types, const Array & params) { + /// log2 of the number of cells in HyperLogLog. /// Reasonable default value, selected to be comparable in quality with "uniq" aggregate function. UInt8 precision = 17; From fd86a6bea5b51aca607889819b2dff9c0e120503 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Mon, 22 Oct 2018 22:38:46 +0300 Subject: [PATCH 013/145] Update AggregateFunctionUniqCombined.cpp --- dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp index 0ad4a093ed5..90b84d3b927 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp @@ -114,8 +114,7 @@ namespace return createAggregateFunctionWithK<20>(argument_types); } - // TODO: not reached! - return {}; + __builtin_unreachable(); } } // namespace From 9296ef50089b8427734f8a9b0e59b6ba98b2b0d0 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Tue, 23 Oct 2018 17:59:24 +0300 Subject: [PATCH 014/145] Temporarily disable precalculated bias estimations for precisions other than 17. --- dbms/scripts/gen-bias-data.py | 2 +- .../AggregateFunctionUniqCombined.h | 12 ++++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/dbms/scripts/gen-bias-data.py b/dbms/scripts/gen-bias-data.py index 7edc9948e76..034cfcca7dd 100755 --- a/dbms/scripts/gen-bias-data.py +++ b/dbms/scripts/gen-bias-data.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3.4 +#!/usr/bin/python3 # -*- coding: utf-8 -*- import sys diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h index 1e855cb8f5f..fac220b7bbb 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h @@ -68,12 +68,20 @@ namespace detail template struct AggregateFunctionUniqCombinedDataWithKey +{ + using Set = CombinedCardinalityEstimator>, 16, K - 3, K, TrivialHash, Key>; + + Set set; +}; + +template +struct AggregateFunctionUniqCombinedDataWithKey { using Set = CombinedCardinalityEstimator>, 16, - K - 3, - K, + 14, + 17, TrivialHash, Key, HyperLogLogBiasEstimator, From aa6a69b6350e668014b6183e170666543173694f Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Wed, 24 Oct 2018 17:28:23 +0300 Subject: [PATCH 015/145] Add TODO comment --- dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h index fac220b7bbb..f15c4e7e453 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h @@ -69,6 +69,8 @@ namespace detail template struct AggregateFunctionUniqCombinedDataWithKey { + // TODO(ilezhankin): pre-generate values for |UniqCombinedBiasData|, + // at the moment gen-bias-data.py script doesn't work. using Set = CombinedCardinalityEstimator>, 16, K - 3, K, TrivialHash, Key>; Set set; From 68138a76f8191db8fb21601989b56702a1492e4c Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Wed, 24 Oct 2018 21:05:55 +0300 Subject: [PATCH 016/145] Add tests for different HLL precisions --- ...12_shard_aggregate_function_uniq.reference | 418 ++++++++++++++++++ .../00212_shard_aggregate_function_uniq.sql | 36 ++ 2 files changed, 454 insertions(+) diff --git a/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.reference b/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.reference index c7c4f8ebacd..7ca0f2fb7be 100644 --- a/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.reference +++ b/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.reference @@ -292,6 +292,58 @@ 31 162 35 162 36 162 +0 54708 +1 53721 +3 53226 +6 54532 +7 52275 +9 53417 +10 26931 +11 54428 +13 53409 +14 53188 +17 55120 +19 54123 +20 53293 +21 53928 +22 53827 +26 53920 +31 53763 +35 54635 +36 53155 +1 1 +3 1 +6 1 +7 1 +9 1 +11 1 +14 1 +17 1 +19 1 +20 2 +26 1 +31 1 +35 1 +36 1 +0 162 +1 162 +3 162 +6 162 +7 163 +9 163 +10 81 +11 163 +13 162 +14 162 +17 162 +19 162 +20 162 +21 162 +22 162 +26 162 +31 162 +35 162 +36 162 0 53901 1 54056 3 53999 @@ -311,6 +363,58 @@ 31 53948 35 53931 36 53982 +1 1 +3 1 +6 1 +7 1 +9 1 +11 1 +14 1 +17 1 +19 1 +20 2 +26 1 +31 1 +35 1 +36 1 +0 162 +1 162 +3 162 +6 162 +7 163 +9 163 +10 81 +11 163 +13 162 +14 162 +17 162 +19 162 +20 162 +21 162 +22 162 +26 162 +31 162 +35 162 +36 162 +0 54054 +1 54054 +3 54054 +6 54054 +7 54053 +9 54053 +10 27027 +11 54055 +13 54054 +14 54054 +17 54054 +19 54053 +20 54053 +21 54054 +22 54053 +26 54053 +31 54054 +35 54054 +36 54054 0.125 1 0.5 1 0.05 1 @@ -396,6 +500,58 @@ 0.043 162 0.037 162 0.071 162 +0.045 53928 +0.125 52275 +0.5 53721 +0.05 54123 +0.143 54532 +0.091 26931 +0.056 55120 +0.048 53293 +0.083 54428 +0.25 53226 +1 54708 +0.1 53417 +0.028 54635 +0.027 53155 +0.031 53763 +0.067 53188 +0.043 53827 +0.037 53920 +0.071 53409 +0.125 1 +0.5 1 +0.05 1 +0.143 1 +0.056 1 +0.048 2 +0.083 1 +0.25 1 +0.1 1 +0.028 1 +0.027 1 +0.031 1 +0.067 1 +0.037 1 +0.045 162 +0.125 163 +0.5 162 +0.05 162 +0.143 162 +0.091 81 +0.056 162 +0.048 162 +0.083 163 +0.25 162 +1 162 +0.1 163 +0.028 162 +0.027 162 +0.031 162 +0.067 162 +0.043 162 +0.037 162 +0.071 162 0.045 54117 0.125 54213 0.5 54056 @@ -415,6 +571,58 @@ 0.043 54150 0.037 54047 0.071 53963 +0.125 1 +0.5 1 +0.05 1 +0.143 1 +0.056 1 +0.048 2 +0.083 1 +0.25 1 +0.1 1 +0.028 1 +0.027 1 +0.031 1 +0.067 1 +0.037 1 +0.045 162 +0.125 163 +0.5 162 +0.05 162 +0.143 162 +0.091 81 +0.056 162 +0.048 162 +0.083 163 +0.25 162 +1 162 +0.1 163 +0.028 162 +0.027 162 +0.031 162 +0.067 162 +0.043 162 +0.037 162 +0.071 162 +0.045 54054 +0.125 54053 +0.5 54054 +0.05 54053 +0.143 54054 +0.091 27027 +0.056 54054 +0.048 54053 +0.083 54055 +0.25 54054 +1 54054 +0.1 54053 +0.028 54054 +0.027 54054 +0.031 54054 +0.067 54054 +0.043 54053 +0.037 54053 +0.071 54054 0.5 1 0.05 1 0.25 1 @@ -500,6 +708,58 @@ 0.037 162 0.1 163 1 162 +0.5 53721 +0.05 54123 +0.25 53226 +0.048 53293 +0.091 26931 +0.043 53827 +0.071 53409 +0.083 54428 +0.125 52275 +0.031 53763 +0.143 54532 +0.028 54635 +0.067 53188 +0.045 53928 +0.027 53155 +0.056 55120 +0.037 53920 +0.1 53417 +1 54708 +0.5 1 +0.05 1 +0.25 1 +0.048 2 +0.083 1 +0.125 1 +0.031 1 +0.143 1 +0.028 1 +0.067 1 +0.027 1 +0.056 1 +0.037 1 +0.1 1 +0.5 162 +0.05 162 +0.25 162 +0.048 162 +0.091 81 +0.043 162 +0.071 162 +0.083 163 +0.125 163 +0.031 162 +0.143 162 +0.028 162 +0.067 162 +0.045 162 +0.027 162 +0.056 162 +0.037 162 +0.1 163 +1 162 0.5 54056 0.05 53923 0.25 53999 @@ -519,6 +779,58 @@ 0.037 54047 0.1 53853 1 53901 +0.5 1 +0.05 1 +0.25 1 +0.048 2 +0.083 1 +0.125 1 +0.031 1 +0.143 1 +0.028 1 +0.067 1 +0.027 1 +0.056 1 +0.037 1 +0.1 1 +0.5 162 +0.05 162 +0.25 162 +0.048 162 +0.091 81 +0.043 162 +0.071 162 +0.083 163 +0.125 163 +0.031 162 +0.143 162 +0.028 162 +0.067 162 +0.045 162 +0.027 162 +0.056 162 +0.037 162 +0.1 163 +1 162 +0.5 54054 +0.05 54053 +0.25 54054 +0.048 54053 +0.091 27027 +0.043 54053 +0.071 54054 +0.083 54055 +0.125 54053 +0.031 54054 +0.143 54054 +0.028 54054 +0.067 54054 +0.045 54054 +0.027 54054 +0.056 54054 +0.037 54053 +0.1 54053 +1 54054 1 1 3 1 6 1 @@ -604,6 +916,58 @@ 31 162 35 162 36 162 +0 52613 +1 54468 +3 53824 +6 54441 +7 54543 +9 51908 +10 26964 +11 54013 +13 53178 +14 54113 +17 54662 +19 54697 +20 53279 +21 55301 +22 53693 +26 53873 +31 55200 +35 54808 +36 53051 +1 1 +3 1 +6 1 +7 1 +9 1 +11 1 +14 1 +17 1 +19 1 +20 2 +26 1 +31 1 +35 1 +36 1 +0 162 +1 162 +3 162 +6 162 +7 163 +9 163 +10 81 +11 163 +13 162 +14 162 +17 162 +19 162 +20 162 +21 162 +22 162 +26 162 +31 162 +35 162 +36 162 0 54195 1 54086 3 54127 @@ -623,5 +987,59 @@ 31 54074 35 54153 36 53999 +1 1 +3 1 +6 1 +7 1 +9 1 +11 1 +14 1 +17 1 +19 1 +20 2 +26 1 +31 1 +35 1 +36 1 +0 162 +1 162 +3 162 +6 162 +7 163 +9 163 +10 81 +11 163 +13 162 +14 162 +17 162 +19 162 +20 162 +21 162 +22 162 +26 162 +31 162 +35 162 +36 162 +0 54054 +1 54054 +3 54054 +6 54054 +7 54054 +9 54054 +10 27027 +11 54055 +13 54054 +14 54054 +17 54054 +19 54054 +20 54054 +21 54054 +22 54054 +26 54054 +31 54054 +35 54054 +36 54054 +1 +1 1 1 diff --git a/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.sql b/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.sql index a2ab15775c5..ae54831b1af 100644 --- a/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.sql +++ b/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.sql @@ -24,34 +24,70 @@ SELECT Y, uniqCombined(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y SELECT Y, uniqCombined(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT Y, uniqCombined(12)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(12)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(12)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT Y, uniqCombined(20)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(20)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(20)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT Y, uniqCombined(12)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(12)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(12)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT Y, uniqCombined(20)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(20)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(20)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT Y, uniqCombined(12)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(12)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(12)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT Y, uniqCombined(20)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(20)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(20)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + SELECT Y, uniqCombined(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT Y, uniqCombined(12)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(12)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(12)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + SELECT Y, uniqCombined(17)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(17)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(17)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT Y, uniqCombined(20)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(20)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(20)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + SELECT uniqCombined(dummy) FROM remote('127.0.0.{2,3}', system.one); +SELECT uniqCombined(12)(dummy) FROM remote('127.0.0.{2,3}', system.one); + SELECT uniqCombined(17)(dummy) FROM remote('127.0.0.{2,3}', system.one); + +SELECT uniqCombined(20)(dummy) FROM remote('127.0.0.{2,3}', system.one); From 0592081f2fe4b550c28d06dc8b017b34b64bdbbf Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Thu, 25 Oct 2018 16:17:29 +0300 Subject: [PATCH 017/145] Reduce maximum allowable size of |HashSet|. --- .../AggregateFunctions/AggregateFunctionUniqCombined.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h index f15c4e7e453..99cc3a93890 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h @@ -71,7 +71,11 @@ struct AggregateFunctionUniqCombinedDataWithKey { // TODO(ilezhankin): pre-generate values for |UniqCombinedBiasData|, // at the moment gen-bias-data.py script doesn't work. - using Set = CombinedCardinalityEstimator>, 16, K - 3, K, TrivialHash, Key>; + + // We want to migrate from |HashSet| to |HyperLogLogCounter| when the sizes in memory become almost equal. + // The size per element in |HashSet| is sizeof(Key)*2 bytes, and the overall size of |HyperLogLogCounter| is 2^K * 6 bits. + // For Key=UInt32 we can calculate: 2^X * 4 * 2 ≤ 2^(K-3) * 6 ⇒ X ≤ K-4. + using Set = CombinedCardinalityEstimator>, 16, K - 4, K, TrivialHash, Key>; Set set; }; @@ -82,7 +86,7 @@ struct AggregateFunctionUniqCombinedDataWithKey using Set = CombinedCardinalityEstimator>, 16, - 14, + 13, 17, TrivialHash, Key, From 73e9f5057dc1eb6c10455d6a85af5c6d15e13206 Mon Sep 17 00:00:00 2001 From: Sabyanin Maxim Date: Sat, 27 Oct 2018 23:39:59 +0300 Subject: [PATCH 018/145] fix code style checker errors --- dbms/src/Parsers/ASTColumnDeclaration.h | 6 ++++-- dbms/src/Parsers/ParserCreateQuery.h | 3 ++- dbms/src/Storages/ColumnComment.h | 3 ++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/dbms/src/Parsers/ASTColumnDeclaration.h b/dbms/src/Parsers/ASTColumnDeclaration.h index ea078ad0052..0b4aeddb796 100644 --- a/dbms/src/Parsers/ASTColumnDeclaration.h +++ b/dbms/src/Parsers/ASTColumnDeclaration.h @@ -36,7 +36,8 @@ public: res->children.push_back(res->default_expression); } - if (comment_expression) { + if (comment_expression) + { res->comment_expression = comment_expression->clone(); res->children.push_back(res->comment_expression); } @@ -62,7 +63,8 @@ public: default_expression->formatImpl(settings, state, frame); } - if (comment_expression) { + if (comment_expression) + { settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "COMMENT" << (settings.hilite ? hilite_none : "") << ' '; comment_expression->formatImpl(settings, state, frame); } diff --git a/dbms/src/Parsers/ParserCreateQuery.h b/dbms/src/Parsers/ParserCreateQuery.h index 3931b228c29..d5ae385e69e 100644 --- a/dbms/src/Parsers/ParserCreateQuery.h +++ b/dbms/src/Parsers/ParserCreateQuery.h @@ -173,7 +173,8 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E column_declaration->children.push_back(std::move(default_expression)); } - if (comment_expression) { + if (comment_expression) + { column_declaration->comment_expression = comment_expression; column_declaration->children.push_back(std::move(comment_expression)); } diff --git a/dbms/src/Storages/ColumnComment.h b/dbms/src/Storages/ColumnComment.h index 33e44fb5188..2e5553a8464 100644 --- a/dbms/src/Storages/ColumnComment.h +++ b/dbms/src/Storages/ColumnComment.h @@ -8,7 +8,8 @@ namespace DB { -struct ColumnComment { +struct ColumnComment +{ ASTPtr expression; }; From bdcdc15b1eb398bf715e05fb3652a21650bbab68 Mon Sep 17 00:00:00 2001 From: Sabyanin Maxim Date: Tue, 6 Nov 2018 16:26:43 +0300 Subject: [PATCH 019/145] fixed comment test. remove ColumnComment struct. style fixes --- .../Interpreters/InterpreterCreateQuery.cpp | 6 ++-- .../Interpreters/InterpreterDescribeQuery.cpp | 24 +++++++++++--- dbms/src/Parsers/ASTAlterQuery.cpp | 2 +- dbms/src/Parsers/ASTColumnDeclaration.h | 13 ++------ dbms/src/Parsers/ParserCreateQuery.h | 4 ++- dbms/src/Parsers/ParserTablePropertiesQuery.h | 1 - dbms/src/Storages/AlterCommands.cpp | 7 ++-- dbms/src/Storages/AlterCommands.h | 6 ++-- dbms/src/Storages/ColumnComment.cpp | 7 ---- dbms/src/Storages/ColumnComment.h | 9 +---- dbms/src/Storages/ColumnsDescription.cpp | 23 +++++++------ .../Storages/System/StorageSystemColumns.cpp | 3 +- .../00725_comment_columns.reference | 8 ++--- .../0_stateless/00725_comment_columns.sql | 33 ++++++++++--------- 14 files changed, 72 insertions(+), 74 deletions(-) delete mode 100644 dbms/src/Storages/ColumnComment.cpp diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index 29f613307a9..aeaf48f4617 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -220,9 +220,9 @@ static ParsedColumns parseColumns(const ASTExpressionList & column_list_ast, con default_expr_list->children.emplace_back(setAlias(col_decl.default_expression->clone(), col_decl.name)); } - if (col_decl.comment_expression) + if (!col_decl.comment.empty()) { - comments.emplace(col_decl.name, ColumnComment{col_decl.comment_expression}); + comments.emplace(col_decl.name, col_decl.comment); } } @@ -351,7 +351,7 @@ ASTPtr InterpreterCreateQuery::formatColumns(const ColumnsDescription & columns) const auto comments_it = columns.comments.find(column.name); if (comments_it != std::end(columns.comments)) { - column_declaration->comment_expression = comments_it->second.expression->clone(); + column_declaration->comment = comments_it->second; } columns_list->children.push_back(column_declaration_ptr); diff --git a/dbms/src/Interpreters/InterpreterDescribeQuery.cpp b/dbms/src/Interpreters/InterpreterDescribeQuery.cpp index 536c554e996..6ddd9d93319 100644 --- a/dbms/src/Interpreters/InterpreterDescribeQuery.cpp +++ b/dbms/src/Interpreters/InterpreterDescribeQuery.cpp @@ -45,6 +45,10 @@ Block InterpreterDescribeQuery::getSampleBlock() col.name = "default_expression"; block.insert(col); + // TODO: may be it unneeded + col.name = "comment_expression"; + block.insert(col); + return block; } @@ -55,6 +59,7 @@ BlockInputStreamPtr InterpreterDescribeQuery::executeImpl() NamesAndTypesList columns; ColumnDefaults column_defaults; + ColumnComments column_comments; StoragePtr table; auto table_expression = typeid_cast(ast.table_expression.get()); @@ -101,6 +106,7 @@ BlockInputStreamPtr InterpreterDescribeQuery::executeImpl() auto table_lock = table->lockStructure(false, __PRETTY_FUNCTION__); columns = table->getColumns().getAll(); column_defaults = table->getColumns().defaults; + column_comments = table->getColumns().comments; } Block sample_block = getSampleBlock(); @@ -111,16 +117,26 @@ BlockInputStreamPtr InterpreterDescribeQuery::executeImpl() res_columns[0]->insert(column.name); res_columns[1]->insert(column.type->getName()); - const auto it = column_defaults.find(column.name); - if (it == std::end(column_defaults)) + const auto defaults_it = column_defaults.find(column.name); + if (defaults_it == std::end(column_defaults)) { res_columns[2]->insertDefault(); res_columns[3]->insertDefault(); } else { - res_columns[2]->insert(toString(it->second.kind)); - res_columns[3]->insert(queryToString(it->second.expression)); + res_columns[2]->insert(toString(defaults_it->second.kind)); + res_columns[3]->insert(queryToString(defaults_it->second.expression)); + } + + const auto comments_it = column_comments.find(column.name); + if (comments_it == std::end(column_comments)) + { + res_columns[4]->insertDefault(); + } + else + { + res_columns[4]->insert(comments_it->second); } } diff --git a/dbms/src/Parsers/ASTAlterQuery.cpp b/dbms/src/Parsers/ASTAlterQuery.cpp index ec43d7c76dc..eee67223496 100644 --- a/dbms/src/Parsers/ASTAlterQuery.cpp +++ b/dbms/src/Parsers/ASTAlterQuery.cpp @@ -146,7 +146,7 @@ void ASTAlterCommand::formatImpl( settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "COMMENT COLUMN " << (settings.hilite ? hilite_none : ""); column->formatImpl(settings, state, frame); settings.ostr << " "; - comment->formatImpl(settings, state, frame); + // comment->formatImpl(settings, state, frame); } else throw Exception("Unexpected type of ALTER", ErrorCodes::UNEXPECTED_AST_STRUCTURE); diff --git a/dbms/src/Parsers/ASTColumnDeclaration.h b/dbms/src/Parsers/ASTColumnDeclaration.h index 0b4aeddb796..0680e69d300 100644 --- a/dbms/src/Parsers/ASTColumnDeclaration.h +++ b/dbms/src/Parsers/ASTColumnDeclaration.h @@ -15,7 +15,7 @@ public: ASTPtr type; String default_specifier; ASTPtr default_expression; - ASTPtr comment_expression; + String comment; String getID() const override { return "ColumnDeclaration_" + name; } @@ -36,12 +36,6 @@ public: res->children.push_back(res->default_expression); } - if (comment_expression) - { - res->comment_expression = comment_expression->clone(); - res->children.push_back(res->comment_expression); - } - return res; } @@ -63,10 +57,9 @@ public: default_expression->formatImpl(settings, state, frame); } - if (comment_expression) + if (!comment.empty()) { - settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "COMMENT" << (settings.hilite ? hilite_none : "") << ' '; - comment_expression->formatImpl(settings, state, frame); + settings.ostr << ' ' << (settings.hilite ? hilite_none : "") << "COMMENT" << (settings.hilite ? hilite_none : "") << ' ' << comment; } } }; diff --git a/dbms/src/Parsers/ParserCreateQuery.h b/dbms/src/Parsers/ParserCreateQuery.h index d5ae385e69e..6705a38c63a 100644 --- a/dbms/src/Parsers/ParserCreateQuery.h +++ b/dbms/src/Parsers/ParserCreateQuery.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -175,7 +176,8 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E if (comment_expression) { - column_declaration->comment_expression = comment_expression; + auto & literal_value = typeid_cast(*comment_expression).value; + column_declaration->comment = literal_value.safeGet(); column_declaration->children.push_back(std::move(comment_expression)); } diff --git a/dbms/src/Parsers/ParserTablePropertiesQuery.h b/dbms/src/Parsers/ParserTablePropertiesQuery.h index f97babe194a..c9f8abf8f19 100644 --- a/dbms/src/Parsers/ParserTablePropertiesQuery.h +++ b/dbms/src/Parsers/ParserTablePropertiesQuery.h @@ -7,7 +7,6 @@ namespace DB { -// TODO: возможно тут тоже надо разобраться /** Query (EXISTS | SHOW CREATE) [TABLE] [db.]name [FORMAT format] */ class ParserTablePropertiesQuery : public IParserBase diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index 5b9b6f3b0db..fa0127a26b7 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -94,9 +94,10 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ { AlterCommand command; command.type = COMMENT_COLUMN; - const auto & ast_identifier = typeid_cast(*command_ast->column); + const auto & ast_identifier = typeid_cast(*command_ast->column); command.column_name = ast_identifier.name; - command.comment_expression = command_ast->comment; + const auto & ast_comment = typeid_cast(*command_ast->comment); + command.comment = ast_comment.value.get(); return command; } else @@ -249,7 +250,7 @@ void AlterCommand::apply(ColumnsDescription & columns_description) const else if (type == COMMENT_COLUMN) { - columns_description.comments[column_name].expression = comment_expression; + columns_description.comments[column_name] = comment; } else throw Exception("Wrong parameter type in ALTER query", ErrorCodes::LOGICAL_ERROR); diff --git a/dbms/src/Storages/AlterCommands.h b/dbms/src/Storages/AlterCommands.h index 1bbf3ddbee0..7c56740a2fe 100644 --- a/dbms/src/Storages/AlterCommands.h +++ b/dbms/src/Storages/AlterCommands.h @@ -36,7 +36,7 @@ struct AlterCommand ColumnDefaultKind default_kind{}; ASTPtr default_expression{}; - ASTPtr comment_expression; + String comment; /// For ADD - after which column to add a new one. If an empty string, add to the end. To add to the beginning now it is impossible. String after_column; @@ -47,9 +47,9 @@ struct AlterCommand AlterCommand() = default; AlterCommand(const Type type, const String & column_name, const DataTypePtr & data_type, const ColumnDefaultKind default_kind, const ASTPtr & default_expression, - const String & after_column = String{}, const ASTPtr & comment_expression = nullptr) + const String & after_column = String{}, const String & comment = "") : type{type}, column_name{column_name}, data_type{data_type}, default_kind{default_kind}, - default_expression{default_expression}, comment_expression(comment_expression), after_column{after_column} + default_expression{default_expression}, comment(comment), after_column{after_column} {} static std::optional parse(const ASTAlterCommand * command); diff --git a/dbms/src/Storages/ColumnComment.cpp b/dbms/src/Storages/ColumnComment.cpp deleted file mode 100644 index 076ec1e19df..00000000000 --- a/dbms/src/Storages/ColumnComment.cpp +++ /dev/null @@ -1,7 +0,0 @@ -#include -#include - -bool DB::operator== (const DB::ColumnComment& lhs, const DB::ColumnComment& rhs) -{ - return queryToString(lhs.expression) == queryToString(rhs.expression); -} diff --git a/dbms/src/Storages/ColumnComment.h b/dbms/src/Storages/ColumnComment.h index 2e5553a8464..521d1fd9d17 100644 --- a/dbms/src/Storages/ColumnComment.h +++ b/dbms/src/Storages/ColumnComment.h @@ -8,13 +8,6 @@ namespace DB { -struct ColumnComment -{ - ASTPtr expression; -}; - -bool operator== (const ColumnComment& lhs, const ColumnComment& rhs); - -using ColumnComments = std::unordered_map; +using ColumnComments = std::unordered_map; } diff --git a/dbms/src/Storages/ColumnsDescription.cpp b/dbms/src/Storages/ColumnsDescription.cpp index e3e76f6012a..da540258329 100644 --- a/dbms/src/Storages/ColumnsDescription.cpp +++ b/dbms/src/Storages/ColumnsDescription.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -10,6 +11,7 @@ #include #include #include +#include #include #include @@ -82,7 +84,7 @@ String ColumnsDescription::toString() const writeChar(' ', buf); writeText(column.type->getName(), buf); - const bool exist_comment = comments_it != std::end(comments) && !comments_it->second.expression; + const bool exist_comment = comments_it != std::end(comments); if (defaults_it != std::end(defaults)) { writeChar('\t', buf); @@ -98,7 +100,7 @@ String ColumnsDescription::toString() const if (exist_comment) { writeChar('\t', buf); - writeText(queryToString(comments_it->second.expression), buf); + writeText(comments_it->second, buf); } writeChar('\n', buf); @@ -146,7 +148,7 @@ std::optional parseDefaulfInfo(ReadBufferFromString & buf) return ParsedDefaultInfo{default_kind, std::move(default_expr)}; } -ASTPtr parseCommentExpr(ReadBufferFromString& buf) +String parseComment(ReadBufferFromString& buf) { if (*buf.position() == '\n') { @@ -154,12 +156,9 @@ ASTPtr parseCommentExpr(ReadBufferFromString& buf) } ParserExpression parser_expr; - String comment_expr_str; - readText(comment_expr_str, buf); - const char * begin = comment_expr_str.data(); - const auto end = begin + comment_expr_str.size(); - ASTPtr comment_expr = parseQuery(parser_expr, begin, end, "comment_expression", 0); - return comment_expr; + String comment; + readText(comment, buf); // This is wrong may be + return comment; } ColumnsDescription ColumnsDescription::parse(const String & str) @@ -209,10 +208,10 @@ ColumnsDescription ColumnsDescription::parse(const String & str) result.defaults.emplace(column_name, ColumnDefault{default_kind, default_expr}); } - const auto comment_expr = parseCommentExpr(buf); - if (comment_expr) + const auto comment = parseComment(buf); + if (!comment.empty()) { - result.comments.emplace(column_name, ColumnComment{comment_expr}); + result.comments.emplace(column_name, comment); } assertChar('\n', buf); diff --git a/dbms/src/Storages/System/StorageSystemColumns.cpp b/dbms/src/Storages/System/StorageSystemColumns.cpp index b546a275f0b..45ffb73f9af 100644 --- a/dbms/src/Storages/System/StorageSystemColumns.cpp +++ b/dbms/src/Storages/System/StorageSystemColumns.cpp @@ -186,9 +186,8 @@ protected: } else { - const auto & literal = typeid_cast(it->second.expression.get()); if (columns_mask[src_index++]) - res_columns[res_index++]->insert(literal->value.get()); + res_columns[res_index++]->insert(it->second); } } diff --git a/dbms/tests/queries/0_stateless/00725_comment_columns.reference b/dbms/tests/queries/0_stateless/00725_comment_columns.reference index 8d7837d8a31..19dac99b721 100644 --- a/dbms/tests/queries/0_stateless/00725_comment_columns.reference +++ b/dbms/tests/queries/0_stateless/00725_comment_columns.reference @@ -1,4 +1,4 @@ -CREATE TABLE default.check_query_comment_column ( first_column UInt8 DEFAULT 1 COMMENT \'first comment\', fourth_column UInt8 COMMENT \'fourth comment\', fifth_column UInt8, second_column UInt8 MATERIALIZED first_column COMMENT \'second comment\', third_column UInt8 ALIAS second_column COMMENT \'third comment\') ENGINE = TinyLog +CREATE TABLE test.check_query_comment_column ( first_column UInt8 DEFAULT 1 COMMENT \'first comment\', fourth_column UInt8 COMMENT \'fourth comment\', fifth_column UInt8, second_column UInt8 MATERIALIZED first_column COMMENT \'second comment\', third_column UInt8 ALIAS second_column COMMENT \'third comment\') ENGINE = TinyLog ┌─table──────────────────────┬─name──────────┬─comment────────┐ │ check_query_comment_column │ first_column │ first comment │ │ check_query_comment_column │ fourth_column │ fourth comment │ @@ -6,7 +6,7 @@ CREATE TABLE default.check_query_comment_column ( first_column UInt8 DEFAULT 1 C │ check_query_comment_column │ second_column │ second comment │ │ check_query_comment_column │ third_column │ third comment │ └────────────────────────────┴───────────────┴────────────────┘ -CREATE TABLE default.check_query_comment_column ( first_column UInt8 DEFAULT 1 COMMENT \'another first column\', fourth_column UInt8 COMMENT \'another fourth column\', fifth_column UInt8 COMMENT \'another fifth column\', second_column UInt8 MATERIALIZED first_column COMMENT \'another second column\', third_column UInt8 ALIAS second_column COMMENT \'another third column\') ENGINE = TinyLog +CREATE TABLE test.check_query_comment_column ( first_column UInt8 DEFAULT 1 COMMENT \'another first column\', fourth_column UInt8 COMMENT \'another fourth column\', fifth_column UInt8 COMMENT \'another fifth column\', second_column UInt8 MATERIALIZED first_column COMMENT \'another second column\', third_column UInt8 ALIAS second_column COMMENT \'another third column\') ENGINE = TinyLog ┌─table──────────────────────┬─name──────────┬─comment───────────────┐ │ check_query_comment_column │ first_column │ another first column │ │ check_query_comment_column │ fourth_column │ another fourth column │ @@ -14,13 +14,13 @@ CREATE TABLE default.check_query_comment_column ( first_column UInt8 DEFAULT 1 C │ check_query_comment_column │ second_column │ another second column │ │ check_query_comment_column │ third_column │ another third column │ └────────────────────────────┴───────────────┴───────────────────────┘ -CREATE TABLE default.check_query_comment_column ( first_column Date COMMENT \'first comment\', second_column UInt8 COMMENT \'second comment\', third_column UInt8 COMMENT \'third comment\') ENGINE = MergeTree(first_column, (second_column, second_column), 8192) +CREATE TABLE test.check_query_comment_column ( first_column Date COMMENT \'first comment\', second_column UInt8 COMMENT \'second comment\', third_column UInt8 COMMENT \'third comment\') ENGINE = MergeTree(first_column, (second_column, second_column), 8192) ┌─table──────────────────────┬─name──────────┬─comment────────┐ │ check_query_comment_column │ first_column │ first comment │ │ check_query_comment_column │ second_column │ second comment │ │ check_query_comment_column │ third_column │ third comment │ └────────────────────────────┴───────────────┴────────────────┘ -CREATE TABLE default.check_query_comment_column ( first_column Date COMMENT \'another first comment\', second_column UInt8 COMMENT \'another second comment\', third_column UInt8 COMMENT \'another third comment\') ENGINE = MergeTree(first_column, (second_column, second_column), 8192) +CREATE TABLE test.check_query_comment_column ( first_column Date COMMENT \'another first comment\', second_column UInt8 COMMENT \'another second comment\', third_column UInt8 COMMENT \'another third comment\') ENGINE = MergeTree(first_column, (second_column, second_column), 8192) ┌─table──────────────────────┬─name──────────┬─comment────────────────┐ │ check_query_comment_column │ first_column │ another first comment │ │ check_query_comment_column │ second_column │ another second comment │ diff --git a/dbms/tests/queries/0_stateless/00725_comment_columns.sql b/dbms/tests/queries/0_stateless/00725_comment_columns.sql index 6c2bd2305b1..afcb1b34eab 100644 --- a/dbms/tests/queries/0_stateless/00725_comment_columns.sql +++ b/dbms/tests/queries/0_stateless/00725_comment_columns.sql @@ -1,6 +1,7 @@ -DROP TABLE IF EXISTS check_query_comment_column; +CREATE DATABASE IF NOT EXISTS test; +DROP TABLE IF EXISTS test.check_query_comment_column; -CREATE TABLE check_query_comment_column +CREATE TABLE test.check_query_comment_column ( first_column UInt8 DEFAULT 1 COMMENT 'first comment', second_column UInt8 MATERIALIZED first_column COMMENT 'second comment', @@ -9,52 +10,54 @@ CREATE TABLE check_query_comment_column fifth_column UInt8 ) ENGINE = TinyLog; -SHOW CREATE TABLE check_query_comment_column; +SHOW CREATE TABLE test.check_query_comment_column; SELECT table, name, comment FROM system.columns -WHERE table = 'check_query_comment_column' +WHERE table = 'check_query_comment_column' AND database = 'test' FORMAT PrettyCompactNoEscapes; -ALTER TABLE check_query_comment_column +ALTER TABLE test.check_query_comment_column COMMENT COLUMN first_column 'another first column', COMMENT COLUMN second_column 'another second column', COMMENT COLUMN third_column 'another third column', COMMENT COLUMN fourth_column 'another fourth column', COMMENT COLUMN fifth_column 'another fifth column'; -SHOW CREATE TABLE check_query_comment_column; +SHOW CREATE TABLE test.check_query_comment_column; SELECT table, name, comment FROM system.columns -WHERE table = 'check_query_comment_column' +WHERE table = 'check_query_comment_column' AND database = 'test' FORMAT PrettyCompactNoEscapes; -DROP TABLE IF EXISTS check_query_comment_column; +DROP TABLE IF EXISTS test.check_query_comment_column; -CREATE TABLE check_query_comment_column +CREATE TABLE test.check_query_comment_column ( first_column Date COMMENT 'first comment', second_column UInt8 COMMENT 'second comment', third_column UInt8 COMMENT 'third comment' ) ENGINE = MergeTree(first_column, (second_column, second_column), 8192); -SHOW CREATE TABLE check_query_comment_column; +SHOW CREATE TABLE test.check_query_comment_column; SELECT table, name, comment FROM system.columns -WHERE table = 'check_query_comment_column' +WHERE table = 'check_query_comment_column' AND database = 'test' FORMAT PrettyCompactNoEscapes; -ALTER TABLE check_query_comment_column +ALTER TABLE test.check_query_comment_column COMMENT COLUMN first_column 'another first comment', COMMENT COLUMN second_column 'another second comment', COMMENT COLUMN third_column 'another third comment'; -SHOW CREATE TABLE check_query_comment_column; +SHOW CREATE TABLE test.check_query_comment_column; SELECT table, name, comment FROM system.columns -WHERE table = 'check_query_comment_column' -FORMAT PrettyCompactNoEscapes; \ No newline at end of file +WHERE table = 'check_query_comment_column' and database = 'test' +FORMAT PrettyCompactNoEscapes; + +DROP table test.check_query_comment_column; \ No newline at end of file From f8f2779f7b227a5bca98d08b7d3069674bd060bc Mon Sep 17 00:00:00 2001 From: Sabyanin Maxim Date: Thu, 8 Nov 2018 15:03:42 +0300 Subject: [PATCH 020/145] make ColumnComment just a hashmap --- .../src/Interpreters/InterpreterCreateQuery.cpp | 8 +++++--- dbms/src/Parsers/ASTAlterQuery.cpp | 6 +++--- dbms/src/Parsers/ASTColumnDeclaration.h | 13 ++++++++++--- dbms/src/Parsers/ParserCreateQuery.h | 3 +-- dbms/src/Storages/ColumnComment.h | 4 ++-- dbms/src/Storages/ColumnsDescription.cpp | 17 ++++++++--------- dbms/src/Storages/ColumnsDescription.h | 2 +- 7 files changed, 30 insertions(+), 23 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index 7551eb9ea23..d7a223194b5 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -6,6 +6,7 @@ #include #include +#include #include #include @@ -220,9 +221,10 @@ static ParsedColumns parseColumns(const ASTExpressionList & column_list_ast, con default_expr_list->children.emplace_back(setAlias(col_decl.default_expression->clone(), col_decl.name)); } - if (!col_decl.comment.empty()) + if (col_decl.comment) { - comments.emplace(col_decl.name, col_decl.comment); + auto comment_literal = typeid_cast(*col_decl.comment); + comments.emplace(col_decl.name, comment_literal.value.get()); } } @@ -351,7 +353,7 @@ ASTPtr InterpreterCreateQuery::formatColumns(const ColumnsDescription & columns) const auto comments_it = columns.comments.find(column.name); if (comments_it != std::end(columns.comments)) { - column_declaration->comment = comments_it->second; + column_declaration->comment = std::make_shared(Field(comments_it->second)); } columns_list->children.push_back(column_declaration_ptr); diff --git a/dbms/src/Parsers/ASTAlterQuery.cpp b/dbms/src/Parsers/ASTAlterQuery.cpp index eee67223496..65f281a3f71 100644 --- a/dbms/src/Parsers/ASTAlterQuery.cpp +++ b/dbms/src/Parsers/ASTAlterQuery.cpp @@ -143,10 +143,10 @@ void ASTAlterCommand::formatImpl( } else if (type == ASTAlterCommand::COMMENT_COLUMN) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "COMMENT COLUMN " << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "COMMENT COLUMN " << (settings.hilite ? hilite_identifier : ""); column->formatImpl(settings, state, frame); - settings.ostr << " "; - // comment->formatImpl(settings, state, frame); + settings.ostr << " " << (settings.hilite ? hilite_none : ""); + comment->formatImpl(settings, state, frame); } else throw Exception("Unexpected type of ALTER", ErrorCodes::UNEXPECTED_AST_STRUCTURE); diff --git a/dbms/src/Parsers/ASTColumnDeclaration.h b/dbms/src/Parsers/ASTColumnDeclaration.h index 0680e69d300..010c845b244 100644 --- a/dbms/src/Parsers/ASTColumnDeclaration.h +++ b/dbms/src/Parsers/ASTColumnDeclaration.h @@ -15,7 +15,7 @@ public: ASTPtr type; String default_specifier; ASTPtr default_expression; - String comment; + ASTPtr comment; String getID() const override { return "ColumnDeclaration_" + name; } @@ -36,6 +36,12 @@ public: res->children.push_back(res->default_expression); } + if (comment) + { + res->comment = comment->clone(); + res->children.push_back(res->comment); + } + return res; } @@ -57,9 +63,10 @@ public: default_expression->formatImpl(settings, state, frame); } - if (!comment.empty()) + if (comment) { - settings.ostr << ' ' << (settings.hilite ? hilite_none : "") << "COMMENT" << (settings.hilite ? hilite_none : "") << ' ' << comment; + settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "COMMENT " << (settings.hilite ? hilite_none : "") << ' '; + comment->formatImpl(settings, state, frame); } } }; diff --git a/dbms/src/Parsers/ParserCreateQuery.h b/dbms/src/Parsers/ParserCreateQuery.h index 6705a38c63a..7d2bdb382cd 100644 --- a/dbms/src/Parsers/ParserCreateQuery.h +++ b/dbms/src/Parsers/ParserCreateQuery.h @@ -176,8 +176,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E if (comment_expression) { - auto & literal_value = typeid_cast(*comment_expression).value; - column_declaration->comment = literal_value.safeGet(); + column_declaration->comment = comment_expression; column_declaration->children.push_back(std::move(comment_expression)); } diff --git a/dbms/src/Storages/ColumnComment.h b/dbms/src/Storages/ColumnComment.h index 521d1fd9d17..0fc4e7e2742 100644 --- a/dbms/src/Storages/ColumnComment.h +++ b/dbms/src/Storages/ColumnComment.h @@ -1,10 +1,10 @@ #pragma once +#include + #include #include -#include - namespace DB { diff --git a/dbms/src/Storages/ColumnsDescription.cpp b/dbms/src/Storages/ColumnsDescription.cpp index da540258329..7ac7b07d955 100644 --- a/dbms/src/Storages/ColumnsDescription.cpp +++ b/dbms/src/Storages/ColumnsDescription.cpp @@ -100,7 +100,7 @@ String ColumnsDescription::toString() const if (exist_comment) { writeChar('\t', buf); - writeText(comments_it->second, buf); + writeText(queryToString(ASTLiteral(Field(comments_it->second))), buf); } writeChar('\n', buf); @@ -120,7 +120,7 @@ struct ParsedDefaultInfo ASTPtr default_expr_str; }; -std::optional parseDefaulfInfo(ReadBufferFromString & buf) +std::optional parseDefaultInfo(ReadBufferFromString & buf) { if (*buf.position() == '\n') { @@ -142,9 +142,7 @@ std::optional parseDefaulfInfo(ReadBufferFromString & buf) ParserExpression expr_parser; String default_expr_str; readText(default_expr_str, buf); - const char * begin = default_expr_str.data(); - const auto end = begin + default_expr_str.size(); - ASTPtr default_expr = parseQuery(expr_parser, begin, end, "default_expression", 0); + ASTPtr default_expr = parseQuery(expr_parser, default_expr_str, "default_expression", 0); return ParsedDefaultInfo{default_kind, std::move(default_expr)}; } @@ -156,9 +154,10 @@ String parseComment(ReadBufferFromString& buf) } ParserExpression parser_expr; - String comment; - readText(comment, buf); // This is wrong may be - return comment; + String comment_expr_str; + readText(comment_expr_str, buf); // This is wrong may be + ASTPtr comment_expr = parseQuery(parser_expr, comment_expr_str, "comment expression", 0); + return typeid_cast(*comment_expr).value.get(); } ColumnsDescription ColumnsDescription::parse(const String & str) @@ -193,7 +192,7 @@ ColumnsDescription ColumnsDescription::parse(const String & str) assertChar('\t', buf); - const auto default_info = parseDefaulfInfo(buf); + const auto default_info = parseDefaultInfo(buf); if (default_info) { const auto & default_kind = default_info->default_kind; diff --git a/dbms/src/Storages/ColumnsDescription.h b/dbms/src/Storages/ColumnsDescription.h index 69738d83861..0fe88140ba1 100644 --- a/dbms/src/Storages/ColumnsDescription.h +++ b/dbms/src/Storages/ColumnsDescription.h @@ -24,7 +24,7 @@ struct ColumnsDescription NamesAndTypesList ordinary_, NamesAndTypesList materialized_, NamesAndTypesList aliases_, - ColumnDefaults defaults_, + ColumnDefaults defaults_ = {}, ColumnComments comments_ = {}) : ordinary(std::move(ordinary_)) , materialized(std::move(materialized_)) From c9d8dc6171ae92664a59242940acfc12c0abc418 Mon Sep 17 00:00:00 2001 From: Sabyanin Maxim Date: Thu, 8 Nov 2018 20:45:41 +0300 Subject: [PATCH 021/145] add primary_key, partition_key, sampling_key sections in system.tables. add getPartitionExpression method in IStorage. --- dbms/src/Storages/IStorage.h | 3 ++ .../Storages/System/StorageSystemTables.cpp | 48 +++++++++++++++++-- 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h index a0a61035580..e66f7b7df10 100644 --- a/dbms/src/Storages/IStorage.h +++ b/dbms/src/Storages/IStorage.h @@ -349,6 +349,9 @@ public: /// Returns primary expression for storage or nullptr if there is no. virtual ASTPtr getPrimaryExpression() const { return nullptr; } + /// Returns partition expression for storage or nullptr if there is no. + virtual ASTPtr getPartitionExpression() const {return nullptr; } + using ITableDeclaration::ITableDeclaration; using std::enable_shared_from_this::shared_from_this; diff --git a/dbms/src/Storages/System/StorageSystemTables.cpp b/dbms/src/Storages/System/StorageSystemTables.cpp index 8ba44b1d4ad..d25950f7e87 100644 --- a/dbms/src/Storages/System/StorageSystemTables.cpp +++ b/dbms/src/Storages/System/StorageSystemTables.cpp @@ -39,7 +39,10 @@ StorageSystemTables::StorageSystemTables(const std::string & name_) {"dependencies_database", std::make_shared(std::make_shared())}, {"dependencies_table", std::make_shared(std::make_shared())}, {"create_table_query", std::make_shared()}, - {"engine_full", std::make_shared()} + {"engine_full", std::make_shared()}, + {"primary_key", std::make_shared()}, + {"partition_key", std::make_shared()}, + {"sample_key", std::make_shared()}, })); } @@ -65,7 +68,7 @@ public: size_t max_block_size, ColumnPtr databases, const Context & context) - : columns_mask(columns_mask), header(header), max_block_size(max_block_size), databases(std::move(databases)), context(context) {} + : columns_mask(std::move(columns_mask)), header(std::move(header)), max_block_size(max_block_size), databases(std::move(databases)), context(context) {} String getName() const override { return "Tables"; } Block getHeader() const override { return header; } @@ -144,6 +147,15 @@ protected: if (columns_mask[src_index++]) res_columns[res_index++]->insert(table.second->getName()); + + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); } } @@ -173,7 +185,7 @@ protected: res_columns[res_index++]->insert(tables_it->table()->getName()); if (columns_mask[src_index++]) - res_columns[res_index++]->insert(0u); + res_columns[res_index++]->insert(0u); // is_temporary if (columns_mask[src_index++]) res_columns[res_index++]->insert(tables_it->table()->getDataPath()); @@ -234,6 +246,36 @@ protected: res_columns[res_index++]->insert(engine_full); } } + else + { + src_index += 2; + } + + const auto table_it = context.getTable(database_name, table_name); + ASTPtr expression_ptr; + if (columns_mask[src_index++]) + { + if ((expression_ptr = table_it->getPrimaryExpression())) + res_columns[res_index++]->insert(queryToString(expression_ptr)); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + if ((expression_ptr = table_it->getPartitionExpression())) + res_columns[res_index++]->insert(queryToString(expression_ptr)); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + if ((expression_ptr = table_it->getSamplingExpression())) + res_columns[res_index++]->insert(queryToString(expression_ptr)); + else + res_columns[res_index++]->insertDefault(); + } } } From 51c7284583b611bc94da4e9c85f340d6a1248eac Mon Sep 17 00:00:00 2001 From: Sabyanin Maxim Date: Fri, 9 Nov 2018 12:39:07 +0300 Subject: [PATCH 022/145] add getSecondaryExpression --- dbms/src/Storages/IStorage.h | 5 ++++- dbms/src/Storages/StorageMergeTree.h | 4 ++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h index e66f7b7df10..8a6945b85fe 100644 --- a/dbms/src/Storages/IStorage.h +++ b/dbms/src/Storages/IStorage.h @@ -350,7 +350,10 @@ public: virtual ASTPtr getPrimaryExpression() const { return nullptr; } /// Returns partition expression for storage or nullptr if there is no. - virtual ASTPtr getPartitionExpression() const {return nullptr; } + virtual ASTPtr getPartitionExpression() const { return nullptr; } + + /// Returns secondary expression for storage or nullptr if there is no. + virtual ASTPtr getSecondaryExpression() const { return nullptr; } using ITableDeclaration::ITableDeclaration; using std::enable_shared_from_this::shared_from_this; diff --git a/dbms/src/Storages/StorageMergeTree.h b/dbms/src/Storages/StorageMergeTree.h index 0182f31dc8e..d495f318622 100644 --- a/dbms/src/Storages/StorageMergeTree.h +++ b/dbms/src/Storages/StorageMergeTree.h @@ -98,6 +98,10 @@ public: ASTPtr getPrimaryExpression() const override { return data.primary_expr_ast; } + ASTPtr getPartitionExpression() const override { return data.partition_expr_ast; } + + ASTPtr getSecondaryExpression() const override { return data.secondary_sort_expr_ast; } + private: String path; String database_name; From ac6aaa37b55742acc795c8356b8f6fb9aa0f3b83 Mon Sep 17 00:00:00 2001 From: Sabyanin Maxim Date: Fri, 9 Nov 2018 15:02:08 +0300 Subject: [PATCH 023/145] add is_primary_key, is_order_key, is_partition_key, is_sampling_key sections in system.columns --- dbms/src/Storages/ColumnsDescription.h | 29 ++++++++++++++++ .../Storages/System/StorageSystemColumns.cpp | 33 +++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/dbms/src/Storages/ColumnsDescription.h b/dbms/src/Storages/ColumnsDescription.h index 288d2712b3b..e86ca2dbc6b 100644 --- a/dbms/src/Storages/ColumnsDescription.h +++ b/dbms/src/Storages/ColumnsDescription.h @@ -9,12 +9,41 @@ namespace DB { +enum class TypeOfPresenceInTableDeclaration : int32_t +{ + InPrimaryKey = 1<<0, + InOrderKey = 1<<1, + InPartitionKey = 1<<2, + InSamplingKey = 1<<3, +}; + + +class ColumnPresenceInTableDeclaration +{ +public: + bool Get(TypeOfPresenceInTableDeclaration type) + { + return static_cast(presenceMask & static_cast(type)); + } + + void Set(TypeOfPresenceInTableDeclaration type) + { + presenceMask &= static_cast(type); + } + +private: + int32_t presenceMask; +}; + +using ColumnPresencesInTableDeclaration = std::unordered_map; + struct ColumnsDescription { NamesAndTypesList ordinary; NamesAndTypesList materialized; NamesAndTypesList aliases; ColumnDefaults defaults; + ColumnPresencesInTableDeclaration presences; ColumnsDescription() = default; diff --git a/dbms/src/Storages/System/StorageSystemColumns.cpp b/dbms/src/Storages/System/StorageSystemColumns.cpp index dbb80c62c6f..3aa60b3beb2 100644 --- a/dbms/src/Storages/System/StorageSystemColumns.cpp +++ b/dbms/src/Storages/System/StorageSystemColumns.cpp @@ -36,6 +36,10 @@ StorageSystemColumns::StorageSystemColumns(const std::string & name_) { "data_compressed_bytes", std::make_shared() }, { "data_uncompressed_bytes", std::make_shared() }, { "marks_bytes", std::make_shared() }, + { "is_in_primary_key", std::make_shared()}, + { "is_in_order_key", std::make_shared()}, + { "is_in_partition_key", std::make_shared()}, + { "is_in_sample_key", std::make_shared()}, })); } @@ -80,6 +84,7 @@ protected: NamesAndTypesList columns; ColumnDefaults column_defaults; + ColumnPresencesInTableDeclaration column_presences; MergeTreeData::ColumnSizeByName column_sizes; { @@ -105,6 +110,7 @@ protected: columns = storage->getColumns().getAll(); column_defaults = storage->getColumns().defaults; + column_presences = storage->getColumns().presences; /** Info about sizes of columns for tables of MergeTree family. * NOTE: It is possible to add getter for this info to IStorage interface. @@ -173,6 +179,33 @@ protected: } } + { + const auto it = column_presences.find(column.name); + if (it == std::end(column_presences)) + { + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + } + else + { + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(it->second.Get(TypeOfPresenceInTableDeclaration::InPrimaryKey)); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(it->second.Get(TypeOfPresenceInTableDeclaration::InOrderKey)); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(it->second.Get(TypeOfPresenceInTableDeclaration::InPartitionKey)); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(it->second.Get(TypeOfPresenceInTableDeclaration::InSamplingKey)); + } + + } + ++rows_count; } } From 9befbd94e467b3c5713b83ed4fc44077227be119 Mon Sep 17 00:00:00 2001 From: Sabyanin Maxim Date: Fri, 9 Nov 2018 18:39:21 +0300 Subject: [PATCH 024/145] is_in_order_key, is_in_partition_key, is_in_sample_key in system.columns works, is_in_primary_key not yet --- dbms/src/Databases/DatabasesCommon.cpp | 2 +- .../Interpreters/InterpreterCreateQuery.cpp | 35 +++++++++++++++++-- .../src/Interpreters/InterpreterCreateQuery.h | 2 +- .../Interpreters/InterpreterSystemQuery.cpp | 2 +- dbms/src/Storages/ColumnsDescription.h | 17 ++++----- .../Storages/System/StorageSystemColumns.cpp | 10 +++--- 6 files changed, 49 insertions(+), 19 deletions(-) diff --git a/dbms/src/Databases/DatabasesCommon.cpp b/dbms/src/Databases/DatabasesCommon.cpp index 2617390fd6d..daff1bc8ee5 100644 --- a/dbms/src/Databases/DatabasesCommon.cpp +++ b/dbms/src/Databases/DatabasesCommon.cpp @@ -70,7 +70,7 @@ std::pair createTableFromDefinition( if (!ast_create_query.columns) throw Exception("Missing definition of columns.", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED); - ColumnsDescription columns = InterpreterCreateQuery::getColumnsDescription(*ast_create_query.columns, context); + ColumnsDescription columns = InterpreterCreateQuery::getColumnsDescription(ast_create_query, context); return { diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index 6dfb99bd4ac..aac1d047499 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -348,9 +348,31 @@ ASTPtr InterpreterCreateQuery::formatColumns(const ColumnsDescription & columns) return columns_list; } - -ColumnsDescription InterpreterCreateQuery::getColumnsDescription(const ASTExpressionList & columns, const Context & context) +void FillColumnPresenceInTableDeclaration(ColumnPresences & presences, IAST * ast, PresenceType type) { + if (!ast) + return; + + IdentifierNameSet names_set; + ast->collectIdentifierNames(names_set); + for (const auto & name : names_set) + { + auto it = presences.find(name); + if (it == presences.end()) + it = presences.insert({name, ColumnPresence()}).first; + + it->second.Set(type); + } +} + +ColumnsDescription InterpreterCreateQuery::getColumnsDescription(const ASTCreateQuery & ast, const Context & context) +{ + // TODO: или это было не правильное решение? + if (ast.columns == nullptr) + return {}; + + const ASTExpressionList & columns = *ast.columns; + const ASTStorage * storage_def = ast.storage; ColumnsDescription res; auto && columns_and_defaults = parseColumns(columns, context); @@ -359,6 +381,13 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(const ASTExpres res.ordinary = std::move(columns_and_defaults.first); res.defaults = std::move(columns_and_defaults.second); + if (storage_def && storage_def->engine) + { + FillColumnPresenceInTableDeclaration(res.presences, storage_def->order_by, PresenceType::InOrderKey); + FillColumnPresenceInTableDeclaration(res.presences, storage_def->partition_by, PresenceType::InPartitionKey); + FillColumnPresenceInTableDeclaration(res.presences, storage_def->sample_by, PresenceType::InSamplingKey); + } + if (res.ordinary.size() + res.materialized.size() == 0) throw Exception{"Cannot CREATE table without physical columns", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED}; @@ -400,7 +429,7 @@ ColumnsDescription InterpreterCreateQuery::setColumns( if (create.columns) { - res = getColumnsDescription(*create.columns, context); + res = getColumnsDescription(create, context); } else if (!create.as_table.empty()) { diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.h b/dbms/src/Interpreters/InterpreterCreateQuery.h index 2f186764866..d98abdf6cdc 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.h +++ b/dbms/src/Interpreters/InterpreterCreateQuery.h @@ -45,7 +45,7 @@ public: } /// Obtain information about columns, their types and default values, for case when columns in CREATE query is specified explicitly. - static ColumnsDescription getColumnsDescription(const ASTExpressionList & columns, const Context & context); + static ColumnsDescription getColumnsDescription(const ASTCreateQuery & columns, const Context & context); /// Check that column types are allowed for usage in table according to settings. static void checkSupportedTypes(const ColumnsDescription & columns, const Context & context); diff --git a/dbms/src/Interpreters/InterpreterSystemQuery.cpp b/dbms/src/Interpreters/InterpreterSystemQuery.cpp index 749827896aa..f0705f07cd0 100644 --- a/dbms/src/Interpreters/InterpreterSystemQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSystemQuery.cpp @@ -251,7 +251,7 @@ StoragePtr InterpreterSystemQuery::tryRestartReplica(const String & database_nam create.attach = true; std::string data_path = database->getDataPath(); - auto columns = InterpreterCreateQuery::getColumnsDescription(*create.columns, system_context); + auto columns = InterpreterCreateQuery::getColumnsDescription(create, system_context); StoragePtr table = StorageFactory::instance().get(create, data_path, diff --git a/dbms/src/Storages/ColumnsDescription.h b/dbms/src/Storages/ColumnsDescription.h index e86ca2dbc6b..aef00eb9fd2 100644 --- a/dbms/src/Storages/ColumnsDescription.h +++ b/dbms/src/Storages/ColumnsDescription.h @@ -9,7 +9,7 @@ namespace DB { -enum class TypeOfPresenceInTableDeclaration : int32_t +enum class PresenceType : int32_t { InPrimaryKey = 1<<0, InOrderKey = 1<<1, @@ -18,24 +18,25 @@ enum class TypeOfPresenceInTableDeclaration : int32_t }; -class ColumnPresenceInTableDeclaration +// TODO: не очень классное название +class ColumnPresence { public: - bool Get(TypeOfPresenceInTableDeclaration type) + bool Get(PresenceType type) { return static_cast(presenceMask & static_cast(type)); } - void Set(TypeOfPresenceInTableDeclaration type) + void Set(PresenceType type) { - presenceMask &= static_cast(type); + presenceMask |= static_cast(type); } private: - int32_t presenceMask; + int32_t presenceMask = 0; }; -using ColumnPresencesInTableDeclaration = std::unordered_map; +using ColumnPresences = std::unordered_map; struct ColumnsDescription { @@ -43,7 +44,7 @@ struct ColumnsDescription NamesAndTypesList materialized; NamesAndTypesList aliases; ColumnDefaults defaults; - ColumnPresencesInTableDeclaration presences; + ColumnPresences presences; ColumnsDescription() = default; diff --git a/dbms/src/Storages/System/StorageSystemColumns.cpp b/dbms/src/Storages/System/StorageSystemColumns.cpp index 3aa60b3beb2..2defae718d9 100644 --- a/dbms/src/Storages/System/StorageSystemColumns.cpp +++ b/dbms/src/Storages/System/StorageSystemColumns.cpp @@ -84,7 +84,7 @@ protected: NamesAndTypesList columns; ColumnDefaults column_defaults; - ColumnPresencesInTableDeclaration column_presences; + ColumnPresences column_presences; MergeTreeData::ColumnSizeByName column_sizes; { @@ -195,13 +195,13 @@ protected: else { if (columns_mask[src_index++]) - res_columns[res_index++]->insert(it->second.Get(TypeOfPresenceInTableDeclaration::InPrimaryKey)); + res_columns[res_index++]->insert(it->second.Get(PresenceType::InPrimaryKey)); if (columns_mask[src_index++]) - res_columns[res_index++]->insert(it->second.Get(TypeOfPresenceInTableDeclaration::InOrderKey)); + res_columns[res_index++]->insert(it->second.Get(PresenceType::InOrderKey)); if (columns_mask[src_index++]) - res_columns[res_index++]->insert(it->second.Get(TypeOfPresenceInTableDeclaration::InPartitionKey)); + res_columns[res_index++]->insert(it->second.Get(PresenceType::InPartitionKey)); if (columns_mask[src_index++]) - res_columns[res_index++]->insert(it->second.Get(TypeOfPresenceInTableDeclaration::InSamplingKey)); + res_columns[res_index++]->insert(it->second.Get(PresenceType::InSamplingKey)); } } From 94594d8c06d7b375447a0601c5bf2412bc90b562 Mon Sep 17 00:00:00 2001 From: Sabyanin Maxim Date: Sun, 11 Nov 2018 18:44:21 +0300 Subject: [PATCH 025/145] add order_key section in system.tables --- dbms/src/Storages/IStorage.h | 2 +- dbms/src/Storages/StorageMergeTree.h | 2 +- dbms/src/Storages/System/StorageSystemTables.cpp | 12 ++++++++++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h index 8a6945b85fe..938133c71a4 100644 --- a/dbms/src/Storages/IStorage.h +++ b/dbms/src/Storages/IStorage.h @@ -353,7 +353,7 @@ public: virtual ASTPtr getPartitionExpression() const { return nullptr; } /// Returns secondary expression for storage or nullptr if there is no. - virtual ASTPtr getSecondaryExpression() const { return nullptr; } + virtual ASTPtr getOrderExpression() const { return nullptr; } using ITableDeclaration::ITableDeclaration; using std::enable_shared_from_this::shared_from_this; diff --git a/dbms/src/Storages/StorageMergeTree.h b/dbms/src/Storages/StorageMergeTree.h index d495f318622..550231d3044 100644 --- a/dbms/src/Storages/StorageMergeTree.h +++ b/dbms/src/Storages/StorageMergeTree.h @@ -100,7 +100,7 @@ public: ASTPtr getPartitionExpression() const override { return data.partition_expr_ast; } - ASTPtr getSecondaryExpression() const override { return data.secondary_sort_expr_ast; } + ASTPtr getOrderExpression() const override { return data.secondary_sort_expr_ast; } private: String path; diff --git a/dbms/src/Storages/System/StorageSystemTables.cpp b/dbms/src/Storages/System/StorageSystemTables.cpp index d25950f7e87..a7e5f39d0e8 100644 --- a/dbms/src/Storages/System/StorageSystemTables.cpp +++ b/dbms/src/Storages/System/StorageSystemTables.cpp @@ -41,6 +41,7 @@ StorageSystemTables::StorageSystemTables(const std::string & name_) {"create_table_query", std::make_shared()}, {"engine_full", std::make_shared()}, {"primary_key", std::make_shared()}, + {"order_key", std::make_shared()}, {"partition_key", std::make_shared()}, {"sample_key", std::make_shared()}, })); @@ -156,6 +157,9 @@ protected: if (columns_mask[src_index++]) res_columns[res_index++]->insertDefault(); + + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); } } @@ -261,6 +265,14 @@ protected: res_columns[res_index++]->insertDefault(); } + if (columns_mask[src_index++]) + { + if ((expression_ptr = table_it->getOrderExpression())) + res_columns[res_index++]->insert(queryToString(expression_ptr)); + else + res_columns[res_index++]->insertDefault(); + } + if (columns_mask[src_index++]) { if ((expression_ptr = table_it->getPartitionExpression())) From 03e6a29eb2b74ad6b0a38404f88698d63e652f58 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Tue, 30 Oct 2018 18:04:13 +0300 Subject: [PATCH 026/145] =?UTF-8?q?Implement=20'ALTER=20=E2=80=A6=20FREEZE?= =?UTF-8?q?'=20command.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Need some tests! --- .../Interpreters/InterpreterAlterQuery.cpp | 4 ++ dbms/src/Parsers/ASTAlterQuery.cpp | 4 ++ dbms/src/Parsers/ASTAlterQuery.h | 1 + dbms/src/Parsers/ParserAlterQuery.cpp | 38 +++++++++++-------- dbms/src/Parsers/ParserAlterQuery.h | 2 +- dbms/src/Storages/IStorage.h | 7 ++++ dbms/src/Storages/MergeTree/MergeTreeData.cpp | 34 +++++++++++++++++ dbms/src/Storages/MergeTree/MergeTreeData.h | 6 +++ dbms/src/Storages/PartitionCommands.cpp | 7 ++++ dbms/src/Storages/PartitionCommands.h | 1 + dbms/src/Storages/StorageMaterializedView.cpp | 6 +++ dbms/src/Storages/StorageMaterializedView.h | 1 + dbms/src/Storages/StorageMergeTree.cpp | 5 +++ dbms/src/Storages/StorageMergeTree.h | 1 + .../Storages/StorageReplicatedMergeTree.cpp | 6 +++ .../src/Storages/StorageReplicatedMergeTree.h | 1 + .../queries/0_stateless/00428_partition.sh | 4 ++ 17 files changed, 112 insertions(+), 16 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterAlterQuery.cpp b/dbms/src/Interpreters/InterpreterAlterQuery.cpp index 31eedff6d11..551a6b9f8c8 100644 --- a/dbms/src/Interpreters/InterpreterAlterQuery.cpp +++ b/dbms/src/Interpreters/InterpreterAlterQuery.cpp @@ -85,6 +85,10 @@ BlockIO InterpreterAlterQuery::execute() table->freezePartition(command.partition, command.with_name, context); break; + case PartitionCommand::FREEZE_ALL: + table->freezeAll(command.with_name, context); + break; + case PartitionCommand::CLEAR_COLUMN: table->clearColumnInPartition(command.partition, command.column_name, context); break; diff --git a/dbms/src/Parsers/ASTAlterQuery.cpp b/dbms/src/Parsers/ASTAlterQuery.cpp index 8869e873c71..5e8fdf88276 100644 --- a/dbms/src/Parsers/ASTAlterQuery.cpp +++ b/dbms/src/Parsers/ASTAlterQuery.cpp @@ -128,6 +128,10 @@ void ASTAlterCommand::formatImpl( << " " << std::quoted(with_name, '\''); } } + else if (type == ASTAlterCommand::FREEZE_ALL) + { + // TODO: implement this. + } else if (type == ASTAlterCommand::DELETE) { settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "DELETE WHERE " << (settings.hilite ? hilite_none : ""); diff --git a/dbms/src/Parsers/ASTAlterQuery.h b/dbms/src/Parsers/ASTAlterQuery.h index 9bfcdae2427..2b0ccded24b 100644 --- a/dbms/src/Parsers/ASTAlterQuery.h +++ b/dbms/src/Parsers/ASTAlterQuery.h @@ -31,6 +31,7 @@ public: REPLACE_PARTITION, FETCH_PARTITION, FREEZE_PARTITION, + FREEZE_ALL, DELETE, UPDATE, diff --git a/dbms/src/Parsers/ParserAlterQuery.cpp b/dbms/src/Parsers/ParserAlterQuery.cpp index 56eaddb38ee..60fa9a9beae 100644 --- a/dbms/src/Parsers/ParserAlterQuery.cpp +++ b/dbms/src/Parsers/ParserAlterQuery.cpp @@ -30,7 +30,8 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserKeyword s_drop_partition("DROP PARTITION"); ParserKeyword s_attach_part("ATTACH PART"); ParserKeyword s_fetch_partition("FETCH PARTITION"); - ParserKeyword s_freeze_partition("FREEZE PARTITION"); + ParserKeyword s_replace_partition("REPLACE PARTITION"); + ParserKeyword s_freeze("FREEZE"); ParserKeyword s_partition("PARTITION"); ParserKeyword s_after("AFTER"); @@ -121,7 +122,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected command->type = ASTAlterCommand::ATTACH_PARTITION; } } - else if (ParserKeyword{"REPLACE PARTITION"}.ignore(pos, expected)) + else if (s_replace_partition.ignore(pos, expected)) { if (!parser_partition.parse(pos, command->partition, expected)) return false; @@ -158,25 +159,32 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected command->from = typeid_cast(*ast_from).value.get(); command->type = ASTAlterCommand::FETCH_PARTITION; } - else if (s_freeze_partition.ignore(pos, expected)) + else if (s_freeze.ignore(pos, expected)) { - if (!parser_partition.parse(pos, command->partition, expected)) - return false; - - /// WITH NAME 'name' - place local backup to directory with specified name - if (s_with.ignore(pos, expected)) + if (s_partition.ignore(pos, expected)) { - if (!s_name.ignore(pos, expected)) + if (!parser_partition.parse(pos, command->partition, expected)) return false; - ASTPtr ast_with_name; - if (!parser_string_literal.parse(pos, ast_with_name, expected)) - return false; + /// WITH NAME 'name' - place local backup to directory with specified name + if (s_with.ignore(pos, expected)) + { + if (!s_name.ignore(pos, expected)) + return false; - command->with_name = typeid_cast(*ast_with_name).value.get(); + ASTPtr ast_with_name; + if (!parser_string_literal.parse(pos, ast_with_name, expected)) + return false; + + command->with_name = typeid_cast(*ast_with_name).value.get(); + } + + command->type = ASTAlterCommand::FREEZE_PARTITION; + } + else + { + command->type = ASTAlterCommand::FREEZE_ALL; } - - command->type = ASTAlterCommand::FREEZE_PARTITION; } else if (s_modify_column.ignore(pos, expected)) { diff --git a/dbms/src/Parsers/ParserAlterQuery.h b/dbms/src/Parsers/ParserAlterQuery.h index 46908ae135d..0e05294112f 100644 --- a/dbms/src/Parsers/ParserAlterQuery.h +++ b/dbms/src/Parsers/ParserAlterQuery.h @@ -15,7 +15,7 @@ namespace DB * [MODIFY PRIMARY KEY (a, b, c...)] * [DROP|DETACH|ATTACH PARTITION|PART partition, ...] * [FETCH PARTITION partition FROM ...] - * [FREEZE PARTITION] + * [FREEZE [PARTITION]] * [DELETE WHERE ...] * [UPDATE col_name = expr, ... WHERE ...] */ diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h index a0a61035580..e2d4970f8b8 100644 --- a/dbms/src/Storages/IStorage.h +++ b/dbms/src/Storages/IStorage.h @@ -278,6 +278,13 @@ public: throw Exception("Method freezePartition is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } + /** Run the FREEZE request. That is, create a local backup (snapshot) of data using the `localBackup` function (see localBackup.h) + */ + virtual void freezeAll(const String & /*with_name*/, const Context & /*context*/) + { + throw Exception("Method freezeAll is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); + } + /** Perform any background work. For example, combining parts in a MergeTree type table. * Returns whether any work has been done. */ diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 524b8bfe8bf..310cee9059c 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -2104,6 +2104,40 @@ void MergeTreeData::freezePartition(const ASTPtr & partition_ast, const String & LOG_DEBUG(log, "Freezed " << parts_processed << " parts"); } +void MergeTreeData::freezeAll(const String & with_name, const Context & context) +{ + String clickhouse_path = Poco::Path(context.getPath()).makeAbsolute().toString(); + String shadow_path = clickhouse_path + "shadow/"; + Poco::File(shadow_path).createDirectories(); + String backup_path = shadow_path + + (!with_name.empty() + ? escapeForFileName(with_name) + : toString(Increment(shadow_path + "increment.txt").get(true))) + + "/"; + + LOG_DEBUG(log, "Snapshot will be placed at " + backup_path); + + /// Acquire a snapshot of active data parts to prevent removing while doing backup. + const auto data_parts = getDataParts(); + + size_t parts_processed = 0; + for (const auto & part : data_parts) + { + LOG_DEBUG(log, "Freezing part " << part->name); + + String part_absolute_path = Poco::Path(part->getFullPath()).absolute().toString(); + if (!startsWith(part_absolute_path, clickhouse_path)) + throw Exception("Part path " + part_absolute_path + " is not inside " + clickhouse_path, ErrorCodes::LOGICAL_ERROR); + + String backup_part_absolute_path = part_absolute_path; + backup_part_absolute_path.replace(0, clickhouse_path.size(), backup_path); + localBackup(part_absolute_path, backup_part_absolute_path); + ++parts_processed; + } + + LOG_DEBUG(log, "Freezed " << parts_processed << " parts"); +} + size_t MergeTreeData::getPartitionSize(const std::string & partition_id) const { size_t size = 0; diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 5ad413f21f8..c420e997677 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -501,6 +501,12 @@ public: */ void freezePartition(const ASTPtr & partition, const String & with_name, const Context & context); + /** Create local backup (snapshot) for all parts. + * Backup is created in directory clickhouse_dir/shadow/i/, where i - incremental number, + * or if 'with_name' is specified - backup is created in directory with specified name. + */ + void freezeAll(const String & with_name, const Context & context); + /// Returns the size of partition in bytes. size_t getPartitionSize(const std::string & partition_id) const; diff --git a/dbms/src/Storages/PartitionCommands.cpp b/dbms/src/Storages/PartitionCommands.cpp index e7daabb246c..00938a30996 100644 --- a/dbms/src/Storages/PartitionCommands.cpp +++ b/dbms/src/Storages/PartitionCommands.cpp @@ -58,6 +58,13 @@ std::optional PartitionCommand::parse(const ASTAlterCommand * res.with_name = command_ast->with_name; return res; } + else if (command_ast->type == ASTAlterCommand::FREEZE_ALL) + { + PartitionCommand res; + res.type = FREEZE_ALL; + res.with_name = command_ast->with_name; + return res; + } else if (command_ast->type == ASTAlterCommand::DROP_COLUMN && command_ast->partition) { if (!command_ast->clear_column) diff --git a/dbms/src/Storages/PartitionCommands.h b/dbms/src/Storages/PartitionCommands.h index 2f2479eccc9..e65ddf71d81 100644 --- a/dbms/src/Storages/PartitionCommands.h +++ b/dbms/src/Storages/PartitionCommands.h @@ -22,6 +22,7 @@ struct PartitionCommand REPLACE_PARTITION, FETCH_PARTITION, FREEZE_PARTITION, + FREEZE_ALL, CLEAR_COLUMN, }; diff --git a/dbms/src/Storages/StorageMaterializedView.cpp b/dbms/src/Storages/StorageMaterializedView.cpp index b83c49c3a25..4c5f612a702 100644 --- a/dbms/src/Storages/StorageMaterializedView.cpp +++ b/dbms/src/Storages/StorageMaterializedView.cpp @@ -263,6 +263,12 @@ void StorageMaterializedView::freezePartition(const ASTPtr & partition, const St getTargetTable()->freezePartition(partition, with_name, context); } +void StorageMaterializedView::freezeAll(const String & with_name, const Context & context) +{ + checkStatementCanBeForwarded(); + getTargetTable()->freezeAll(with_name, context); +} + void StorageMaterializedView::mutate(const MutationCommands & commands, const Context & context) { checkStatementCanBeForwarded(); diff --git a/dbms/src/Storages/StorageMaterializedView.h b/dbms/src/Storages/StorageMaterializedView.h index c20757b802b..5aeb998d648 100644 --- a/dbms/src/Storages/StorageMaterializedView.h +++ b/dbms/src/Storages/StorageMaterializedView.h @@ -39,6 +39,7 @@ public: void clearColumnInPartition(const ASTPtr & partition, const Field & column_name, const Context & context) override; void attachPartition(const ASTPtr & partition, bool part, const Context & context) override; void freezePartition(const ASTPtr & partition, const String & with_name, const Context & context) override; + void freezeAll(const String & with_name, const Context & context) override; void mutate(const MutationCommands & commands, const Context & context) override; void shutdown() override; diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 0e926218a05..6d432f388c1 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -863,6 +863,11 @@ void StorageMergeTree::freezePartition(const ASTPtr & partition, const String & data.freezePartition(partition, with_name, context); } +void StorageMergeTree::freezeAll(const String & with_name, const Context & context) +{ + data.freezeAll(with_name, context); +} + void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, const Context & context) { auto lock1 = lockStructure(false, __PRETTY_FUNCTION__); diff --git a/dbms/src/Storages/StorageMergeTree.h b/dbms/src/Storages/StorageMergeTree.h index 0182f31dc8e..f3ffac89bb3 100644 --- a/dbms/src/Storages/StorageMergeTree.h +++ b/dbms/src/Storages/StorageMergeTree.h @@ -71,6 +71,7 @@ public: void attachPartition(const ASTPtr & partition, bool part, const Context & context) override; void replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, const Context & context) override; void freezePartition(const ASTPtr & partition, const String & with_name, const Context & context) override; + void freezeAll(const String & with_name, const Context & context) override; void mutate(const MutationCommands & commands, const Context & context) override; diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 1d29a6dba8b..7c1576f922d 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -4160,6 +4160,12 @@ void StorageReplicatedMergeTree::freezePartition(const ASTPtr & partition, const } +void StorageReplicatedMergeTree::freezeAll(const String & with_name, const Context & context) +{ + data.freezeAll(with_name, context); +} + + void StorageReplicatedMergeTree::mutate(const MutationCommands & commands, const Context &) { /// Overview of the mutation algorithm. diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h index 491f30d93e5..37a25bb02b4 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.h +++ b/dbms/src/Storages/StorageReplicatedMergeTree.h @@ -122,6 +122,7 @@ public: void replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, const Context & context) override; void fetchPartition(const ASTPtr & partition, const String & from, const Context & context) override; void freezePartition(const ASTPtr & partition, const String & with_name, const Context & context) override; + void freezeAll(const String & with_name, const Context & context) override; void mutate(const MutationCommands & commands, const Context & context) override; diff --git a/dbms/tests/queries/0_stateless/00428_partition.sh b/dbms/tests/queries/0_stateless/00428_partition.sh index ce6ad9e1cd8..b23c4bb5105 100755 --- a/dbms/tests/queries/0_stateless/00428_partition.sh +++ b/dbms/tests/queries/0_stateless/00428_partition.sh @@ -23,6 +23,10 @@ for part in `$chl "SELECT name FROM system.parts WHERE database='test' AND table cat $ch_dir/data/test/partition_428/$part/columns.txt) | wc -l done +$chl "ALTER TABLE test.partition_428 FREEZE" + +find $ch_dir/shadow + $chl "ALTER TABLE test.partition_428 DETACH PARTITION 197001" $chl "ALTER TABLE test.partition_428 ATTACH PARTITION 197001" From a8370604548a440222cfa8b0b041e17a63bc9327 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Tue, 30 Oct 2018 21:57:20 +0300 Subject: [PATCH 027/145] Lock table structure before freezing. --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 2 ++ dbms/tests/queries/0_stateless/00428_partition.sh | 10 +++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 310cee9059c..55c44e05587 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -2117,6 +2117,8 @@ void MergeTreeData::freezeAll(const String & with_name, const Context & context) LOG_DEBUG(log, "Snapshot will be placed at " + backup_path); + auto lock = lockStructureForAlter(__PRETTY_FUNCTION__); + /// Acquire a snapshot of active data parts to prevent removing while doing backup. const auto data_parts = getDataParts(); diff --git a/dbms/tests/queries/0_stateless/00428_partition.sh b/dbms/tests/queries/0_stateless/00428_partition.sh index b23c4bb5105..1deed4f853e 100755 --- a/dbms/tests/queries/0_stateless/00428_partition.sh +++ b/dbms/tests/queries/0_stateless/00428_partition.sh @@ -25,7 +25,9 @@ done $chl "ALTER TABLE test.partition_428 FREEZE" -find $ch_dir/shadow +pushd +cd $chdir && find shadow -type f -exec md5sum {} \; +popd $chl "ALTER TABLE test.partition_428 DETACH PARTITION 197001" $chl "ALTER TABLE test.partition_428 ATTACH PARTITION 197001" @@ -37,6 +39,12 @@ for part in `$chl "SELECT name FROM system.parts WHERE database='test' AND table done $chl "ALTER TABLE test.partition_428 MODIFY COLUMN v1 Int8" + +# Check that backup hasn't changed +pushd +cd $chdir && find shadow -type f -exec md5sum {} \; +popd + $chl "OPTIMIZE TABLE test.partition_428" $chl "SELECT toUInt16(p), k, v1 FROM test.partition_428 ORDER BY k FORMAT CSV" From 935615a64799ff713e7f91904fe18d605290ed0e Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Thu, 1 Nov 2018 13:35:50 +0300 Subject: [PATCH 028/145] Reimplement FREEZE command. --- .gitignore | 1 + .../Interpreters/InterpreterAlterQuery.cpp | 4 -- dbms/src/Storages/AlterCommands.cpp | 11 +++ dbms/src/Storages/AlterCommands.h | 11 ++- dbms/src/Storages/IStorage.h | 7 -- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 70 +++++++++---------- dbms/src/Storages/MergeTree/MergeTreeData.h | 9 +-- dbms/src/Storages/PartitionCommands.cpp | 7 -- dbms/src/Storages/PartitionCommands.h | 7 +- dbms/src/Storages/StorageMaterializedView.cpp | 6 -- dbms/src/Storages/StorageMaterializedView.h | 1 - dbms/src/Storages/StorageMergeTree.cpp | 14 ++-- dbms/src/Storages/StorageMergeTree.h | 1 - .../Storages/StorageReplicatedMergeTree.cpp | 6 -- .../src/Storages/StorageReplicatedMergeTree.h | 1 - dbms/tests/clickhouse-test | 2 +- .../0_stateless/00428_partition.reference | 38 ++++++++++ .../queries/0_stateless/00428_partition.sh | 11 ++- 18 files changed, 114 insertions(+), 93 deletions(-) diff --git a/.gitignore b/.gitignore index 8359edbabde..585a4074767 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ /build /build_* +/build-* /docs/build /docs/edit /docs/tools/venv/ diff --git a/dbms/src/Interpreters/InterpreterAlterQuery.cpp b/dbms/src/Interpreters/InterpreterAlterQuery.cpp index 551a6b9f8c8..31eedff6d11 100644 --- a/dbms/src/Interpreters/InterpreterAlterQuery.cpp +++ b/dbms/src/Interpreters/InterpreterAlterQuery.cpp @@ -85,10 +85,6 @@ BlockIO InterpreterAlterQuery::execute() table->freezePartition(command.partition, command.with_name, context); break; - case PartitionCommand::FREEZE_ALL: - table->freezeAll(command.with_name, context); - break; - case PartitionCommand::CLEAR_COLUMN: table->clearColumnInPartition(command.partition, command.column_name, context); break; diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index fefd96262c2..dd5604a8f82 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -91,6 +91,13 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ command.primary_key = command_ast->primary_key; return command; } + else if (command_ast->type == ASTAlterCommand::FREEZE_ALL) + { + AlterCommand command; + command.type = AlterCommand::FREEZE_ALL; + command.with_name = command_ast->with_name; + return command; + } else return {}; } @@ -238,6 +245,10 @@ void AlterCommand::apply(ColumnsDescription & columns_description) const /// This have no relation to changing the list of columns. /// TODO Check that all columns exist, that only columns with constant defaults are added. } + else if (type == FREEZE_ALL) + { + /// Do nothing with columns. + } else throw Exception("Wrong parameter type in ALTER query", ErrorCodes::LOGICAL_ERROR); } diff --git a/dbms/src/Storages/AlterCommands.h b/dbms/src/Storages/AlterCommands.h index 888bd64f03e..8a07adc7205 100644 --- a/dbms/src/Storages/AlterCommands.h +++ b/dbms/src/Storages/AlterCommands.h @@ -12,13 +12,19 @@ namespace DB class ASTAlterCommand; -/// Operation from the ALTER query (except for manipulation with PART/PARTITION). Adding Nested columns is not expanded to add individual columns. +/// Operation from the ALTER query (except for manipulation with PART/PARTITION). +/// Adding Nested columns is not expanded to add individual columns. struct AlterCommand { enum Type { ADD_COLUMN, DROP_COLUMN, + + FREEZE_ALL, + // Even though this command operates on partitions, it needs global locks to prevent table alteration. + // It's vulnerable to the column modification commands. + MODIFY_COLUMN, MODIFY_PRIMARY_KEY, }; @@ -30,6 +36,9 @@ struct AlterCommand /// For DROP COLUMN ... FROM PARTITION String partition_name; + /// For FREEZE of all partitions + String with_name; + /// For ADD and MODIFY, a new column type. DataTypePtr data_type; diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h index e2d4970f8b8..a0a61035580 100644 --- a/dbms/src/Storages/IStorage.h +++ b/dbms/src/Storages/IStorage.h @@ -278,13 +278,6 @@ public: throw Exception("Method freezePartition is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } - /** Run the FREEZE request. That is, create a local backup (snapshot) of data using the `localBackup` function (see localBackup.h) - */ - virtual void freezeAll(const String & /*with_name*/, const Context & /*context*/) - { - throw Exception("Method freezeAll is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); - } - /** Perform any background work. For example, combining parts in a MergeTree type table. * Returns whether any work has been done. */ diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 55c44e05587..fb625b373d0 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -1279,6 +1279,40 @@ MergeTreeData::AlterDataPartTransactionPtr MergeTreeData::alterDataPart( return transaction; } +void MergeTreeData::freezeAll(const String & with_name, const Context & context) +{ + String clickhouse_path = Poco::Path(context.getPath()).makeAbsolute().toString(); + String shadow_path = clickhouse_path + "shadow/"; + Poco::File(shadow_path).createDirectories(); + String backup_path = shadow_path + + (!with_name.empty() + ? escapeForFileName(with_name) + : toString(Increment(shadow_path + "increment.txt").get(true))) + + "/"; + + LOG_DEBUG(log, "Snapshot will be placed at " + backup_path); + + /// Acquire a snapshot of active data parts to prevent removing while doing backup. + const auto data_parts = getDataParts(); + + size_t parts_processed = 0; + for (const auto & part : data_parts) + { + LOG_DEBUG(log, "Freezing part " << part->name); + + String part_absolute_path = Poco::Path(part->getFullPath()).absolute().toString(); + if (!startsWith(part_absolute_path, clickhouse_path)) + throw Exception("Part path " + part_absolute_path + " is not inside " + clickhouse_path, ErrorCodes::LOGICAL_ERROR); + + String backup_part_absolute_path = part_absolute_path; + backup_part_absolute_path.replace(0, clickhouse_path.size(), backup_path); + localBackup(part_absolute_path, backup_part_absolute_path); + ++parts_processed; + } + + LOG_DEBUG(log, "Freezed " << parts_processed << " parts"); +} + void MergeTreeData::AlterDataPartTransaction::commit() { if (!data_part) @@ -2104,42 +2138,6 @@ void MergeTreeData::freezePartition(const ASTPtr & partition_ast, const String & LOG_DEBUG(log, "Freezed " << parts_processed << " parts"); } -void MergeTreeData::freezeAll(const String & with_name, const Context & context) -{ - String clickhouse_path = Poco::Path(context.getPath()).makeAbsolute().toString(); - String shadow_path = clickhouse_path + "shadow/"; - Poco::File(shadow_path).createDirectories(); - String backup_path = shadow_path - + (!with_name.empty() - ? escapeForFileName(with_name) - : toString(Increment(shadow_path + "increment.txt").get(true))) - + "/"; - - LOG_DEBUG(log, "Snapshot will be placed at " + backup_path); - - auto lock = lockStructureForAlter(__PRETTY_FUNCTION__); - - /// Acquire a snapshot of active data parts to prevent removing while doing backup. - const auto data_parts = getDataParts(); - - size_t parts_processed = 0; - for (const auto & part : data_parts) - { - LOG_DEBUG(log, "Freezing part " << part->name); - - String part_absolute_path = Poco::Path(part->getFullPath()).absolute().toString(); - if (!startsWith(part_absolute_path, clickhouse_path)) - throw Exception("Part path " + part_absolute_path + " is not inside " + clickhouse_path, ErrorCodes::LOGICAL_ERROR); - - String backup_part_absolute_path = part_absolute_path; - backup_part_absolute_path.replace(0, clickhouse_path.size(), backup_path); - localBackup(part_absolute_path, backup_part_absolute_path); - ++parts_processed; - } - - LOG_DEBUG(log, "Freezed " << parts_processed << " parts"); -} - size_t MergeTreeData::getPartitionSize(const std::string & partition_id) const { size_t size = 0; diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index c420e997677..cb78ba78552 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -480,6 +480,9 @@ public: const ASTPtr & new_primary_key, bool skip_sanity_checks); + /// Freezes all parts. + void freezeAll(const String & with_name, const Context & context); + /// Should be called if part data is suspected to be corrupted. void reportBrokenPart(const String & name) const { @@ -501,12 +504,6 @@ public: */ void freezePartition(const ASTPtr & partition, const String & with_name, const Context & context); - /** Create local backup (snapshot) for all parts. - * Backup is created in directory clickhouse_dir/shadow/i/, where i - incremental number, - * or if 'with_name' is specified - backup is created in directory with specified name. - */ - void freezeAll(const String & with_name, const Context & context); - /// Returns the size of partition in bytes. size_t getPartitionSize(const std::string & partition_id) const; diff --git a/dbms/src/Storages/PartitionCommands.cpp b/dbms/src/Storages/PartitionCommands.cpp index 00938a30996..e7daabb246c 100644 --- a/dbms/src/Storages/PartitionCommands.cpp +++ b/dbms/src/Storages/PartitionCommands.cpp @@ -58,13 +58,6 @@ std::optional PartitionCommand::parse(const ASTAlterCommand * res.with_name = command_ast->with_name; return res; } - else if (command_ast->type == ASTAlterCommand::FREEZE_ALL) - { - PartitionCommand res; - res.type = FREEZE_ALL; - res.with_name = command_ast->with_name; - return res; - } else if (command_ast->type == ASTAlterCommand::DROP_COLUMN && command_ast->partition) { if (!command_ast->clear_column) diff --git a/dbms/src/Storages/PartitionCommands.h b/dbms/src/Storages/PartitionCommands.h index e65ddf71d81..e1e46673f1f 100644 --- a/dbms/src/Storages/PartitionCommands.h +++ b/dbms/src/Storages/PartitionCommands.h @@ -17,13 +17,12 @@ struct PartitionCommand { enum Type { - DROP_PARTITION, ATTACH_PARTITION, - REPLACE_PARTITION, + CLEAR_COLUMN, + DROP_PARTITION, FETCH_PARTITION, FREEZE_PARTITION, - FREEZE_ALL, - CLEAR_COLUMN, + REPLACE_PARTITION, }; Type type; diff --git a/dbms/src/Storages/StorageMaterializedView.cpp b/dbms/src/Storages/StorageMaterializedView.cpp index 4c5f612a702..b83c49c3a25 100644 --- a/dbms/src/Storages/StorageMaterializedView.cpp +++ b/dbms/src/Storages/StorageMaterializedView.cpp @@ -263,12 +263,6 @@ void StorageMaterializedView::freezePartition(const ASTPtr & partition, const St getTargetTable()->freezePartition(partition, with_name, context); } -void StorageMaterializedView::freezeAll(const String & with_name, const Context & context) -{ - checkStatementCanBeForwarded(); - getTargetTable()->freezeAll(with_name, context); -} - void StorageMaterializedView::mutate(const MutationCommands & commands, const Context & context) { checkStatementCanBeForwarded(); diff --git a/dbms/src/Storages/StorageMaterializedView.h b/dbms/src/Storages/StorageMaterializedView.h index 5aeb998d648..c20757b802b 100644 --- a/dbms/src/Storages/StorageMaterializedView.h +++ b/dbms/src/Storages/StorageMaterializedView.h @@ -39,7 +39,6 @@ public: void clearColumnInPartition(const ASTPtr & partition, const Field & column_name, const Context & context) override; void attachPartition(const ASTPtr & partition, bool part, const Context & context) override; void freezePartition(const ASTPtr & partition, const String & with_name, const Context & context) override; - void freezeAll(const String & with_name, const Context & context) override; void mutate(const MutationCommands & commands, const Context & context) override; void shutdown() override; diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 6d432f388c1..14451a46384 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -261,6 +261,15 @@ void StorageMergeTree::alter( if (primary_key_is_modified) data.loadDataParts(false); + + /// Do freeze of all parts after all other operations. + for (const AlterCommand & param : params) + { + if (param.type == AlterCommand::FREEZE_ALL) + { + data.freezeAll(param.with_name, context); + } + } } @@ -863,11 +872,6 @@ void StorageMergeTree::freezePartition(const ASTPtr & partition, const String & data.freezePartition(partition, with_name, context); } -void StorageMergeTree::freezeAll(const String & with_name, const Context & context) -{ - data.freezeAll(with_name, context); -} - void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, const Context & context) { auto lock1 = lockStructure(false, __PRETTY_FUNCTION__); diff --git a/dbms/src/Storages/StorageMergeTree.h b/dbms/src/Storages/StorageMergeTree.h index f3ffac89bb3..0182f31dc8e 100644 --- a/dbms/src/Storages/StorageMergeTree.h +++ b/dbms/src/Storages/StorageMergeTree.h @@ -71,7 +71,6 @@ public: void attachPartition(const ASTPtr & partition, bool part, const Context & context) override; void replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, const Context & context) override; void freezePartition(const ASTPtr & partition, const String & with_name, const Context & context) override; - void freezeAll(const String & with_name, const Context & context) override; void mutate(const MutationCommands & commands, const Context & context) override; diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 7c1576f922d..1d29a6dba8b 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -4160,12 +4160,6 @@ void StorageReplicatedMergeTree::freezePartition(const ASTPtr & partition, const } -void StorageReplicatedMergeTree::freezeAll(const String & with_name, const Context & context) -{ - data.freezeAll(with_name, context); -} - - void StorageReplicatedMergeTree::mutate(const MutationCommands & commands, const Context &) { /// Overview of the mutation algorithm. diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h index 37a25bb02b4..491f30d93e5 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.h +++ b/dbms/src/Storages/StorageReplicatedMergeTree.h @@ -122,7 +122,6 @@ public: void replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, const Context & context) override; void fetchPartition(const ASTPtr & partition, const String & from, const Context & context) override; void freezePartition(const ASTPtr & partition, const String & with_name, const Context & context) override; - void freezeAll(const String & with_name, const Context & context) override; void mutate(const MutationCommands & commands, const Context & context) override; diff --git a/dbms/tests/clickhouse-test b/dbms/tests/clickhouse-test index fc70d3f6499..0abf85f9d79 100755 --- a/dbms/tests/clickhouse-test +++ b/dbms/tests/clickhouse-test @@ -277,7 +277,7 @@ def main(args): result_is_different = subprocess.call(['cmp', '-s', reference_file, stdout_file], stdout = PIPE) if result_is_different: - (diff, _) = Popen(['diff', '--side-by-side', reference_file, stdout_file], stdout = PIPE).communicate() + (diff, _) = Popen(['diff', '-u', reference_file, stdout_file], stdout = PIPE).communicate() diff = unicode(diff, errors='replace', encoding='utf-8') failure = et.Element("failure", attrib = {"message": "result differs with reference"}) diff --git a/dbms/tests/queries/0_stateless/00428_partition.reference b/dbms/tests/queries/0_stateless/00428_partition.reference index 788600df41e..0bcf3393620 100644 --- a/dbms/tests/queries/0_stateless/00428_partition.reference +++ b/dbms/tests/queries/0_stateless/00428_partition.reference @@ -1,6 +1,44 @@ 5 5 +55a54008ad1ba589aa210d2629c1df41 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/primary.idx +88cdc31ded355e7572d68d8cde525d3a shadow/1/data/test/partition_428/19700201_19700201_1_1_0/p.bin +4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/p.mrk +e2af3bef1fd129aea73a890ede1e7a30 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/k.bin +4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/k.mrk +082814b5aa5109160d5c0c5aff10d4df shadow/1/data/test/partition_428/19700201_19700201_1_1_0/v1.bin +4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/v1.mrk +77d5af402ada101574f4da114f242e02 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/columns.txt +b01e3d4df40467db3f1c2d029f59f6a2 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/checksums.txt +9e688c58a5487b8eaf69c9e1005ad0bf shadow/1/data/test/partition_428/19700102_19700102_2_2_0/primary.idx +cfcb770c3ecd0990dcceb1bde129e6c6 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/p.bin +4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/p.mrk +082814b5aa5109160d5c0c5aff10d4df shadow/1/data/test/partition_428/19700102_19700102_2_2_0/k.bin +4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/k.mrk +38e62ff37e1e5064e9a3f605dfe09d13 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/v1.bin +4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/v1.mrk +77d5af402ada101574f4da114f242e02 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/columns.txt +e6654eba9e88b001280d3bdd21ccc417 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/checksums.txt +b026324c6904b2a9cb4b88d6d61c81d1 shadow/increment.txt 5 5 +55a54008ad1ba589aa210d2629c1df41 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/primary.idx +88cdc31ded355e7572d68d8cde525d3a shadow/1/data/test/partition_428/19700201_19700201_1_1_0/p.bin +4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/p.mrk +e2af3bef1fd129aea73a890ede1e7a30 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/k.bin +4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/k.mrk +082814b5aa5109160d5c0c5aff10d4df shadow/1/data/test/partition_428/19700201_19700201_1_1_0/v1.bin +4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/v1.mrk +77d5af402ada101574f4da114f242e02 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/columns.txt +b01e3d4df40467db3f1c2d029f59f6a2 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/checksums.txt +9e688c58a5487b8eaf69c9e1005ad0bf shadow/1/data/test/partition_428/19700102_19700102_2_2_0/primary.idx +cfcb770c3ecd0990dcceb1bde129e6c6 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/p.bin +4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/p.mrk +082814b5aa5109160d5c0c5aff10d4df shadow/1/data/test/partition_428/19700102_19700102_2_2_0/k.bin +4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/k.mrk +38e62ff37e1e5064e9a3f605dfe09d13 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/v1.bin +4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/v1.mrk +77d5af402ada101574f4da114f242e02 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/columns.txt +e6654eba9e88b001280d3bdd21ccc417 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/checksums.txt +b026324c6904b2a9cb4b88d6d61c81d1 shadow/increment.txt 31,1,2 1,2,3 diff --git a/dbms/tests/queries/0_stateless/00428_partition.sh b/dbms/tests/queries/0_stateless/00428_partition.sh index 1deed4f853e..2bb56e5d569 100755 --- a/dbms/tests/queries/0_stateless/00428_partition.sh +++ b/dbms/tests/queries/0_stateless/00428_partition.sh @@ -25,9 +25,8 @@ done $chl "ALTER TABLE test.partition_428 FREEZE" -pushd -cd $chdir && find shadow -type f -exec md5sum {} \; -popd +# Do `cd` for consistent output for reference +cd $ch_dir && find shadow -type f -exec md5sum {} \; $chl "ALTER TABLE test.partition_428 DETACH PARTITION 197001" $chl "ALTER TABLE test.partition_428 ATTACH PARTITION 197001" @@ -40,10 +39,8 @@ done $chl "ALTER TABLE test.partition_428 MODIFY COLUMN v1 Int8" -# Check that backup hasn't changed -pushd -cd $chdir && find shadow -type f -exec md5sum {} \; -popd +# Check the backup hasn't changed +cd $ch_dir && find shadow -type f -exec md5sum {} \; $chl "OPTIMIZE TABLE test.partition_428" From 126ff55fa7524b016ed1593b4208cc0306e0e235 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Thu, 1 Nov 2018 14:16:04 +0300 Subject: [PATCH 029/145] =?UTF-8?q?Forgot=20about=20'WITH=20NAME=20?= =?UTF-8?q?=E2=80=A6'=20statement=20part.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dbms/src/Parsers/ASTAlterQuery.cpp | 8 +++++++- dbms/src/Parsers/ParserAlterQuery.cpp | 13 +++++++++++++ dbms/src/Parsers/ParserAlterQuery.h | 2 +- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/dbms/src/Parsers/ASTAlterQuery.cpp b/dbms/src/Parsers/ASTAlterQuery.cpp index 5e8fdf88276..1748edbdcd1 100644 --- a/dbms/src/Parsers/ASTAlterQuery.cpp +++ b/dbms/src/Parsers/ASTAlterQuery.cpp @@ -130,7 +130,13 @@ void ASTAlterCommand::formatImpl( } else if (type == ASTAlterCommand::FREEZE_ALL) { - // TODO: implement this. + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "FREEZE"; + + if (!with_name.empty()) + { + settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << "WITH NAME" << (settings.hilite ? hilite_none : "") + << " " << std::quoted(with_name, '\''); + } } else if (type == ASTAlterCommand::DELETE) { diff --git a/dbms/src/Parsers/ParserAlterQuery.cpp b/dbms/src/Parsers/ParserAlterQuery.cpp index 60fa9a9beae..023c2daf6aa 100644 --- a/dbms/src/Parsers/ParserAlterQuery.cpp +++ b/dbms/src/Parsers/ParserAlterQuery.cpp @@ -183,6 +183,19 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected } else { + /// WITH NAME 'name' - place local backup to directory with specified name + if (s_with.ignore(pos, expected)) + { + if (!s_name.ignore(pos, expected)) + return false; + + ASTPtr ast_with_name; + if (!parser_string_literal.parse(pos, ast_with_name, expected)) + return false; + + command->with_name = typeid_cast(*ast_with_name).value.get(); + } + command->type = ASTAlterCommand::FREEZE_ALL; } } diff --git a/dbms/src/Parsers/ParserAlterQuery.h b/dbms/src/Parsers/ParserAlterQuery.h index 0e05294112f..3c0f8708bad 100644 --- a/dbms/src/Parsers/ParserAlterQuery.h +++ b/dbms/src/Parsers/ParserAlterQuery.h @@ -15,7 +15,7 @@ namespace DB * [MODIFY PRIMARY KEY (a, b, c...)] * [DROP|DETACH|ATTACH PARTITION|PART partition, ...] * [FETCH PARTITION partition FROM ...] - * [FREEZE [PARTITION]] + * [FREEZE [PARTITION] [WITH NAME name]] * [DELETE WHERE ...] * [UPDATE col_name = expr, ... WHERE ...] */ From 56f0f6194fd312791b6ad87b5f4734f7e406a355 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Thu, 1 Nov 2018 14:51:17 +0300 Subject: [PATCH 030/145] Refactor parsing of 'WITH NAME' --- dbms/src/Parsers/ParserAlterQuery.cpp | 43 +++++++++------------------ 1 file changed, 14 insertions(+), 29 deletions(-) diff --git a/dbms/src/Parsers/ParserAlterQuery.cpp b/dbms/src/Parsers/ParserAlterQuery.cpp index 023c2daf6aa..2aee68211da 100644 --- a/dbms/src/Parsers/ParserAlterQuery.cpp +++ b/dbms/src/Parsers/ParserAlterQuery.cpp @@ -166,38 +166,23 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (!parser_partition.parse(pos, command->partition, expected)) return false; - /// WITH NAME 'name' - place local backup to directory with specified name - if (s_with.ignore(pos, expected)) - { - if (!s_name.ignore(pos, expected)) - return false; - - ASTPtr ast_with_name; - if (!parser_string_literal.parse(pos, ast_with_name, expected)) - return false; - - command->with_name = typeid_cast(*ast_with_name).value.get(); - } - command->type = ASTAlterCommand::FREEZE_PARTITION; - } - else - { - /// WITH NAME 'name' - place local backup to directory with specified name - if (s_with.ignore(pos, expected)) - { - if (!s_name.ignore(pos, expected)) - return false; - - ASTPtr ast_with_name; - if (!parser_string_literal.parse(pos, ast_with_name, expected)) - return false; - - command->with_name = typeid_cast(*ast_with_name).value.get(); - } - + } else { command->type = ASTAlterCommand::FREEZE_ALL; } + + /// WITH NAME 'name' - place local backup to directory with specified name + if (s_with.ignore(pos, expected)) + { + if (!s_name.ignore(pos, expected)) + return false; + + ASTPtr ast_with_name; + if (!parser_string_literal.parse(pos, ast_with_name, expected)) + return false; + + command->with_name = typeid_cast(*ast_with_name).value.get(); + } } else if (s_modify_column.ignore(pos, expected)) { From b01acc891746916c6a5dab7b5c90cd67d513c67f Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Thu, 1 Nov 2018 16:55:49 +0300 Subject: [PATCH 031/145] Fix style --- dbms/src/Parsers/ParserAlterQuery.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dbms/src/Parsers/ParserAlterQuery.cpp b/dbms/src/Parsers/ParserAlterQuery.cpp index 2aee68211da..2beed9f185d 100644 --- a/dbms/src/Parsers/ParserAlterQuery.cpp +++ b/dbms/src/Parsers/ParserAlterQuery.cpp @@ -167,7 +167,9 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected return false; command->type = ASTAlterCommand::FREEZE_PARTITION; - } else { + } + else + { command->type = ASTAlterCommand::FREEZE_ALL; } From 0cdb808ded28701618cf738f5b749262820fd4ba Mon Sep 17 00:00:00 2001 From: abyss7 <5627721+abyss7@users.noreply.github.com> Date: Thu, 1 Nov 2018 16:59:19 +0300 Subject: [PATCH 032/145] Fix comments --- dbms/src/Storages/AlterCommands.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Storages/AlterCommands.h b/dbms/src/Storages/AlterCommands.h index 8a07adc7205..3997c92dd9c 100644 --- a/dbms/src/Storages/AlterCommands.h +++ b/dbms/src/Storages/AlterCommands.h @@ -21,9 +21,9 @@ struct AlterCommand ADD_COLUMN, DROP_COLUMN, + /// Even though this command operates on partitions, it needs global locks to prevent table alteration. + /// It's vulnerable to the column modification commands. FREEZE_ALL, - // Even though this command operates on partitions, it needs global locks to prevent table alteration. - // It's vulnerable to the column modification commands. MODIFY_COLUMN, MODIFY_PRIMARY_KEY, From 5d3da110a87f69d3a30da5701f12ad885ab64602 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Thu, 1 Nov 2018 20:13:01 +0300 Subject: [PATCH 033/145] Refactor |freezePartition()| and |freezeAll()| --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 114 ++++++++---------- dbms/src/Storages/MergeTree/MergeTreeData.h | 4 + 2 files changed, 51 insertions(+), 67 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index fb625b373d0..a18ca332006 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -1281,36 +1281,7 @@ MergeTreeData::AlterDataPartTransactionPtr MergeTreeData::alterDataPart( void MergeTreeData::freezeAll(const String & with_name, const Context & context) { - String clickhouse_path = Poco::Path(context.getPath()).makeAbsolute().toString(); - String shadow_path = clickhouse_path + "shadow/"; - Poco::File(shadow_path).createDirectories(); - String backup_path = shadow_path - + (!with_name.empty() - ? escapeForFileName(with_name) - : toString(Increment(shadow_path + "increment.txt").get(true))) - + "/"; - - LOG_DEBUG(log, "Snapshot will be placed at " + backup_path); - - /// Acquire a snapshot of active data parts to prevent removing while doing backup. - const auto data_parts = getDataParts(); - - size_t parts_processed = 0; - for (const auto & part : data_parts) - { - LOG_DEBUG(log, "Freezing part " << part->name); - - String part_absolute_path = Poco::Path(part->getFullPath()).absolute().toString(); - if (!startsWith(part_absolute_path, clickhouse_path)) - throw Exception("Part path " + part_absolute_path + " is not inside " + clickhouse_path, ErrorCodes::LOGICAL_ERROR); - - String backup_part_absolute_path = part_absolute_path; - backup_part_absolute_path.replace(0, clickhouse_path.size(), backup_path); - localBackup(part_absolute_path, backup_part_absolute_path); - ++parts_processed; - } - - LOG_DEBUG(log, "Freezed " << parts_processed << " parts"); + freezePartitionsByMatcher([] (DataPartPtr){ return true; }, with_name, context); } void MergeTreeData::AlterDataPartTransaction::commit() @@ -2098,44 +2069,16 @@ void MergeTreeData::freezePartition(const ASTPtr & partition_ast, const String & else LOG_DEBUG(log, "Freezing parts with partition ID " + partition_id); - String clickhouse_path = Poco::Path(context.getPath()).makeAbsolute().toString(); - String shadow_path = clickhouse_path + "shadow/"; - Poco::File(shadow_path).createDirectories(); - String backup_path = shadow_path - + (!with_name.empty() - ? escapeForFileName(with_name) - : toString(Increment(shadow_path + "increment.txt").get(true))) - + "/"; - LOG_DEBUG(log, "Snapshot will be placed at " + backup_path); - - /// Acquire a snapshot of active data parts to prevent removing while doing backup. - const auto data_parts = getDataParts(); - - size_t parts_processed = 0; - for (const auto & part : data_parts) - { - if (prefix) - { - if (!startsWith(part->info.partition_id, *prefix)) - continue; - } - else if (part->info.partition_id != partition_id) - continue; - - LOG_DEBUG(log, "Freezing part " << part->name); - - String part_absolute_path = Poco::Path(part->getFullPath()).absolute().toString(); - if (!startsWith(part_absolute_path, clickhouse_path)) - throw Exception("Part path " + part_absolute_path + " is not inside " + clickhouse_path, ErrorCodes::LOGICAL_ERROR); - - String backup_part_absolute_path = part_absolute_path; - backup_part_absolute_path.replace(0, clickhouse_path.size(), backup_path); - localBackup(part_absolute_path, backup_part_absolute_path); - ++parts_processed; - } - - LOG_DEBUG(log, "Freezed " << parts_processed << " parts"); + freezePartitionsByMatcher( + [&prefix, &partition_id](DataPartPtr part) { + if (prefix) + return startsWith(part->info.partition_id, *prefix); + else + return part->info.partition_id == partition_id; + }, + with_name, + context); } size_t MergeTreeData::getPartitionSize(const std::string & partition_id) const @@ -2476,4 +2419,41 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::cloneAndLoadDataPart(const Merg return dst_data_part; } +void MergeTreeData::freezePartitionsByMatcher(MatcherFn matcher, const String & with_name, const Context & context) +{ + String clickhouse_path = Poco::Path(context.getPath()).makeAbsolute().toString(); + String shadow_path = clickhouse_path + "shadow/"; + Poco::File(shadow_path).createDirectories(); + String backup_path = shadow_path + + (!with_name.empty() + ? escapeForFileName(with_name) + : toString(Increment(shadow_path + "increment.txt").get(true))) + + "/"; + + LOG_DEBUG(log, "Snapshot will be placed at " + backup_path); + + /// Acquire a snapshot of active data parts to prevent removing while doing backup. + const auto data_parts = getDataParts(); + + size_t parts_processed = 0; + for (const auto & part : data_parts) + { + if (!matcher(part)) + continue; + + LOG_DEBUG(log, "Freezing part " << part->name); + + String part_absolute_path = Poco::Path(part->getFullPath()).absolute().toString(); + if (!startsWith(part_absolute_path, clickhouse_path)) + throw Exception("Part path " + part_absolute_path + " is not inside " + clickhouse_path, ErrorCodes::LOGICAL_ERROR); + + String backup_part_absolute_path = part_absolute_path; + backup_part_absolute_path.replace(0, clickhouse_path.size(), backup_path); + localBackup(part_absolute_path, backup_part_absolute_path); + ++parts_processed; + } + + LOG_DEBUG(log, "Freezed " << parts_processed << " parts"); +} + } diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index cb78ba78552..10b2972c6a8 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -723,6 +723,10 @@ private: /// Checks whether the column is in the primary key, possibly wrapped in a chain of functions with single argument. bool isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(const ASTPtr & node) const; + + /// Common part for |freezePartition()| and |freezeAll()|. + using MatcherFn = std::function; + void freezePartitionsByMatcher(MatcherFn matcher, const String & with_name, const Context & context); }; } From baf9566413e6ee4c09fdf2d4d9bb92034bdf392c Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Thu, 1 Nov 2018 20:47:19 +0300 Subject: [PATCH 034/145] Fix style --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index a18ca332006..cddc5155236 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -2071,7 +2071,8 @@ void MergeTreeData::freezePartition(const ASTPtr & partition_ast, const String & freezePartitionsByMatcher( - [&prefix, &partition_id](DataPartPtr part) { + [&prefix, &partition_id](DataPartPtr part) + { if (prefix) return startsWith(part->info.partition_id, *prefix); else From 71545ec22621a6cf310841c640a3335ac1fa5f27 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Tue, 6 Nov 2018 15:37:38 +0300 Subject: [PATCH 035/145] Sort output of partition query-test - for stable reference. --- .../0_stateless/00428_partition.reference | 68 +++++++++---------- .../queries/0_stateless/00428_partition.sh | 4 +- 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00428_partition.reference b/dbms/tests/queries/0_stateless/00428_partition.reference index 0bcf3393620..241048131f7 100644 --- a/dbms/tests/queries/0_stateless/00428_partition.reference +++ b/dbms/tests/queries/0_stateless/00428_partition.reference @@ -1,44 +1,44 @@ 5 5 -55a54008ad1ba589aa210d2629c1df41 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/primary.idx -88cdc31ded355e7572d68d8cde525d3a shadow/1/data/test/partition_428/19700201_19700201_1_1_0/p.bin -4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/p.mrk -e2af3bef1fd129aea73a890ede1e7a30 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/k.bin -4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/k.mrk -082814b5aa5109160d5c0c5aff10d4df shadow/1/data/test/partition_428/19700201_19700201_1_1_0/v1.bin -4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/v1.mrk -77d5af402ada101574f4da114f242e02 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/columns.txt -b01e3d4df40467db3f1c2d029f59f6a2 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/checksums.txt -9e688c58a5487b8eaf69c9e1005ad0bf shadow/1/data/test/partition_428/19700102_19700102_2_2_0/primary.idx -cfcb770c3ecd0990dcceb1bde129e6c6 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/p.bin -4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/p.mrk 082814b5aa5109160d5c0c5aff10d4df shadow/1/data/test/partition_428/19700102_19700102_2_2_0/k.bin -4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/k.mrk -38e62ff37e1e5064e9a3f605dfe09d13 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/v1.bin -4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/v1.mrk -77d5af402ada101574f4da114f242e02 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/columns.txt -e6654eba9e88b001280d3bdd21ccc417 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/checksums.txt -b026324c6904b2a9cb4b88d6d61c81d1 shadow/increment.txt -5 -5 -55a54008ad1ba589aa210d2629c1df41 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/primary.idx -88cdc31ded355e7572d68d8cde525d3a shadow/1/data/test/partition_428/19700201_19700201_1_1_0/p.bin -4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/p.mrk -e2af3bef1fd129aea73a890ede1e7a30 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/k.bin -4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/k.mrk 082814b5aa5109160d5c0c5aff10d4df shadow/1/data/test/partition_428/19700201_19700201_1_1_0/v1.bin -4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/v1.mrk -77d5af402ada101574f4da114f242e02 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/columns.txt -b01e3d4df40467db3f1c2d029f59f6a2 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/checksums.txt -9e688c58a5487b8eaf69c9e1005ad0bf shadow/1/data/test/partition_428/19700102_19700102_2_2_0/primary.idx -cfcb770c3ecd0990dcceb1bde129e6c6 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/p.bin -4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/p.mrk -082814b5aa5109160d5c0c5aff10d4df shadow/1/data/test/partition_428/19700102_19700102_2_2_0/k.bin -4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/k.mrk 38e62ff37e1e5064e9a3f605dfe09d13 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/v1.bin +4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/k.mrk +4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/p.mrk 4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/v1.mrk +4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/k.mrk +4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/p.mrk +4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/v1.mrk +55a54008ad1ba589aa210d2629c1df41 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/primary.idx 77d5af402ada101574f4da114f242e02 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/columns.txt -e6654eba9e88b001280d3bdd21ccc417 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/checksums.txt +77d5af402ada101574f4da114f242e02 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/columns.txt +88cdc31ded355e7572d68d8cde525d3a shadow/1/data/test/partition_428/19700201_19700201_1_1_0/p.bin +9e688c58a5487b8eaf69c9e1005ad0bf shadow/1/data/test/partition_428/19700102_19700102_2_2_0/primary.idx +b01e3d4df40467db3f1c2d029f59f6a2 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/checksums.txt b026324c6904b2a9cb4b88d6d61c81d1 shadow/increment.txt +cfcb770c3ecd0990dcceb1bde129e6c6 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/p.bin +e2af3bef1fd129aea73a890ede1e7a30 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/k.bin +e6654eba9e88b001280d3bdd21ccc417 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/checksums.txt +5 +5 +082814b5aa5109160d5c0c5aff10d4df shadow/1/data/test/partition_428/19700102_19700102_2_2_0/k.bin +082814b5aa5109160d5c0c5aff10d4df shadow/1/data/test/partition_428/19700201_19700201_1_1_0/v1.bin +38e62ff37e1e5064e9a3f605dfe09d13 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/v1.bin +4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/k.mrk +4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/p.mrk +4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/v1.mrk +4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/k.mrk +4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/p.mrk +4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/v1.mrk +55a54008ad1ba589aa210d2629c1df41 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/primary.idx +77d5af402ada101574f4da114f242e02 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/columns.txt +77d5af402ada101574f4da114f242e02 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/columns.txt +88cdc31ded355e7572d68d8cde525d3a shadow/1/data/test/partition_428/19700201_19700201_1_1_0/p.bin +9e688c58a5487b8eaf69c9e1005ad0bf shadow/1/data/test/partition_428/19700102_19700102_2_2_0/primary.idx +b01e3d4df40467db3f1c2d029f59f6a2 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/checksums.txt +b026324c6904b2a9cb4b88d6d61c81d1 shadow/increment.txt +cfcb770c3ecd0990dcceb1bde129e6c6 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/p.bin +e2af3bef1fd129aea73a890ede1e7a30 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/k.bin +e6654eba9e88b001280d3bdd21ccc417 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/checksums.txt 31,1,2 1,2,3 diff --git a/dbms/tests/queries/0_stateless/00428_partition.sh b/dbms/tests/queries/0_stateless/00428_partition.sh index 2bb56e5d569..27cb94c1d4d 100755 --- a/dbms/tests/queries/0_stateless/00428_partition.sh +++ b/dbms/tests/queries/0_stateless/00428_partition.sh @@ -26,7 +26,7 @@ done $chl "ALTER TABLE test.partition_428 FREEZE" # Do `cd` for consistent output for reference -cd $ch_dir && find shadow -type f -exec md5sum {} \; +cd $ch_dir && find shadow -type f -exec md5sum {} \; | sort $chl "ALTER TABLE test.partition_428 DETACH PARTITION 197001" $chl "ALTER TABLE test.partition_428 ATTACH PARTITION 197001" @@ -40,7 +40,7 @@ done $chl "ALTER TABLE test.partition_428 MODIFY COLUMN v1 Int8" # Check the backup hasn't changed -cd $ch_dir && find shadow -type f -exec md5sum {} \; +cd $ch_dir && find shadow -type f -exec md5sum {} \; | sort $chl "OPTIMIZE TABLE test.partition_428" From aeb0759f2214db534cc39d78ca08a88e7893be91 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Fri, 9 Nov 2018 14:32:55 +0300 Subject: [PATCH 036/145] Improve clang-format style --- .clang-format | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.clang-format b/.clang-format index 06c3d164ee7..7ddeba81288 100644 --- a/.clang-format +++ b/.clang-format @@ -1,6 +1,6 @@ BasedOnStyle: WebKit Language: Cpp -AlignAfterOpenBracket: false +AlignAfterOpenBracket: AlwaysBreak BreakBeforeBraces: Custom BraceWrapping: AfterClass: true @@ -25,7 +25,7 @@ Standard: Cpp11 PointerAlignment: Middle MaxEmptyLinesToKeep: 2 KeepEmptyLinesAtTheStartOfBlocks: false -AllowShortFunctionsOnASingleLine: Empty +AllowShortFunctionsOnASingleLine: InlineOnly AlwaysBreakTemplateDeclarations: true IndentCaseLabels: true SpaceAfterTemplateKeyword: true From 4eb0f33ec2c8484b3238b419b1fb0319f872b68e Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Mon, 12 Nov 2018 15:24:15 +0300 Subject: [PATCH 037/145] Add some locks on freeze and support |freezeAll()| for replicated merge-tree. --- dbms/src/Storages/StorageMergeTree.cpp | 3 +++ dbms/src/Storages/StorageReplicatedMergeTree.cpp | 12 ++++++++++++ 2 files changed, 15 insertions(+) diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 14451a46384..0067f6e5de1 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -815,6 +815,8 @@ void StorageMergeTree::dropPartition(const ASTPtr & /*query*/, const ASTPtr & pa void StorageMergeTree::attachPartition(const ASTPtr & partition, bool part, const Context & context) { + // TODO: should get some locks to prevent race with 'alter … modify column' + String partition_id; if (part) @@ -869,6 +871,7 @@ void StorageMergeTree::attachPartition(const ASTPtr & partition, bool part, cons void StorageMergeTree::freezePartition(const ASTPtr & partition, const String & with_name, const Context & context) { + auto lock = lockStructure(false, __PRETTY_FUNCTION__); data.freezePartition(partition, with_name, context); } diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 1d29a6dba8b..b430a66fc8c 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -3279,6 +3279,15 @@ void StorageReplicatedMergeTree::alter(const AlterCommands & params, } } + /// Do freeze of all parts local-only after all other operations. + for (const AlterCommand & param : params) + { + if (param.type == AlterCommand::FREEZE_ALL) + { + data.freezeAll(param.with_name, context); + } + } + LOG_DEBUG(log, "ALTER finished"); } @@ -3427,6 +3436,8 @@ void StorageReplicatedMergeTree::truncate(const ASTPtr & query) void StorageReplicatedMergeTree::attachPartition(const ASTPtr & partition, bool attach_part, const Context & context) { + // TODO: should get some locks to prevent race with 'alter … modify column' + assertNotReadonly(); String partition_id; @@ -4156,6 +4167,7 @@ void StorageReplicatedMergeTree::fetchPartition(const ASTPtr & partition, const void StorageReplicatedMergeTree::freezePartition(const ASTPtr & partition, const String & with_name, const Context & context) { + auto lock = lockStructure(false, __PRETTY_FUNCTION__); data.freezePartition(partition, with_name, context); } From b869cfed9a3dc65e4a3541db4d31603df6a770aa Mon Sep 17 00:00:00 2001 From: Sabyanin Maxim Date: Mon, 12 Nov 2018 18:45:35 +0300 Subject: [PATCH 038/145] fixed couple of mistakes. --- .../Interpreters/InterpreterDescribeQuery.cpp | 1 - dbms/src/Parsers/ASTAlterQuery.cpp | 2 +- dbms/src/Parsers/ASTColumnDeclaration.h | 2 +- dbms/src/Storages/ColumnComment.h | 13 --- dbms/src/Storages/ColumnsDescription.h | 7 +- .../System/StorageSystemPartsBase.cpp | 3 +- .../0_stateless/00030_alter_table.reference | 52 ++++----- .../00061_merge_tree_alter.reference | 106 +++++++++--------- .../00079_defaulted_columns.reference | 54 ++++----- .../00168_buffer_defaults.reference | 12 +- .../0_stateless/00294_shard_enums.reference | 40 +++---- .../0_stateless/00415_into_outfile.reference | 2 +- ...c_table_functions_and_subqueries.reference | 46 ++++---- .../00557_alter_null_storage_tables.reference | 6 +- .../queries/0_stateless/00642_cast.reference | 4 +- ...ult_database_on_client_reconnect.reference | 2 +- .../00700_decimal_defaults.reference | 12 +- 17 files changed, 176 insertions(+), 188 deletions(-) delete mode 100644 dbms/src/Storages/ColumnComment.h diff --git a/dbms/src/Interpreters/InterpreterDescribeQuery.cpp b/dbms/src/Interpreters/InterpreterDescribeQuery.cpp index 6ddd9d93319..db33383b76d 100644 --- a/dbms/src/Interpreters/InterpreterDescribeQuery.cpp +++ b/dbms/src/Interpreters/InterpreterDescribeQuery.cpp @@ -45,7 +45,6 @@ Block InterpreterDescribeQuery::getSampleBlock() col.name = "default_expression"; block.insert(col); - // TODO: may be it unneeded col.name = "comment_expression"; block.insert(col); diff --git a/dbms/src/Parsers/ASTAlterQuery.cpp b/dbms/src/Parsers/ASTAlterQuery.cpp index 65f281a3f71..de379fe5e3d 100644 --- a/dbms/src/Parsers/ASTAlterQuery.cpp +++ b/dbms/src/Parsers/ASTAlterQuery.cpp @@ -143,7 +143,7 @@ void ASTAlterCommand::formatImpl( } else if (type == ASTAlterCommand::COMMENT_COLUMN) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "COMMENT COLUMN " << (settings.hilite ? hilite_identifier : ""); + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "COMMENT COLUMN " << (settings.hilite ? hilite_none : ""); column->formatImpl(settings, state, frame); settings.ostr << " " << (settings.hilite ? hilite_none : ""); comment->formatImpl(settings, state, frame); diff --git a/dbms/src/Parsers/ASTColumnDeclaration.h b/dbms/src/Parsers/ASTColumnDeclaration.h index 010c845b244..870472fcb30 100644 --- a/dbms/src/Parsers/ASTColumnDeclaration.h +++ b/dbms/src/Parsers/ASTColumnDeclaration.h @@ -65,7 +65,7 @@ public: if (comment) { - settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "COMMENT " << (settings.hilite ? hilite_none : "") << ' '; + settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "COMMENT" << (settings.hilite ? hilite_none : "") << ' '; comment->formatImpl(settings, state, frame); } } diff --git a/dbms/src/Storages/ColumnComment.h b/dbms/src/Storages/ColumnComment.h deleted file mode 100644 index 0fc4e7e2742..00000000000 --- a/dbms/src/Storages/ColumnComment.h +++ /dev/null @@ -1,13 +0,0 @@ -#pragma once - -#include - -#include -#include - -namespace DB -{ - -using ColumnComments = std::unordered_map; - -} diff --git a/dbms/src/Storages/ColumnsDescription.h b/dbms/src/Storages/ColumnsDescription.h index 0fe88140ba1..89457421364 100644 --- a/dbms/src/Storages/ColumnsDescription.h +++ b/dbms/src/Storages/ColumnsDescription.h @@ -2,7 +2,6 @@ #include #include -#include #include #include @@ -10,6 +9,8 @@ namespace DB { +using ColumnComments = std::unordered_map; + struct ColumnsDescription { NamesAndTypesList ordinary; @@ -24,8 +25,8 @@ struct ColumnsDescription NamesAndTypesList ordinary_, NamesAndTypesList materialized_, NamesAndTypesList aliases_, - ColumnDefaults defaults_ = {}, - ColumnComments comments_ = {}) + ColumnDefaults defaults_, + ColumnComments comments_) : ordinary(std::move(ordinary_)) , materialized(std::move(materialized_)) , aliases(std::move(aliases_)) diff --git a/dbms/src/Storages/System/StorageSystemPartsBase.cpp b/dbms/src/Storages/System/StorageSystemPartsBase.cpp index 8cbf497e43c..9d7b2745721 100644 --- a/dbms/src/Storages/System/StorageSystemPartsBase.cpp +++ b/dbms/src/Storages/System/StorageSystemPartsBase.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -306,7 +307,7 @@ StorageSystemPartsBase::StorageSystemPartsBase(std::string name_, NamesAndTypesL add_alias("bytes", "bytes_on_disk"); add_alias("marks_size", "marks_bytes"); - setColumns(ColumnsDescription(std::move(columns_), {}, std::move(aliases), std::move(defaults))); + setColumns(ColumnsDescription(std::move(columns_), {}, std::move(aliases), std::move(defaults), ColumnComments{})); } } diff --git a/dbms/tests/queries/0_stateless/00030_alter_table.reference b/dbms/tests/queries/0_stateless/00030_alter_table.reference index ac2c0af78f4..05022b485f7 100644 --- a/dbms/tests/queries/0_stateless/00030_alter_table.reference +++ b/dbms/tests/queries/0_stateless/00030_alter_table.reference @@ -1,27 +1,27 @@ -CounterID UInt32 -StartDate Date -UserID UInt32 -VisitID UInt32 -NestedColumn.A Array(UInt8) -NestedColumn.S Array(String) -ToDrop UInt32 -Added0 UInt32 -Added1 UInt32 -Added2 UInt32 -AddedNested1.A Array(UInt32) -AddedNested1.B Array(UInt64) -AddedNested1.C Array(String) -AddedNested2.A Array(UInt32) -AddedNested2.B Array(UInt64) -CounterID UInt32 -StartDate Date -UserID UInt32 -VisitID UInt32 -Added0 String -Added1 UInt32 -Added2 UInt32 -AddedNested1.A Array(UInt32) -AddedNested1.C Array(String) -AddedNested2.A Array(UInt32) -AddedNested2.B Array(UInt64) +CounterID UInt32 +StartDate Date +UserID UInt32 +VisitID UInt32 +NestedColumn.A Array(UInt8) +NestedColumn.S Array(String) +ToDrop UInt32 +Added0 UInt32 +Added1 UInt32 +Added2 UInt32 +AddedNested1.A Array(UInt32) +AddedNested1.B Array(UInt64) +AddedNested1.C Array(String) +AddedNested2.A Array(UInt32) +AddedNested2.B Array(UInt64) +CounterID UInt32 +StartDate Date +UserID UInt32 +VisitID UInt32 +Added0 String +Added1 UInt32 +Added2 UInt32 +AddedNested1.A Array(UInt32) +AddedNested1.C Array(String) +AddedNested2.A Array(UInt32) +AddedNested2.B Array(UInt64) 1 2014-01-01 2 3 0 0 [] [] [] [] diff --git a/dbms/tests/queries/0_stateless/00061_merge_tree_alter.reference b/dbms/tests/queries/0_stateless/00061_merge_tree_alter.reference index 48a896785fd..9545f7b4200 100644 --- a/dbms/tests/queries/0_stateless/00061_merge_tree_alter.reference +++ b/dbms/tests/queries/0_stateless/00061_merge_tree_alter.reference @@ -1,56 +1,56 @@ -d Date -k UInt64 -i32 Int32 +d Date +k UInt64 +i32 Int32 CREATE TABLE test.alter ( d Date, k UInt64, i32 Int32) ENGINE = MergeTree(d, k, 8192) 2015-01-01 10 42 -d Date -k UInt64 -i32 Int32 -n.ui8 Array(UInt8) -n.s Array(String) +d Date +k UInt64 +i32 Int32 +n.ui8 Array(UInt8) +n.s Array(String) CREATE TABLE test.alter ( d Date, k UInt64, i32 Int32, `n.ui8` Array(UInt8), `n.s` Array(String)) ENGINE = MergeTree(d, k, 8192) 2015-01-01 8 40 [1,2,3] ['12','13','14'] 2015-01-01 10 42 [] [] -d Date -k UInt64 -i32 Int32 -n.ui8 Array(UInt8) -n.s Array(String) -n.d Array(Date) +d Date +k UInt64 +i32 Int32 +n.ui8 Array(UInt8) +n.s Array(String) +n.d Array(Date) CREATE TABLE test.alter ( d Date, k UInt64, i32 Int32, `n.ui8` Array(UInt8), `n.s` Array(String), `n.d` Array(Date)) ENGINE = MergeTree(d, k, 8192) 2015-01-01 7 39 [10,20,30] ['120','130','140'] ['2000-01-01','2000-01-01','2000-01-03'] 2015-01-01 8 40 [1,2,3] ['12','13','14'] ['0000-00-00','0000-00-00','0000-00-00'] 2015-01-01 10 42 [] [] [] -d Date -k UInt64 -i32 Int32 -n.ui8 Array(UInt8) -n.s Array(String) -n.d Array(Date) -s String DEFAULT \'0\' +d Date +k UInt64 +i32 Int32 +n.ui8 Array(UInt8) +n.s Array(String) +n.d Array(Date) +s String DEFAULT \'0\' CREATE TABLE test.alter ( d Date, k UInt64, i32 Int32, `n.ui8` Array(UInt8), `n.s` Array(String), `n.d` Array(Date), s String DEFAULT \'0\') ENGINE = MergeTree(d, k, 8192) 2015-01-01 6 38 [10,20,30] ['asd','qwe','qwe'] ['2000-01-01','2000-01-01','2000-01-03'] 100500 2015-01-01 7 39 [10,20,30] ['120','130','140'] ['2000-01-01','2000-01-01','2000-01-03'] 0 2015-01-01 8 40 [1,2,3] ['12','13','14'] ['0000-00-00','0000-00-00','0000-00-00'] 0 2015-01-01 10 42 [] [] [] 0 -d Date -k UInt64 -i32 Int32 -n.ui8 Array(UInt8) -n.s Array(String) -s Int64 +d Date +k UInt64 +i32 Int32 +n.ui8 Array(UInt8) +n.s Array(String) +s Int64 CREATE TABLE test.alter ( d Date, k UInt64, i32 Int32, `n.ui8` Array(UInt8), `n.s` Array(String), s Int64) ENGINE = MergeTree(d, k, 8192) 2015-01-01 6 38 [10,20,30] ['asd','qwe','qwe'] 100500 2015-01-01 7 39 [10,20,30] ['120','130','140'] 0 2015-01-01 8 40 [1,2,3] ['12','13','14'] 0 2015-01-01 10 42 [] [] 0 -d Date -k UInt64 -i32 Int32 -n.ui8 Array(UInt8) -n.s Array(String) -s UInt32 -n.d Array(Date) +d Date +k UInt64 +i32 Int32 +n.ui8 Array(UInt8) +n.s Array(String) +s UInt32 +n.d Array(Date) CREATE TABLE test.alter ( d Date, k UInt64, i32 Int32, `n.ui8` Array(UInt8), `n.s` Array(String), s UInt32, `n.d` Array(Date)) ENGINE = MergeTree(d, k, 8192) 2015-01-01 6 38 [10,20,30] ['asd','qwe','qwe'] 100500 ['0000-00-00','0000-00-00','0000-00-00'] 2015-01-01 7 39 [10,20,30] ['120','130','140'] 0 ['0000-00-00','0000-00-00','0000-00-00'] @@ -60,40 +60,40 @@ CREATE TABLE test.alter ( d Date, k UInt64, i32 Int32, `n.ui8` Array(UInt8), 2015-01-01 7 39 [10,20,30] ['120','130','140'] 0 ['0000-00-00','0000-00-00','0000-00-00'] 2015-01-01 8 40 [1,2,3] ['12','13','14'] 0 ['0000-00-00','0000-00-00','0000-00-00'] 2015-01-01 10 42 [] [] 0 [] -d Date -k UInt64 -i32 Int32 -n.s Array(String) -s UInt32 +d Date +k UInt64 +i32 Int32 +n.s Array(String) +s UInt32 CREATE TABLE test.alter ( d Date, k UInt64, i32 Int32, `n.s` Array(String), s UInt32) ENGINE = MergeTree(d, k, 8192) 2015-01-01 6 38 ['asd','qwe','qwe'] 100500 2015-01-01 7 39 ['120','130','140'] 0 2015-01-01 8 40 ['12','13','14'] 0 2015-01-01 10 42 [] 0 -d Date -k UInt64 -i32 Int32 -s UInt32 +d Date +k UInt64 +i32 Int32 +s UInt32 CREATE TABLE test.alter ( d Date, k UInt64, i32 Int32, s UInt32) ENGINE = MergeTree(d, k, 8192) 2015-01-01 6 38 100500 2015-01-01 7 39 0 2015-01-01 8 40 0 2015-01-01 10 42 0 -d Date -k UInt64 -i32 Int32 -s UInt32 -n.s Array(String) -n.d Array(Date) +d Date +k UInt64 +i32 Int32 +s UInt32 +n.s Array(String) +n.d Array(Date) CREATE TABLE test.alter ( d Date, k UInt64, i32 Int32, s UInt32, `n.s` Array(String), `n.d` Array(Date)) ENGINE = MergeTree(d, k, 8192) 2015-01-01 6 38 100500 [] [] 2015-01-01 7 39 0 [] [] 2015-01-01 8 40 0 [] [] 2015-01-01 10 42 0 [] [] -d Date -k UInt64 -i32 Int32 -s UInt32 +d Date +k UInt64 +i32 Int32 +s UInt32 CREATE TABLE test.alter ( d Date, k UInt64, i32 Int32, s UInt32) ENGINE = MergeTree(d, k, 8192) 2015-01-01 6 38 100500 2015-01-01 7 39 0 diff --git a/dbms/tests/queries/0_stateless/00079_defaulted_columns.reference b/dbms/tests/queries/0_stateless/00079_defaulted_columns.reference index 0665588a532..135da6b3681 100644 --- a/dbms/tests/queries/0_stateless/00079_defaulted_columns.reference +++ b/dbms/tests/queries/0_stateless/00079_defaulted_columns.reference @@ -1,42 +1,42 @@ -col1 UInt8 DEFAULT 0 -col1 UInt32 -col2 UInt64 DEFAULT col1 + 1 -col3 UInt64 MATERIALIZED col1 + 2 -col4 UInt64 ALIAS col1 + 3 +col1 UInt8 DEFAULT 0 +col1 UInt32 +col2 UInt64 DEFAULT col1 + 1 +col3 UInt64 MATERIALIZED col1 + 2 +col4 UInt64 ALIAS col1 + 3 10 11 12 13 99 -payload String -date Date MATERIALIZED today() -key UInt64 MATERIALIZED 0 * rand() +payload String +date Date MATERIALIZED today() +key UInt64 MATERIALIZED 0 * rand() hello clickhouse -payload String -date Date MATERIALIZED today() -key UInt64 MATERIALIZED 0 * rand() -payload_length UInt64 MATERIALIZED length(payload) +payload String +date Date MATERIALIZED today() +key UInt64 MATERIALIZED 0 * rand() +payload_length UInt64 MATERIALIZED length(payload) hello clickhouse 16 hello clickhouse 16 some string 11 hello clickhouse 16 some string 11 -payload String -payload_length UInt64 DEFAULT length(payload) -date Date MATERIALIZED today() -key UInt64 MATERIALIZED 0 * rand() +payload String +payload_length UInt64 DEFAULT length(payload) +date Date MATERIALIZED today() +key UInt64 MATERIALIZED 0 * rand() hello clickhouse 16 some string 11 -payload String -payload_length UInt16 DEFAULT length(payload) % 65535 -date Date MATERIALIZED today() -key UInt64 MATERIALIZED 0 * rand() +payload String +payload_length UInt16 DEFAULT length(payload) % 65535 +date Date MATERIALIZED today() +key UInt64 MATERIALIZED 0 * rand() hello clickhouse 16 some string 11 -payload String -payload_length UInt16 DEFAULT CAST(length(payload), \'UInt16\') -date Date MATERIALIZED today() -key UInt64 MATERIALIZED 0 * rand() -payload String -date Date MATERIALIZED today() -key UInt64 MATERIALIZED 0 * rand() +payload String +payload_length UInt16 DEFAULT CAST(length(payload), \'UInt16\') +date Date MATERIALIZED today() +key UInt64 MATERIALIZED 0 * rand() +payload String +date Date MATERIALIZED today() +key UInt64 MATERIALIZED 0 * rand() hello clickhouse some string diff --git a/dbms/tests/queries/0_stateless/00168_buffer_defaults.reference b/dbms/tests/queries/0_stateless/00168_buffer_defaults.reference index 3f70f0ee2c4..aa378b78ba9 100644 --- a/dbms/tests/queries/0_stateless/00168_buffer_defaults.reference +++ b/dbms/tests/queries/0_stateless/00168_buffer_defaults.reference @@ -1,9 +1,9 @@ -EventDate Date -UTCEventTime DateTime -MoscowEventDate Date DEFAULT toDate(UTCEventTime) -EventDate Date -UTCEventTime DateTime -MoscowEventDate Date DEFAULT toDate(UTCEventTime) +EventDate Date +UTCEventTime DateTime +MoscowEventDate Date DEFAULT toDate(UTCEventTime) +EventDate Date +UTCEventTime DateTime +MoscowEventDate Date DEFAULT toDate(UTCEventTime) 2015-06-09 2015-06-09 01:02:03 2015-06-09 2015-06-09 2015-06-09 01:02:03 2015-06-09 2015-06-09 2015-06-09 01:02:03 2015-06-09 diff --git a/dbms/tests/queries/0_stateless/00294_shard_enums.reference b/dbms/tests/queries/0_stateless/00294_shard_enums.reference index 0f354d57230..28dc7781253 100644 --- a/dbms/tests/queries/0_stateless/00294_shard_enums.reference +++ b/dbms/tests/queries/0_stateless/00294_shard_enums.reference @@ -1,28 +1,28 @@ -d Date DEFAULT CAST(\'2015-12-29\', \'Date\') -k UInt8 DEFAULT 0 -e Enum8(\'hello\' = 1, \'world\' = 2) -sign Enum8(\'minus\' = -1, \'plus\' = 1) -letter Enum16(\'*\' = -256, \'a\' = 0, \'b\' = 1, \'c\' = 2) +d Date DEFAULT CAST(\'2015-12-29\', \'Date\') +k UInt8 DEFAULT 0 +e Enum8(\'hello\' = 1, \'world\' = 2) +sign Enum8(\'minus\' = -1, \'plus\' = 1) +letter Enum16(\'*\' = -256, \'a\' = 0, \'b\' = 1, \'c\' = 2) 2015-12-29 0 hello minus * -d Date DEFAULT CAST(\'2015-12-29\', \'Date\') -k UInt8 DEFAULT 0 -e Enum8(\'hello\' = 1, \'world\' = 2, \'!\' = 3) -sign Enum8(\'minus\' = -1, \'plus\' = 1) -letter Enum16(\'*\' = -256, \'a\' = 0, \'b\' = 1, \'c\' = 2) +d Date DEFAULT CAST(\'2015-12-29\', \'Date\') +k UInt8 DEFAULT 0 +e Enum8(\'hello\' = 1, \'world\' = 2, \'!\' = 3) +sign Enum8(\'minus\' = -1, \'plus\' = 1) +letter Enum16(\'*\' = -256, \'a\' = 0, \'b\' = 1, \'c\' = 2) 2015-12-29 0 hello minus * 2015-12-29 0 ! plus b -d Date DEFAULT CAST(\'2015-12-29\', \'Date\') -k UInt8 DEFAULT 0 -e Enum16(\'hello\' = 1, \'world\' = 2, \'!\' = 3) -sign Enum16(\'minus\' = -1, \'plus\' = 1) -letter Enum16(\'no letter\' = -256, \'a\' = 0, \'b\' = 1, \'c\' = 2) +d Date DEFAULT CAST(\'2015-12-29\', \'Date\') +k UInt8 DEFAULT 0 +e Enum16(\'hello\' = 1, \'world\' = 2, \'!\' = 3) +sign Enum16(\'minus\' = -1, \'plus\' = 1) +letter Enum16(\'no letter\' = -256, \'a\' = 0, \'b\' = 1, \'c\' = 2) 2015-12-29 0 hello minus no letter 2015-12-29 0 ! plus b -d Date DEFAULT CAST(\'2015-12-29\', \'Date\') -k UInt8 DEFAULT 0 -e Enum8(\'hello\' = 1, \'world\' = 2, \'!\' = 3) -sign Enum8(\'minus\' = -1, \'plus\' = 1) -letter Enum16(\'no letter\' = -256, \'a\' = 0, \'b\' = 1, \'c\' = 2) +d Date DEFAULT CAST(\'2015-12-29\', \'Date\') +k UInt8 DEFAULT 0 +e Enum8(\'hello\' = 1, \'world\' = 2, \'!\' = 3) +sign Enum8(\'minus\' = -1, \'plus\' = 1) +letter Enum16(\'no letter\' = -256, \'a\' = 0, \'b\' = 1, \'c\' = 2) 2015-12-29 0 hello minus no letter 2015-12-29 0 ! plus b 2015-12-29 0 world minus c diff --git a/dbms/tests/queries/0_stateless/00415_into_outfile.reference b/dbms/tests/queries/0_stateless/00415_into_outfile.reference index 0aa3993f4c0..1fc1ceac892 100644 --- a/dbms/tests/queries/0_stateless/00415_into_outfile.reference +++ b/dbms/tests/queries/0_stateless/00415_into_outfile.reference @@ -6,7 +6,7 @@ performing test: union_all performing test: bad_union_all query failed performing test: describe_table -dummy UInt8 +dummy UInt8 performing test: clickhouse-local 2 3 performing test: http diff --git a/dbms/tests/queries/0_stateless/00515_shard_desc_table_functions_and_subqueries.reference b/dbms/tests/queries/0_stateless/00515_shard_desc_table_functions_and_subqueries.reference index f24dcad7c8d..5ccb1694c92 100644 --- a/dbms/tests/queries/0_stateless/00515_shard_desc_table_functions_and_subqueries.reference +++ b/dbms/tests/queries/0_stateless/00515_shard_desc_table_functions_and_subqueries.reference @@ -1,30 +1,30 @@ -date Date -val UInt64 -val2 UInt8 DEFAULT 42 -val3 UInt8 DEFAULT CAST(val2 + 1, \'UInt8\') -val4 UInt64 ALIAS val +date Date +val UInt64 +val2 UInt8 DEFAULT 42 +val3 UInt8 DEFAULT CAST(val2 + 1, \'UInt8\') +val4 UInt64 ALIAS val - -date Date -val UInt64 -val2 UInt8 DEFAULT 42 -val3 UInt8 DEFAULT CAST(val2 + 1, \'UInt8\') -val4 UInt64 ALIAS val +date Date +val UInt64 +val2 UInt8 DEFAULT 42 +val3 UInt8 DEFAULT CAST(val2 + 1, \'UInt8\') +val4 UInt64 ALIAS val - -date Date -val UInt64 -val2 UInt8 DEFAULT 42 -val3 UInt8 DEFAULT CAST(val2 + 1, \'UInt8\') -val4 UInt64 ALIAS val +date Date +val UInt64 +val2 UInt8 DEFAULT 42 +val3 UInt8 DEFAULT CAST(val2 + 1, \'UInt8\') +val4 UInt64 ALIAS val - -date Date -val UInt64 -val2 UInt8 DEFAULT 42 -val3 UInt8 DEFAULT CAST(val2 + 1, \'UInt8\') -val4 UInt64 ALIAS val +date Date +val UInt64 +val2 UInt8 DEFAULT 42 +val3 UInt8 DEFAULT CAST(val2 + 1, \'UInt8\') +val4 UInt64 ALIAS val - -1 UInt8 +1 UInt8 - -1 UInt8 +1 UInt8 - -number UInt64 +number UInt64 - diff --git a/dbms/tests/queries/0_stateless/00557_alter_null_storage_tables.reference b/dbms/tests/queries/0_stateless/00557_alter_null_storage_tables.reference index c9634180c5c..35790c754de 100644 --- a/dbms/tests/queries/0_stateless/00557_alter_null_storage_tables.reference +++ b/dbms/tests/queries/0_stateless/00557_alter_null_storage_tables.reference @@ -1,3 +1,3 @@ -x UInt8 -x Int64 DEFAULT toInt64(y) -y String +x UInt8 +x Int64 DEFAULT toInt64(y) +y String diff --git a/dbms/tests/queries/0_stateless/00642_cast.reference b/dbms/tests/queries/0_stateless/00642_cast.reference index 296f1351c16..56f79769cc7 100644 --- a/dbms/tests/queries/0_stateless/00642_cast.reference +++ b/dbms/tests/queries/0_stateless/00642_cast.reference @@ -8,6 +8,6 @@ hello hello 1970-01-01 00:00:01 CREATE TABLE test.cast ( x UInt8, e Enum8('hello' = 1, 'world' = 2) DEFAULT CAST(x, 'Enum8(\'hello\' = 1, \'world\' = 2)')) ENGINE = MergeTree ORDER BY e SETTINGS index_granularity = 8192 -x UInt8 -e Enum8(\'hello\' = 1, \'world\' = 2) DEFAULT CAST(x, \'Enum8(\\\'hello\\\' = 1, \\\'world\\\' = 2)\') +x UInt8 +e Enum8(\'hello\' = 1, \'world\' = 2) DEFAULT CAST(x, \'Enum8(\\\'hello\\\' = 1, \\\'world\\\' = 2)\') 1 hello diff --git a/dbms/tests/queries/0_stateless/00651_default_database_on_client_reconnect.reference b/dbms/tests/queries/0_stateless/00651_default_database_on_client_reconnect.reference index 4482012335a..8ed1dd9ba87 100644 --- a/dbms/tests/queries/0_stateless/00651_default_database_on_client_reconnect.reference +++ b/dbms/tests/queries/0_stateless/00651_default_database_on_client_reconnect.reference @@ -1 +1 @@ -val UInt64 +val UInt64 diff --git a/dbms/tests/queries/0_stateless/00700_decimal_defaults.reference b/dbms/tests/queries/0_stateless/00700_decimal_defaults.reference index 30500d7c53f..635f40a11fe 100644 --- a/dbms/tests/queries/0_stateless/00700_decimal_defaults.reference +++ b/dbms/tests/queries/0_stateless/00700_decimal_defaults.reference @@ -1,9 +1,9 @@ -a Decimal(9, 4) DEFAULT CAST(0, \'Decimal(9, 4)\') -b Decimal(18, 4) DEFAULT CAST(a / 2, \'Decimal(18, 4)\') -c Decimal(38, 4) DEFAULT CAST(b / 3, \'Decimal(38, 4)\') -d Decimal(9, 4) MATERIALIZED a + toDecimal32(\'0.2\', 1) -e Decimal(18, 4) ALIAS b * 2 -f Decimal(38, 4) ALIAS c * 6 +a Decimal(9, 4) DEFAULT CAST(0, \'Decimal(9, 4)\') +b Decimal(18, 4) DEFAULT CAST(a / 2, \'Decimal(18, 4)\') +c Decimal(38, 4) DEFAULT CAST(b / 3, \'Decimal(38, 4)\') +d Decimal(9, 4) MATERIALIZED a + toDecimal32(\'0.2\', 1) +e Decimal(18, 4) ALIAS b * 2 +f Decimal(38, 4) ALIAS c * 6 0.0000 0.0000 0.0000 1.0000 0.5000 0.1666 2.0000 1.0000 0.3333 From befaea63d2c023d3cd32e6dfe5f038a51de3c28d Mon Sep 17 00:00:00 2001 From: Sabyanin Maxim Date: Tue, 13 Nov 2018 15:08:07 +0300 Subject: [PATCH 039/145] add is_mutable method to AlterCommand. change comment column test --- dbms/src/Parsers/ParserCreateQuery.h | 21 +++++++++++------ dbms/src/Storages/AlterCommands.cpp | 23 +++++++++++++++++++ dbms/src/Storages/AlterCommands.h | 2 ++ dbms/src/Storages/IStorage.h | 4 ++-- .../0_stateless/00725_comment_columns.sql | 16 ++++++++++++- 5 files changed, 56 insertions(+), 10 deletions(-) diff --git a/dbms/src/Parsers/ParserCreateQuery.h b/dbms/src/Parsers/ParserCreateQuery.h index 7d2bdb382cd..2d1dc3f9cdf 100644 --- a/dbms/src/Parsers/ParserCreateQuery.h +++ b/dbms/src/Parsers/ParserCreateQuery.h @@ -128,17 +128,26 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E const auto fallback_pos = pos; if (!s_default.check(pos, expected) && !s_materialized.check(pos, expected) && - !s_alias.check(pos, expected)) + !s_alias.check(pos, expected) && + !s_comment.check(pos, expected)) { type_parser.parse(pos, type, expected); } else pos = fallback_pos; - /// parse {DEFAULT, MATERIALIZED, ALIAS} + /// parse {DEFAULT, MATERIALIZED, ALIAS, COMMENT} String default_specifier; ASTPtr default_expression; + ASTPtr comment_expression; Pos pos_before_specifier = pos; + if (!s_default.ignore(pos, expected) && + !s_materialized.ignore(pos, expected) && + !s_alias.ignore(pos, expected) && + !s_comment.ignore(pos, expected) && + !type) + return false; /// reject sole column name without type + if (s_default.ignore(pos, expected) || s_materialized.ignore(pos, expected) || s_alias.ignore(pos, expected)) @@ -149,14 +158,12 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E if (!expr_parser.parse(pos, default_expression, expected)) return false; } - else if (!type) - return false; /// reject sole column name without type - - ASTPtr comment_expression; - if (s_comment.ignore(pos, expected)) + else if (s_comment.ignore(pos, expected)) { string_literal_parser.parse(pos, comment_expression, expected); } + else if (!type) // TODO: тут надо очень хорошо подумать. есть проблема с тем, что для modify column имя колонки и коммент ок, а для создания таблицы не ок. + return false; /// reject sole column name without type const auto column_declaration = std::make_shared(); node = column_declaration; diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index 9e8239b79b3..a16e813961b 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -188,6 +188,20 @@ void AlterCommand::apply(ColumnsDescription & columns_description) const } else if (type == MODIFY_COLUMN) { + if (!is_mutable()) + { + auto & comments = columns_description.comments; + if (comment.empty()) + { + if (auto it = comments.find(column_name); it != comments.end()) + comments.erase(it); + } + else + columns_description.comments[column_name] = comment; + + return; + } + const auto default_it = columns_description.defaults.find(column_name); const auto had_default_expr = default_it != std::end(columns_description.defaults); const auto old_default_kind = had_default_expr ? default_it->second.kind : ColumnDefaultKind{}; @@ -256,6 +270,15 @@ void AlterCommand::apply(ColumnsDescription & columns_description) const throw Exception("Wrong parameter type in ALTER query", ErrorCodes::LOGICAL_ERROR); } +bool AlterCommand::is_mutable() const +{ + if (type == COMMENT_COLUMN) + return false; + if (type == MODIFY_COLUMN) + return data_type.get() || default_expression; + + return true; +} void AlterCommands::apply(ColumnsDescription & columns_description) const { diff --git a/dbms/src/Storages/AlterCommands.h b/dbms/src/Storages/AlterCommands.h index 7c56740a2fe..f6d7861dcce 100644 --- a/dbms/src/Storages/AlterCommands.h +++ b/dbms/src/Storages/AlterCommands.h @@ -56,6 +56,8 @@ struct AlterCommand void apply(ColumnsDescription & columns_description) const; + /// Checks that not only metadata touched by that command + bool is_mutable() const; }; class IStorage; diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h index 9e2f9741613..4b4837abc5c 100644 --- a/dbms/src/Storages/IStorage.h +++ b/dbms/src/Storages/IStorage.h @@ -239,8 +239,8 @@ public: { for (const auto & param : params) { - if (param.type != AlterCommand::Type::COMMENT_COLUMN) - throw Exception("Method alter only supports change comment of column for storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); + if (param.is_mutable()) + throw Exception("Method alter supports only change comment of column for storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } auto lock = lockStructureForAlter(__PRETTY_FUNCTION__); diff --git a/dbms/tests/queries/0_stateless/00725_comment_columns.sql b/dbms/tests/queries/0_stateless/00725_comment_columns.sql index afcb1b34eab..ada4e4e1983 100644 --- a/dbms/tests/queries/0_stateless/00725_comment_columns.sql +++ b/dbms/tests/queries/0_stateless/00725_comment_columns.sql @@ -60,4 +60,18 @@ FROM system.columns WHERE table = 'check_query_comment_column' and database = 'test' FORMAT PrettyCompactNoEscapes; -DROP table test.check_query_comment_column; \ No newline at end of file +DROP TABLE IF test.check_query_comment_column; + +CREATE TABLE test.check_query_comment_column + ( + first_column UInt8 COMMENT 'first comment' + ) ENGINE = TinyLog; + +ALTER TABLE test.check_query_comment_column MODIFY COLUMN first_column COMMENT 'another comment'; + +SELECT table, name, comment +FROM system.columns +WHERE table = 'check_query_comment_column' and database = 'test' +FORMAT PrettyCompactNoEscapes; + +DROP TABLE IF EXISTS test.check_query_comment_column; From 6296f58186272fecb79cf653a0a8ebab7235ec1d Mon Sep 17 00:00:00 2001 From: Sabyanin Maxim Date: Tue, 13 Nov 2018 15:22:32 +0300 Subject: [PATCH 040/145] fix typo --- dbms/src/Storages/ColumnsDescription.h | 2 +- dbms/src/Storages/System/StorageSystemColumns.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Storages/ColumnsDescription.h b/dbms/src/Storages/ColumnsDescription.h index aef00eb9fd2..0b029ccc3e0 100644 --- a/dbms/src/Storages/ColumnsDescription.h +++ b/dbms/src/Storages/ColumnsDescription.h @@ -14,7 +14,7 @@ enum class PresenceType : int32_t InPrimaryKey = 1<<0, InOrderKey = 1<<1, InPartitionKey = 1<<2, - InSamplingKey = 1<<3, + InSampleKey = 1<<3, }; diff --git a/dbms/src/Storages/System/StorageSystemColumns.cpp b/dbms/src/Storages/System/StorageSystemColumns.cpp index 2defae718d9..04b243e1fde 100644 --- a/dbms/src/Storages/System/StorageSystemColumns.cpp +++ b/dbms/src/Storages/System/StorageSystemColumns.cpp @@ -201,7 +201,7 @@ protected: if (columns_mask[src_index++]) res_columns[res_index++]->insert(it->second.Get(PresenceType::InPartitionKey)); if (columns_mask[src_index++]) - res_columns[res_index++]->insert(it->second.Get(PresenceType::InSamplingKey)); + res_columns[res_index++]->insert(it->second.Get(PresenceType::InSampleKey)); } } From 3e4c981db2311942e21270eadf0e0e2aea67fd0f Mon Sep 17 00:00:00 2001 From: Sabyanin Maxim Date: Tue, 13 Nov 2018 22:17:40 +0300 Subject: [PATCH 041/145] small changes --- dbms/src/Interpreters/InterpreterCreateQuery.cpp | 2 +- dbms/src/Storages/ColumnsDescription.h | 9 +++++++-- dbms/src/Storages/System/StorageSystemTables.cpp | 2 +- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index aac1d047499..62a8cdb26c6 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -385,7 +385,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(const ASTCreate { FillColumnPresenceInTableDeclaration(res.presences, storage_def->order_by, PresenceType::InOrderKey); FillColumnPresenceInTableDeclaration(res.presences, storage_def->partition_by, PresenceType::InPartitionKey); - FillColumnPresenceInTableDeclaration(res.presences, storage_def->sample_by, PresenceType::InSamplingKey); + FillColumnPresenceInTableDeclaration(res.presences, storage_def->sample_by, PresenceType::InSampleKey); } if (res.ordinary.size() + res.materialized.size() == 0) diff --git a/dbms/src/Storages/ColumnsDescription.h b/dbms/src/Storages/ColumnsDescription.h index 0b029ccc3e0..a2dc95cce0f 100644 --- a/dbms/src/Storages/ColumnsDescription.h +++ b/dbms/src/Storages/ColumnsDescription.h @@ -22,9 +22,9 @@ enum class PresenceType : int32_t class ColumnPresence { public: - bool Get(PresenceType type) + bool Get(PresenceType type) const { - return static_cast(presenceMask & static_cast(type)); + return (presenceMask & static_cast(type)) != 0; } void Set(PresenceType type) @@ -32,6 +32,11 @@ public: presenceMask |= static_cast(type); } + void Remove(PresenceType type) + { + presenceMask &= ~static_cast(type); + } + private: int32_t presenceMask = 0; }; diff --git a/dbms/src/Storages/System/StorageSystemTables.cpp b/dbms/src/Storages/System/StorageSystemTables.cpp index a7e5f39d0e8..087b4b11ae9 100644 --- a/dbms/src/Storages/System/StorageSystemTables.cpp +++ b/dbms/src/Storages/System/StorageSystemTables.cpp @@ -252,7 +252,7 @@ protected: } else { - src_index += 2; + src_index += 2; // TODO: подумать над этим } const auto table_it = context.getTable(database_name, table_name); From b65250e672eb032563aa3b0c59d5ce5e7a50dee3 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 14 Nov 2018 11:05:34 +0300 Subject: [PATCH 042/145] Better default value in packager script --- docker/packager/binary/Dockerfile | 3 --- docker/packager/deb/Dockerfile | 3 --- docker/packager/packager | 2 +- 3 files changed, 1 insertion(+), 7 deletions(-) diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index 2206bcc7e49..49b757e1b00 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -5,9 +5,6 @@ RUN apt-get update -y \ apt-get install --yes --no-install-recommends \ bash \ cmake \ - ccache \ - distcc \ - distcc-pump \ curl \ gcc-7 \ g++-7 \ diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile index 08eeb3f3578..9e0abd3afd4 100644 --- a/docker/packager/deb/Dockerfile +++ b/docker/packager/deb/Dockerfile @@ -5,9 +5,6 @@ RUN apt-get update -y \ apt-get install --yes --no-install-recommends \ bash \ fakeroot \ - ccache \ - distcc \ - distcc-pump \ cmake \ curl \ gcc-7 \ diff --git a/docker/packager/packager b/docker/packager/packager index fed0c54fae5..c0006c7b0bb 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -86,7 +86,7 @@ if __name__ == "__main__": parser.add_argument("--build-type", choices=("debug", ""), default="") parser.add_argument("--compiler", choices=("clang-6.0", "gcc-7", "gcc-8"), default="gcc-7") parser.add_argument("--sanitizer", choices=("address", "thread", "memory", "undefined", ""), default="") - parser.add_argument("--cache", choices=("ccache", "distcc")) + parser.add_argument("--cache", choices=("", "ccache", "distcc"), default="") parser.add_argument("--distcc-hosts", nargs="+") parser.add_argument("--force-build-image", action="store_true") From fc272ecded058e92e021df1a5e8aed16fbb45c80 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Wed, 14 Nov 2018 18:04:23 +0300 Subject: [PATCH 043/145] Some fixes to quantile*() functions related to DateTime argument type --- .../AggregateFunctionQuantile.cpp | 39 +++++++------- .../AggregateFunctionQuantile.h | 53 +++++++++---------- .../00753_quantile_format.reference | 16 ++++++ .../0_stateless/00753_quantile_format.sql | 26 +++++++++ 4 files changed, 85 insertions(+), 49 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00753_quantile_format.reference create mode 100644 dbms/tests/queries/0_stateless/00753_quantile_format.sql diff --git a/dbms/src/AggregateFunctions/AggregateFunctionQuantile.cpp b/dbms/src/AggregateFunctions/AggregateFunctionQuantile.cpp index d8c96a42fcd..fc9abc1bb1d 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionQuantile.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionQuantile.cpp @@ -4,9 +4,6 @@ #include -#include -#include - namespace DB { @@ -52,10 +49,10 @@ static constexpr bool SupportDecimal() } -template