diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 4f506126682..afccce2ed5a 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -44,7 +44,10 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] INDEX index_name2 expr2 TYPE type2(...) [GRANULARITY value2], ... PROJECTION projection_name_1 (SELECT [GROUP BY] [ORDER BY]), - PROJECTION projection_name_2 (SELECT [GROUP BY] [ORDER BY]) + PROJECTION projection_name_2 (SELECT [GROUP BY] [ORDER BY]), + ... + STATISTIC <list of columns> TYPE type1, + STATISTIC <list of columns> TYPE type2 ) ENGINE = MergeTree() ORDER BY expr [PARTITION BY expr] @@ -1353,3 +1356,22 @@ In this sample configuration: - `_part_uuid` — Unique part identifier (if enabled MergeTree setting `assign_part_uuids`). - `_partition_value` — Values (a tuple) of a `partition by` expression. - `_sample_factor` — Sample factor (from the query). + +## Column Statistics (Experimental) {#column-statistics} + +The statistic declaration is in the columns section of the `CREATE` query. + +``` sql +STATISTIC <list of columns> TYPE type +``` + +For tables from the `*MergeTree` family, statistics can be specified. + +These lightweight statistics aggregate information about the distribution of values in columns. +They can be used for query optimization (currently they are used for moving expressions to PREWHERE). + +#### Available Types of Column Statistics {#available-types-of-column-statistics} + +- `tdigest` + + Stores the distribution of values from numeric columns in a [TDigest](https://github.com/tdunning/t-digest) sketch. 
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 9ddb8d83963..cf67b6c9231 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -459,8 +459,8 @@ ASTPtr InterpreterCreateQuery::formatStatistics(const StatisticsDescriptions & s { auto res = std::make_shared(); - for (const auto & statistic : statistics) - res->children.push_back(statistic.definition_ast->clone()); + for (const auto & definition_ast : statistics.definition_asts) + res->children.push_back(definition_ast->clone()); return res; } @@ -721,8 +721,10 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti } if (create.columns_list->stats) for (const auto & statistic : create.columns_list->stats->children) - properties.stats.push_back( - StatisticDescription::getStatisticFromAST(statistic->clone(), properties.columns, getContext())); + { + auto stats = StatisticsDescriptions::getStatisticsFromAST(statistic->clone(), properties.columns, getContext()); + properties.stats.merge(stats); + } if (create.columns_list->projections) for (const auto & projection_ast : create.columns_list->projections->children) diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index b9eb6ee9a96..961a4f7ac72 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -724,17 +724,20 @@ void MutationsInterpreter::prepare(bool dry_run) else if (command.type == MutationCommand::MATERIALIZE_STATISTIC) { mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION); - auto it = std::find_if( - std::cbegin(statistics_desc), std::end(statistics_desc), - [&](const StatisticDescription & statistic) - { - return statistic.column_name == command.statistic_column_name; - }); - if (it == std::cend(statistics_desc)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown statistic column: {}", 
command.statistic_column_name); + for (const auto & stat_column_name: command.statistic_columns) + { + auto it = std::find_if( + std::cbegin(statistics_desc), std::end(statistics_desc), + [&](const StatisticDescription & statistic) + { + return statistic.column_name == stat_column_name; + }); + if (it == std::cend(statistics_desc)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown statistic column: {}", stat_column_name); - dependencies.emplace(it->column_name, ColumnDependency::STATISTIC); - materialized_statistics.emplace(command.statistic_column_name); + dependencies.emplace(it->column_name, ColumnDependency::STATISTIC); + materialized_statistics.emplace(stat_column_name); + } } else if (command.type == MutationCommand::MATERIALIZE_PROJECTION) { @@ -755,7 +758,8 @@ void MutationsInterpreter::prepare(bool dry_run) else if (command.type == MutationCommand::DROP_STATISTIC) { mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION); - materialized_statistics.erase(command.statistic_column_name); + for (const auto & stat_column_name: command.statistic_columns) + materialized_statistics.erase(stat_column_name); } else if (command.type == MutationCommand::DROP_PROJECTION) { diff --git a/src/Parsers/ASTStatisticDeclaration.cpp b/src/Parsers/ASTStatisticDeclaration.cpp index 196eb994fed..0e20b020ab3 100644 --- a/src/Parsers/ASTStatisticDeclaration.cpp +++ b/src/Parsers/ASTStatisticDeclaration.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -12,17 +13,27 @@ ASTPtr ASTStatisticDeclaration::clone() const { auto res = std::make_shared(); - res->column_name = column_name; + res->set(res->columns, columns->clone()); res->type = type; return res; } - -void ASTStatisticDeclaration::formatImpl(const FormatSettings & s, FormatState &, FormatStateStacked) const +std::vector ASTStatisticDeclaration::getColumnNames() const { - s.ostr << backQuoteIfNeed(column_name); - s.ostr << " "; + std::vector result; + result.reserve(columns->children.size()); + for 
(const ASTPtr & column_ast : columns->children) + { + result.push_back(column_ast->as().name()); + } + return result; + +} + +void ASTStatisticDeclaration::formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const +{ + columns->formatImpl(s, state, frame); s.ostr << (s.hilite ? hilite_keyword : "") << " TYPE " << (s.hilite ? hilite_none : ""); s.ostr << backQuoteIfNeed(type); } diff --git a/src/Parsers/ASTStatisticDeclaration.h b/src/Parsers/ASTStatisticDeclaration.h index 7fba8872a94..f936c93f2ba 100644 --- a/src/Parsers/ASTStatisticDeclaration.h +++ b/src/Parsers/ASTStatisticDeclaration.h @@ -12,13 +12,15 @@ class ASTFunction; class ASTStatisticDeclaration : public IAST { public: - String column_name; - /// We do not support to set bucket number for tdigest + IAST * columns; + /// TODO type should be a list of ASTFunction, for example, 'tdigest(256), hyperloglog(128)', etc. String type; /** Get the text that identifies this element. */ String getID(char) const override { return "Stat"; } + std::vector getColumnNames() const; + ASTPtr clone() const override; void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override; }; diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 3ac12212054..344d00beb4e 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -165,14 +165,13 @@ bool ParserStatisticDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & { ParserKeyword s_type("TYPE"); - ParserIdentifier name_p; + ParserList columns_p(std::make_unique(), std::make_unique(TokenType::Comma), false); ParserIdentifier type_p; - ASTPtr name; - ASTPtr column; + ASTPtr columns; ASTPtr type; - if (!name_p.parse(pos, name, expected)) + if (!columns_p.parse(pos, columns, expected)) return false; if (!s_type.ignore(pos, expected)) @@ -182,7 +181,7 @@ bool ParserStatisticDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & return 
false; auto stat = std::make_shared(); - stat->column_name = name->as().name(); + stat->set(stat->columns, columns); stat->type = type->as().name(); node = stat; diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 003e39a738a..c1f7711fce7 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -242,9 +242,8 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ const auto & ast_stat_decl = command_ast->statistic_decl->as(); - command.statistic_column_name = ast_stat_decl.column_name; + command.statistic_columns = ast_stat_decl.getColumnNames(); command.statistic_type = ast_stat_decl.type; - command.if_not_exists = command_ast->if_not_exists; return command; @@ -315,7 +314,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ command.type = AlterCommand::DROP_STATISTIC; const auto & ast_stat_decl = command_ast->statistic_decl->as(); - command.statistic_column_name = ast_stat_decl.column_name; + command.statistic_columns = ast_stat_decl.getColumnNames(); command.statistic_type = ast_stat_decl.type; command.if_exists = command_ast->if_exists; command.clear = command_ast->clear_statistic; @@ -589,45 +588,47 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) } else if (type == ADD_STATISTIC) { - if (!if_not_exists && std::any_of( - metadata.statistics.cbegin(), - metadata.statistics.cend(), - [this](const auto & statistic) - { - return statistic.column_name == statistic_column_name && statistic.type == statistic_type; - })) + /// TODO: Right now we assume there is only one type of statistics for simple implement. 
+ for (const auto & statistic_column_name : statistic_columns) { - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot add statistic {} with type {}: statistic on this column with this type already exists", statistic_column_name, statistic_type); + if (!if_not_exists && std::any_of( + metadata.statistics.cbegin(), + metadata.statistics.cend(), + [&](const auto & statistic) + { + return statistic.column_name == statistic_column_name; + })) + { + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot add statistic {} with type {}: statistic on this column with this type already exists", statistic_column_name, statistic_type); + } } - auto insert_it = metadata.statistics.end(); - - /// insert the index in the beginning of the indices list - if (first) - insert_it = metadata.statistics.begin(); - - metadata.statistics.emplace(insert_it, StatisticDescription::getStatisticFromAST(statistic_decl, metadata.columns, context)); + auto stats = StatisticsDescriptions::getStatisticsFromAST(statistic_decl, metadata.columns, context); + metadata.statistics.merge(stats); } else if (type == DROP_STATISTIC) { if (!partition && !clear) { - auto erase_it = std::find_if( - metadata.statistics.begin(), - metadata.statistics.end(), - [this](const auto & statistic) - { - return statistic.column_name == statistic_column_name && statistic.type == statistic_type; - }); - - if (erase_it == metadata.statistics.end()) + for (const auto & stat_column_name : statistic_columns) { - if (if_exists) - return; - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong statistic name. 
Cannot find statistic {} with type {} to drop", backQuote(statistic_column_name), statistic_type); - } + auto erase_it = std::find_if( + metadata.statistics.begin(), + metadata.statistics.end(), + [stat_column_name](const auto & statistic) + { + return statistic.column_name == stat_column_name; + }); - metadata.statistics.erase(erase_it); + if (erase_it == metadata.statistics.end()) + { + if (if_exists) + return; + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong statistic name. Cannot find statistic {} with type {} to drop", backQuote(stat_column_name), statistic_type); + } + LOG_INFO(&Poco::Logger::get("drop_stat"), "dropping statistic {}", erase_it->column_name); + metadata.statistics.erase(erase_it); + } } } else if (type == ADD_CONSTRAINT) @@ -958,7 +959,7 @@ std::optional AlterCommand::tryConvertToMutationCommand(Storage else if (type == DROP_STATISTIC) { result.type = MutationCommand::Type::DROP_STATISTIC; - result.column_name = statistic_column_name; + result.statistic_columns = statistic_columns; if (clear) result.clear = true; diff --git a/src/Storages/AlterCommands.h b/src/Storages/AlterCommands.h index 03a6fcefe22..f5b7c1c0063 100644 --- a/src/Storages/AlterCommands.h +++ b/src/Storages/AlterCommands.h @@ -121,7 +121,7 @@ struct AlterCommand String projection_name; ASTPtr statistic_decl = nullptr; - String statistic_column_name; + std::vector statistic_columns; String statistic_type; /// For MODIFY TTL diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index b93604bcac6..2093f667fcb 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -580,9 +580,11 @@ static StoragePtr create(const StorageFactory::Arguments & args) metadata.secondary_indices.push_back(IndexDescription::getIndexFromAST(index, columns, context)); if (args.query.columns_list && args.query.columns_list->stats) - for (const auto & stat : 
args.query.columns_list->stats->children) - metadata.statistics.push_back( - StatisticDescription::getStatisticFromAST(stat, columns, args.getContext())); + for (const auto & stat_ast : args.query.columns_list->stats->children) + { + auto stats = StatisticsDescriptions::getStatisticsFromAST(stat_ast, columns, args.getContext()); + metadata.statistics.merge(stats); + } if (args.query.columns_list && args.query.columns_list->projections) for (auto & projection_ast : args.query.columns_list->projections->children) diff --git a/src/Storages/MutationCommands.cpp b/src/Storages/MutationCommands.cpp index 4c0f0d80a69..46322f3ef05 100644 --- a/src/Storages/MutationCommands.cpp +++ b/src/Storages/MutationCommands.cpp @@ -77,7 +77,11 @@ std::optional MutationCommand::parse(ASTAlterCommand * command, res.type = MATERIALIZE_STATISTIC; res.partition = command->partition; res.predicate = nullptr; - res.statistic_column_name = command->statistic_decl->as().column_name; + for (const ASTPtr & column_ast : command->statistic_decl->as().columns->children) + { + const auto & column = column_ast->as().getColumnName(); + res.statistic_columns.push_back(column); + } return res; } else if (command->type == ASTAlterCommand::MATERIALIZE_PROJECTION) diff --git a/src/Storages/MutationCommands.h b/src/Storages/MutationCommands.h index 2600112f744..014a227dff3 100644 --- a/src/Storages/MutationCommands.h +++ b/src/Storages/MutationCommands.h @@ -53,7 +53,7 @@ struct MutationCommand /// For MATERIALIZE INDEX and PROJECTION and STATISTIC String index_name = {}; String projection_name = {}; - String statistic_column_name = {}; + std::vector statistic_columns = {}; /// For MATERIALIZE INDEX, UPDATE and DELETE. 
ASTPtr partition = {}; diff --git a/src/Storages/Statistic/Statistic.cpp b/src/Storages/Statistic/Statistic.cpp index 4ea2951b63d..17aa6a76f1c 100644 --- a/src/Storages/Statistic/Statistic.cpp +++ b/src/Storages/Statistic/Statistic.cpp @@ -135,7 +135,7 @@ StatisticPtr TDigestCreator(const StatisticDescription & stat) return StatisticPtr(new TDigestStatistic(stat)); } -void MergeTreeStatisticFactory::registerCreator(const std::string & stat_type, Creator creator) +void MergeTreeStatisticFactory::registerCreator(StatisticType stat_type, Creator creator) { if (!creators.emplace(stat_type, std::move(creator)).second) throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeTreeStatisticFactory: the statistic creator type {} is not unique", stat_type); @@ -143,7 +143,7 @@ void MergeTreeStatisticFactory::registerCreator(const std::string & stat_type, C MergeTreeStatisticFactory::MergeTreeStatisticFactory() { - registerCreator("tdigest", TDigestCreator); + registerCreator(TDigest, TDigestCreator); ///registerCreator("cm_sketch", CMSketchCreator); } @@ -160,16 +160,7 @@ StatisticPtr MergeTreeStatisticFactory::get(const StatisticDescription & stat) c if (it == creators.end()) { throw Exception(ErrorCodes::INCORRECT_QUERY, - "Unknown Statistic type '{}'. Available types: {}", stat.type, - std::accumulate(creators.cbegin(), creators.cend(), std::string{}, - [] (auto && left, const auto & right) -> std::string - { - if (left.empty()) - return right.first; - else - return left + ", " + right.first; - }) - ); + "Unknown Statistic type '{}'. 
Available types: tdigest", stat.type); } return std::make_shared(stat); } diff --git a/src/Storages/Statistic/Statistic.h b/src/Storages/Statistic/Statistic.h index 7db4594eb5f..eb05649f0a6 100644 --- a/src/Storages/Statistic/Statistic.h +++ b/src/Storages/Statistic/Statistic.h @@ -43,10 +43,10 @@ public: return stat.column_name; } - const String & type() const - { - return stat.type; - } + //const String & type() const + //{ + // return stat.type; + //} virtual void serialize(WriteBuffer & buf) = 0; @@ -118,13 +118,13 @@ public: Statistics getMany(const std::vector & stats) const; - void registerCreator(const std::string & type, Creator creator); + void registerCreator(StatisticType type, Creator creator); protected: MergeTreeStatisticFactory(); private: - using Creators = std::unordered_map; + using Creators = std::unordered_map; Creators creators; }; diff --git a/src/Storages/StatisticsDescription.cpp b/src/Storages/StatisticsDescription.cpp index 2dd8d7ab8e6..0fff9581d57 100644 --- a/src/Storages/StatisticsDescription.cpp +++ b/src/Storages/StatisticsDescription.cpp @@ -10,6 +10,8 @@ #include #include +#include + namespace DB { @@ -19,58 +21,51 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; }; -StatisticDescription StatisticDescription::getStatisticFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, ContextPtr context) +StatisticType StatisticDescription::stringToType(String type) +{ + if (type.empty()) + return TDigest; + if (type == "tdigest") + return TDigest; + throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type: {}", type); +} + +StatisticsDescriptions StatisticsDescriptions::getStatisticsFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, ContextPtr context) { const auto * stat_definition = definition_ast->as(); if (!stat_definition) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot create statistic from non ASTStatisticDeclaration AST"); - StatisticDescription stat; - 
stat.definition_ast = definition_ast->clone(); - stat.type = Poco::toLower(stat_definition->type); - if (stat.type != "tdigest") - throw Exception(ErrorCodes::INCORRECT_QUERY, "Incorrect type name {}", stat.type); - String column_name = stat_definition->column_name; + LOG_INFO(&Poco::Logger::get("stats_desc"), "stat_def is like {}", stat_definition->dumpTree()); - if (!columns.hasPhysical(column_name)) - throw Exception(ErrorCodes::INCORRECT_QUERY, "Incorrect column name {}", column_name); + StatisticsDescriptions stats; + for (const auto & column_ast : stat_definition->columns->children) + { + StatisticDescription stat; + stat.type = StatisticDescription::stringToType(Poco::toLower(stat_definition->type)); + String column_name = column_ast->as().name(); - const auto & column = columns.getPhysical(column_name); - stat.column_name = column.name; - /// TODO: check if it is numeric. - stat.data_type = column.type; + if (!columns.hasPhysical(column_name)) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Incorrect column name {}", column_name); + + const auto & column = columns.getPhysical(column_name); + stat.column_name = column.name; + /// TODO: check if it is numeric. + stat.data_type = column.type; + stats.push_back(stat); + } + stats.definition_asts.push_back(definition_ast); + + if (stats.empty()) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Empty statistic column list"); + + LOG_INFO(&Poco::Logger::get("stats_desc"), "there are {} stats", stats.size()); UNUSED(context); - return stat; + return stats; } -StatisticDescription::StatisticDescription(const StatisticDescription & other) - : definition_ast(other.definition_ast ? 
other.definition_ast->clone() : nullptr) - , type(other.type) - , column_name(other.column_name) - , data_type(other.data_type) -{ -} - -StatisticDescription & StatisticDescription::operator=(const StatisticDescription & other) -{ - if (&other == this) - return *this; - - if (other.definition_ast) - definition_ast = other.definition_ast->clone(); - else - definition_ast.reset(); - - type = other.type; - column_name = other.column_name; - data_type = other.data_type; - - return *this; -} - - bool StatisticsDescriptions::has(const String & name) const { for (const auto & statistic : *this) @@ -79,31 +74,22 @@ bool StatisticsDescriptions::has(const String & name) const return false; } +void StatisticsDescriptions::merge(const StatisticsDescriptions & other) +{ + insert(end(), other.begin(), other.end()); + definition_asts.insert(definition_asts.end(), other.definition_asts.begin(), other.definition_asts.end()); +} + String StatisticsDescriptions::toString() const { if (empty()) return {}; ASTExpressionList list; - for (const auto & statistic : *this) - list.children.push_back(statistic.definition_ast); + for (const auto & ast : definition_asts) + list.children.push_back(ast); return serializeAST(list); } -StatisticsDescriptions StatisticsDescriptions::parse(const String & str, const ColumnsDescription & columns, ContextPtr context) -{ - StatisticsDescriptions result; - if (str.empty()) - return result; - - ParserStatisticDeclaration parser; - ASTPtr list = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); - - for (const auto & index : list->children) - result.emplace_back(StatisticDescription::getStatisticFromAST(index, columns, context)); - - return result; -} - } diff --git a/src/Storages/StatisticsDescription.h b/src/Storages/StatisticsDescription.h index ea05260b4d8..b018ce26665 100644 --- a/src/Storages/StatisticsDescription.h +++ b/src/Storages/StatisticsDescription.h @@ -7,13 +7,15 @@ namespace DB { +enum StatisticType +{ + TDigest = 0, +}; + struct 
StatisticDescription { - /// Definition AST of statistic - ASTPtr definition_ast; - /// the type of statistic, right now it's only tdigest. - String type; + StatisticType type; /// Names of statistic columns String column_name; @@ -21,24 +23,22 @@ struct StatisticDescription /// Data types of statistic columns DataTypePtr data_type; - static StatisticDescription getStatisticFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, ContextPtr context); - StatisticDescription() = default; - /// We need custom copy constructors because we don't want - /// unintentionaly share AST variables and modify them. - StatisticDescription(const StatisticDescription & other); - StatisticDescription & operator=(const StatisticDescription & other); + static StatisticType stringToType(String type); }; struct StatisticsDescriptions : public std::vector { + std::vector definition_asts; /// Stat with name exists bool has(const String & name) const; + /// merge with other Statistics + void merge(const StatisticsDescriptions & other); /// Convert description to string String toString() const; /// Parse description from string - static StatisticsDescriptions parse(const String & str, const ColumnsDescription & columns, ContextPtr context); + static StatisticsDescriptions getStatisticsFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, ContextPtr context); }; } diff --git a/tests/queries/0_stateless/02864_statistic_operate.reference b/tests/queries/0_stateless/02864_statistic_operate.reference new file mode 100644 index 00000000000..424b16cb8a5 --- /dev/null +++ b/tests/queries/0_stateless/02864_statistic_operate.reference @@ -0,0 +1,22 @@ +CREATE TABLE default.t1\n(\n `a` Int64,\n `b` Float64,\n `pk` String,\n STATISTIC a, b TYPE tdigest\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS index_granularity = 8192 +After insert +SELECT count() +FROM t1 +PREWHERE (a < 10) AND (b < 10) +10 +After drop statistic +SELECT count() +FROM t1 +PREWHERE (b < 10) 
AND (a < 10) +10 +After add statistic +After materialize statistic +SELECT count() +FROM t1 +PREWHERE (a < 10) AND (b < 10) +20 +After merge +SELECT count() +FROM t1 +PREWHERE (a < 10) AND (b < 10) +20 diff --git a/tests/queries/0_stateless/02864_statistic_operate.sql b/tests/queries/0_stateless/02864_statistic_operate.sql index 5358fd7dde9..6b74cc37e2a 100644 --- a/tests/queries/0_stateless/02864_statistic_operate.sql +++ b/tests/queries/0_stateless/02864_statistic_operate.sql @@ -8,8 +8,7 @@ CREATE TABLE t1 a Int64, b Float64, pk String, - STATISTIC a TYPE tdigest, - STATISTIC b TYPE tdigest + STATISTIC a, b TYPE tdigest, ) Engine = MergeTree() ORDER BY pk; SHOW CREATE TABLE t1; @@ -20,18 +19,21 @@ SELECT 'After insert'; EXPLAIN SYNTAX SELECT count(*) FROM t1 WHERE b < 10 and a < 10; SELECT count(*) FROM t1 WHERE b < 10 and a < 10; -ALTER TABLE t1 DROP STATISTIC a TYPE tdigest; -ALTER TABLE t1 DROP STATISTIC b TYPE tdigest; +ALTER TABLE t1 DROP STATISTIC a, b TYPE tdigest; SELECT 'After drop statistic'; EXPLAIN SYNTAX SELECT count(*) FROM t1 WHERE b < 10 and a < 10; SELECT count(*) FROM t1 WHERE b < 10 and a < 10; -ALTER TABLE t1 ADD STATISTIC a TYPE tdigest; -ALTER TABLE t1 ADD STATISTIC b TYPE tdigest; +--SHOW CREATE TABLE t1; -ALTER TABLE t1 MATERIALIZE STATISTIC a TYPE tdigest; -ALTER TABLE t1 MATERIALIZE STATISTIC b TYPE tdigest; +ALTER TABLE t1 ADD STATISTIC a, b TYPE tdigest; + +SELECT 'After add statistic'; + +--SHOW CREATE TABLE t1; + +ALTER TABLE t1 MATERIALIZE STATISTIC a, b TYPE tdigest; INSERT INTO t1 select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000; SELECT 'After materialize statistic';