mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-27 10:02:01 +00:00
update docs and refine statements
This commit is contained in:
parent
f60dad0598
commit
ddcb64f39f
@ -44,7 +44,10 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
INDEX index_name2 expr2 TYPE type2(...) [GRANULARITY value2],
|
||||
...
|
||||
PROJECTION projection_name_1 (SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY]),
|
||||
PROJECTION projection_name_2 (SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY])
|
||||
PROJECTION projection_name_2 (SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY]),
|
||||
...
|
||||
STATISTIC <COLUMN LIST> TYPE type1,
|
||||
STATISTIC <COLUMN LIST> TYPE type2
|
||||
) ENGINE = MergeTree()
|
||||
ORDER BY expr
|
||||
[PARTITION BY expr]
|
||||
@ -1353,3 +1356,22 @@ In this sample configuration:
|
||||
- `_part_uuid` — Unique part identifier (if enabled MergeTree setting `assign_part_uuids`).
|
||||
- `_partition_value` — Values (a tuple) of a `partition by` expression.
|
||||
- `_sample_factor` — Sample factor (from the query).
|
||||
|
||||
## Column Statistics (Experimental) {#column-statistics}
|
||||
|
||||
Statistics are declared in the columns section of the `CREATE` query.
|
||||
|
||||
``` sql
|
||||
STATISTIC <list of columns> TYPE type
|
||||
```
|
||||
|
||||
Statistics can be specified for tables from the `*MergeTree` family.
|
||||
|
||||
These lightweight statistics aggregate information about the distribution of values in columns.
|
||||
They can be used for query optimization (currently, they are used for moving expressions to PREWHERE).
|
||||
|
||||
#### Available Types of Column Statistics {#available-types-of-column-statistics}
|
||||
|
||||
- `tdigest`
|
||||
|
||||
Stores the distribution of values from numeric columns in a [TDigest](https://github.com/tdunning/t-digest) sketch.
|
||||
|
@ -459,8 +459,8 @@ ASTPtr InterpreterCreateQuery::formatStatistics(const StatisticsDescriptions & s
|
||||
{
|
||||
auto res = std::make_shared<ASTExpressionList>();
|
||||
|
||||
for (const auto & statistic : statistics)
|
||||
res->children.push_back(statistic.definition_ast->clone());
|
||||
for (const auto & definition_ast : statistics.definition_asts)
|
||||
res->children.push_back(definition_ast->clone());
|
||||
|
||||
return res;
|
||||
}
|
||||
@ -721,8 +721,10 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti
|
||||
}
|
||||
if (create.columns_list->stats)
|
||||
for (const auto & statistic : create.columns_list->stats->children)
|
||||
properties.stats.push_back(
|
||||
StatisticDescription::getStatisticFromAST(statistic->clone(), properties.columns, getContext()));
|
||||
{
|
||||
auto stats = StatisticsDescriptions::getStatisticsFromAST(statistic->clone(), properties.columns, getContext());
|
||||
properties.stats.merge(stats);
|
||||
}
|
||||
|
||||
if (create.columns_list->projections)
|
||||
for (const auto & projection_ast : create.columns_list->projections->children)
|
||||
|
@ -724,17 +724,20 @@ void MutationsInterpreter::prepare(bool dry_run)
|
||||
else if (command.type == MutationCommand::MATERIALIZE_STATISTIC)
|
||||
{
|
||||
mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION);
|
||||
auto it = std::find_if(
|
||||
std::cbegin(statistics_desc), std::end(statistics_desc),
|
||||
[&](const StatisticDescription & statistic)
|
||||
{
|
||||
return statistic.column_name == command.statistic_column_name;
|
||||
});
|
||||
if (it == std::cend(statistics_desc))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown statistic column: {}", command.statistic_column_name);
|
||||
for (const auto & stat_column_name: command.statistic_columns)
|
||||
{
|
||||
auto it = std::find_if(
|
||||
std::cbegin(statistics_desc), std::end(statistics_desc),
|
||||
[&](const StatisticDescription & statistic)
|
||||
{
|
||||
return statistic.column_name == stat_column_name;
|
||||
});
|
||||
if (it == std::cend(statistics_desc))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown statistic column: {}", stat_column_name);
|
||||
|
||||
dependencies.emplace(it->column_name, ColumnDependency::STATISTIC);
|
||||
materialized_statistics.emplace(command.statistic_column_name);
|
||||
dependencies.emplace(it->column_name, ColumnDependency::STATISTIC);
|
||||
materialized_statistics.emplace(stat_column_name);
|
||||
}
|
||||
}
|
||||
else if (command.type == MutationCommand::MATERIALIZE_PROJECTION)
|
||||
{
|
||||
@ -755,7 +758,8 @@ void MutationsInterpreter::prepare(bool dry_run)
|
||||
else if (command.type == MutationCommand::DROP_STATISTIC)
|
||||
{
|
||||
mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION);
|
||||
materialized_statistics.erase(command.statistic_column_name);
|
||||
for (const auto & stat_column_name: command.statistic_columns)
|
||||
materialized_statistics.erase(stat_column_name);
|
||||
}
|
||||
else if (command.type == MutationCommand::DROP_PROJECTION)
|
||||
{
|
||||
|
@ -1,4 +1,5 @@
|
||||
#include <Parsers/ASTStatisticDeclaration.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
|
||||
#include <Common/quoteString.h>
|
||||
#include <IO/Operators.h>
|
||||
@ -12,17 +13,27 @@ ASTPtr ASTStatisticDeclaration::clone() const
|
||||
{
|
||||
auto res = std::make_shared<ASTStatisticDeclaration>();
|
||||
|
||||
res->column_name = column_name;
|
||||
res->set(res->columns, columns->clone());
|
||||
res->type = type;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
void ASTStatisticDeclaration::formatImpl(const FormatSettings & s, FormatState &, FormatStateStacked) const
|
||||
std::vector<String> ASTStatisticDeclaration::getColumnNames() const
|
||||
{
|
||||
s.ostr << backQuoteIfNeed(column_name);
|
||||
s.ostr << " ";
|
||||
std::vector<String> result;
|
||||
result.reserve(columns->children.size());
|
||||
for (const ASTPtr & column_ast : columns->children)
|
||||
{
|
||||
result.push_back(column_ast->as<ASTIdentifier &>().name());
|
||||
}
|
||||
return result;
|
||||
|
||||
}
|
||||
|
||||
void ASTStatisticDeclaration::formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const
|
||||
{
|
||||
columns->formatImpl(s, state, frame);
|
||||
s.ostr << (s.hilite ? hilite_keyword : "") << " TYPE " << (s.hilite ? hilite_none : "");
|
||||
s.ostr << backQuoteIfNeed(type);
|
||||
}
|
||||
|
@ -12,13 +12,15 @@ class ASTFunction;
|
||||
class ASTStatisticDeclaration : public IAST
|
||||
{
|
||||
public:
|
||||
String column_name;
|
||||
/// We do not support to set bucket number for tdigest
|
||||
IAST * columns;
|
||||
/// TODO type should be a list of ASTFunction, for example, 'tdigest(256), hyperloglog(128)', etc.
|
||||
String type;
|
||||
|
||||
/** Get the text that identifies this element. */
|
||||
String getID(char) const override { return "Stat"; }
|
||||
|
||||
std::vector<String> getColumnNames() const;
|
||||
|
||||
ASTPtr clone() const override;
|
||||
void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override;
|
||||
};
|
||||
|
@ -165,14 +165,13 @@ bool ParserStatisticDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected &
|
||||
{
|
||||
ParserKeyword s_type("TYPE");
|
||||
|
||||
ParserIdentifier name_p;
|
||||
ParserList columns_p(std::make_unique<ParserIdentifier>(), std::make_unique<ParserToken>(TokenType::Comma), false);
|
||||
ParserIdentifier type_p;
|
||||
|
||||
ASTPtr name;
|
||||
ASTPtr column;
|
||||
ASTPtr columns;
|
||||
ASTPtr type;
|
||||
|
||||
if (!name_p.parse(pos, name, expected))
|
||||
if (!columns_p.parse(pos, columns, expected))
|
||||
return false;
|
||||
|
||||
if (!s_type.ignore(pos, expected))
|
||||
@ -182,7 +181,7 @@ bool ParserStatisticDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected &
|
||||
return false;
|
||||
|
||||
auto stat = std::make_shared<ASTStatisticDeclaration>();
|
||||
stat->column_name = name->as<ASTIdentifier &>().name();
|
||||
stat->set(stat->columns, columns);
|
||||
stat->type = type->as<ASTIdentifier &>().name();
|
||||
node = stat;
|
||||
|
||||
|
@ -242,9 +242,8 @@ std::optional<AlterCommand> AlterCommand::parse(const ASTAlterCommand * command_
|
||||
|
||||
const auto & ast_stat_decl = command_ast->statistic_decl->as<ASTStatisticDeclaration &>();
|
||||
|
||||
command.statistic_column_name = ast_stat_decl.column_name;
|
||||
command.statistic_columns = ast_stat_decl.getColumnNames();
|
||||
command.statistic_type = ast_stat_decl.type;
|
||||
|
||||
command.if_not_exists = command_ast->if_not_exists;
|
||||
|
||||
return command;
|
||||
@ -315,7 +314,7 @@ std::optional<AlterCommand> AlterCommand::parse(const ASTAlterCommand * command_
|
||||
command.type = AlterCommand::DROP_STATISTIC;
|
||||
const auto & ast_stat_decl = command_ast->statistic_decl->as<ASTStatisticDeclaration &>();
|
||||
|
||||
command.statistic_column_name = ast_stat_decl.column_name;
|
||||
command.statistic_columns = ast_stat_decl.getColumnNames();
|
||||
command.statistic_type = ast_stat_decl.type;
|
||||
command.if_exists = command_ast->if_exists;
|
||||
command.clear = command_ast->clear_statistic;
|
||||
@ -589,45 +588,47 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context)
|
||||
}
|
||||
else if (type == ADD_STATISTIC)
|
||||
{
|
||||
if (!if_not_exists && std::any_of(
|
||||
metadata.statistics.cbegin(),
|
||||
metadata.statistics.cend(),
|
||||
[this](const auto & statistic)
|
||||
{
|
||||
return statistic.column_name == statistic_column_name && statistic.type == statistic_type;
|
||||
}))
|
||||
/// TODO: Right now we assume there is only one type of statistics, to keep the implementation simple.
|
||||
for (const auto & statistic_column_name : statistic_columns)
|
||||
{
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot add statistic {} with type {}: statistic on this column with this type already exists", statistic_column_name, statistic_type);
|
||||
if (!if_not_exists && std::any_of(
|
||||
metadata.statistics.cbegin(),
|
||||
metadata.statistics.cend(),
|
||||
[&](const auto & statistic)
|
||||
{
|
||||
return statistic.column_name == statistic_column_name;
|
||||
}))
|
||||
{
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot add statistic {} with type {}: statistic on this column with this type already exists", statistic_column_name, statistic_type);
|
||||
}
|
||||
}
|
||||
|
||||
auto insert_it = metadata.statistics.end();
|
||||
|
||||
/// insert the index in the beginning of the indices list
|
||||
if (first)
|
||||
insert_it = metadata.statistics.begin();
|
||||
|
||||
metadata.statistics.emplace(insert_it, StatisticDescription::getStatisticFromAST(statistic_decl, metadata.columns, context));
|
||||
auto stats = StatisticsDescriptions::getStatisticsFromAST(statistic_decl, metadata.columns, context);
|
||||
metadata.statistics.merge(stats);
|
||||
}
|
||||
else if (type == DROP_STATISTIC)
|
||||
{
|
||||
if (!partition && !clear)
|
||||
{
|
||||
auto erase_it = std::find_if(
|
||||
metadata.statistics.begin(),
|
||||
metadata.statistics.end(),
|
||||
[this](const auto & statistic)
|
||||
{
|
||||
return statistic.column_name == statistic_column_name && statistic.type == statistic_type;
|
||||
});
|
||||
|
||||
if (erase_it == metadata.statistics.end())
|
||||
for (const auto & stat_column_name : statistic_columns)
|
||||
{
|
||||
if (if_exists)
|
||||
return;
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong statistic name. Cannot find statistic {} with type {} to drop", backQuote(statistic_column_name), statistic_type);
|
||||
}
|
||||
auto erase_it = std::find_if(
|
||||
metadata.statistics.begin(),
|
||||
metadata.statistics.end(),
|
||||
[stat_column_name](const auto & statistic)
|
||||
{
|
||||
return statistic.column_name == stat_column_name;
|
||||
});
|
||||
|
||||
metadata.statistics.erase(erase_it);
|
||||
if (erase_it == metadata.statistics.end())
|
||||
{
|
||||
if (if_exists)
|
||||
return;
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong statistic name. Cannot find statistic {} with type {} to drop", backQuote(stat_column_name), statistic_type);
|
||||
}
|
||||
LOG_INFO(&Poco::Logger::get("drop_stat"), "dropping statistic {}", erase_it->column_name);
|
||||
metadata.statistics.erase(erase_it);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (type == ADD_CONSTRAINT)
|
||||
@ -958,7 +959,7 @@ std::optional<MutationCommand> AlterCommand::tryConvertToMutationCommand(Storage
|
||||
else if (type == DROP_STATISTIC)
|
||||
{
|
||||
result.type = MutationCommand::Type::DROP_STATISTIC;
|
||||
result.column_name = statistic_column_name;
|
||||
result.statistic_columns = statistic_columns;
|
||||
|
||||
if (clear)
|
||||
result.clear = true;
|
||||
|
@ -121,7 +121,7 @@ struct AlterCommand
|
||||
String projection_name;
|
||||
|
||||
ASTPtr statistic_decl = nullptr;
|
||||
String statistic_column_name;
|
||||
std::vector<String> statistic_columns;
|
||||
String statistic_type;
|
||||
|
||||
/// For MODIFY TTL
|
||||
|
@ -580,9 +580,11 @@ static StoragePtr create(const StorageFactory::Arguments & args)
|
||||
metadata.secondary_indices.push_back(IndexDescription::getIndexFromAST(index, columns, context));
|
||||
|
||||
if (args.query.columns_list && args.query.columns_list->stats)
|
||||
for (const auto & stat : args.query.columns_list->stats->children)
|
||||
metadata.statistics.push_back(
|
||||
StatisticDescription::getStatisticFromAST(stat, columns, args.getContext()));
|
||||
for (const auto & stat_ast : args.query.columns_list->stats->children)
|
||||
{
|
||||
auto stats = StatisticsDescriptions::getStatisticsFromAST(stat_ast, columns, args.getContext());
|
||||
metadata.statistics.merge(stats);
|
||||
}
|
||||
|
||||
if (args.query.columns_list && args.query.columns_list->projections)
|
||||
for (auto & projection_ast : args.query.columns_list->projections->children)
|
||||
|
@ -77,7 +77,11 @@ std::optional<MutationCommand> MutationCommand::parse(ASTAlterCommand * command,
|
||||
res.type = MATERIALIZE_STATISTIC;
|
||||
res.partition = command->partition;
|
||||
res.predicate = nullptr;
|
||||
res.statistic_column_name = command->statistic_decl->as<ASTStatisticDeclaration &>().column_name;
|
||||
for (const ASTPtr & column_ast : command->statistic_decl->as<ASTStatisticDeclaration &>().columns->children)
|
||||
{
|
||||
const auto & column = column_ast->as<ASTIdentifier &>().getColumnName();
|
||||
res.statistic_columns.push_back(column);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
else if (command->type == ASTAlterCommand::MATERIALIZE_PROJECTION)
|
||||
|
@ -53,7 +53,7 @@ struct MutationCommand
|
||||
/// For MATERIALIZE INDEX and PROJECTION and STATISTIC
|
||||
String index_name = {};
|
||||
String projection_name = {};
|
||||
String statistic_column_name = {};
|
||||
std::vector<String> statistic_columns = {};
|
||||
|
||||
/// For MATERIALIZE INDEX, UPDATE and DELETE.
|
||||
ASTPtr partition = {};
|
||||
|
@ -135,7 +135,7 @@ StatisticPtr TDigestCreator(const StatisticDescription & stat)
|
||||
return StatisticPtr(new TDigestStatistic(stat));
|
||||
}
|
||||
|
||||
void MergeTreeStatisticFactory::registerCreator(const std::string & stat_type, Creator creator)
|
||||
void MergeTreeStatisticFactory::registerCreator(StatisticType stat_type, Creator creator)
|
||||
{
|
||||
if (!creators.emplace(stat_type, std::move(creator)).second)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeTreeStatisticFactory: the statistic creator type {} is not unique", stat_type);
|
||||
@ -143,7 +143,7 @@ void MergeTreeStatisticFactory::registerCreator(const std::string & stat_type, C
|
||||
|
||||
MergeTreeStatisticFactory::MergeTreeStatisticFactory()
|
||||
{
|
||||
registerCreator("tdigest", TDigestCreator);
|
||||
registerCreator(TDigest, TDigestCreator);
|
||||
|
||||
///registerCreator("cm_sketch", CMSketchCreator);
|
||||
}
|
||||
@ -160,16 +160,7 @@ StatisticPtr MergeTreeStatisticFactory::get(const StatisticDescription & stat) c
|
||||
if (it == creators.end())
|
||||
{
|
||||
throw Exception(ErrorCodes::INCORRECT_QUERY,
|
||||
"Unknown Statistic type '{}'. Available types: {}", stat.type,
|
||||
std::accumulate(creators.cbegin(), creators.cend(), std::string{},
|
||||
[] (auto && left, const auto & right) -> std::string
|
||||
{
|
||||
if (left.empty())
|
||||
return right.first;
|
||||
else
|
||||
return left + ", " + right.first;
|
||||
})
|
||||
);
|
||||
"Unknown Statistic type '{}'. Available types: tdigest", stat.type);
|
||||
}
|
||||
return std::make_shared<TDigestStatistic>(stat);
|
||||
}
|
||||
|
@ -43,10 +43,10 @@ public:
|
||||
return stat.column_name;
|
||||
}
|
||||
|
||||
const String & type() const
|
||||
{
|
||||
return stat.type;
|
||||
}
|
||||
//const String & type() const
|
||||
//{
|
||||
// return stat.type;
|
||||
//}
|
||||
|
||||
virtual void serialize(WriteBuffer & buf) = 0;
|
||||
|
||||
@ -118,13 +118,13 @@ public:
|
||||
|
||||
Statistics getMany(const std::vector<StatisticDescription> & stats) const;
|
||||
|
||||
void registerCreator(const std::string & type, Creator creator);
|
||||
void registerCreator(StatisticType type, Creator creator);
|
||||
|
||||
protected:
|
||||
MergeTreeStatisticFactory();
|
||||
|
||||
private:
|
||||
using Creators = std::unordered_map<std::string, Creator>;
|
||||
using Creators = std::unordered_map<StatisticType, Creator>;
|
||||
Creators creators;
|
||||
};
|
||||
|
||||
|
@ -10,6 +10,8 @@
|
||||
#include <Storages/extractKeyExpressionList.h>
|
||||
#include <Storages/StatisticsDescription.h>
|
||||
|
||||
#include <Common/logger_useful.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -19,58 +21,51 @@ namespace ErrorCodes
|
||||
extern const int LOGICAL_ERROR;
|
||||
};
|
||||
|
||||
StatisticDescription StatisticDescription::getStatisticFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, ContextPtr context)
|
||||
/// Maps a textual statistic type to the StatisticType enum.
/// An empty string and "tdigest" both yield TDigest; any other value throws INCORRECT_QUERY.
/// The comparison is case-sensitive.
StatisticType StatisticDescription::stringToType(String type)
{
    const bool is_tdigest = type.empty() || type == "tdigest";
    if (!is_tdigest)
        throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type: {}", type);

    return TDigest;
}
|
||||
|
||||
StatisticsDescriptions StatisticsDescriptions::getStatisticsFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, ContextPtr context)
|
||||
{
|
||||
const auto * stat_definition = definition_ast->as<ASTStatisticDeclaration>();
|
||||
if (!stat_definition)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot create statistic from non ASTStatisticDeclaration AST");
|
||||
|
||||
StatisticDescription stat;
|
||||
stat.definition_ast = definition_ast->clone();
|
||||
stat.type = Poco::toLower(stat_definition->type);
|
||||
if (stat.type != "tdigest")
|
||||
throw Exception(ErrorCodes::INCORRECT_QUERY, "Incorrect type name {}", stat.type);
|
||||
String column_name = stat_definition->column_name;
|
||||
LOG_INFO(&Poco::Logger::get("stats_desc"), "stat_def is like {}", stat_definition->dumpTree());
|
||||
|
||||
if (!columns.hasPhysical(column_name))
|
||||
throw Exception(ErrorCodes::INCORRECT_QUERY, "Incorrect column name {}", column_name);
|
||||
StatisticsDescriptions stats;
|
||||
for (const auto & column_ast : stat_definition->columns->children)
|
||||
{
|
||||
StatisticDescription stat;
|
||||
stat.type = StatisticDescription::stringToType(Poco::toLower(stat_definition->type));
|
||||
String column_name = column_ast->as<ASTIdentifier &>().name();
|
||||
|
||||
const auto & column = columns.getPhysical(column_name);
|
||||
stat.column_name = column.name;
|
||||
/// TODO: check if it is numeric.
|
||||
stat.data_type = column.type;
|
||||
if (!columns.hasPhysical(column_name))
|
||||
throw Exception(ErrorCodes::INCORRECT_QUERY, "Incorrect column name {}", column_name);
|
||||
|
||||
const auto & column = columns.getPhysical(column_name);
|
||||
stat.column_name = column.name;
|
||||
/// TODO: check if it is numeric.
|
||||
stat.data_type = column.type;
|
||||
stats.push_back(stat);
|
||||
}
|
||||
stats.definition_asts.push_back(definition_ast);
|
||||
|
||||
if (stats.empty())
|
||||
throw Exception(ErrorCodes::INCORRECT_QUERY, "Empty statistic column list");
|
||||
|
||||
LOG_INFO(&Poco::Logger::get("stats_desc"), "there are {} stats", stats.size());
|
||||
|
||||
UNUSED(context);
|
||||
|
||||
return stat;
|
||||
return stats;
|
||||
}
|
||||
|
||||
StatisticDescription::StatisticDescription(const StatisticDescription & other)
|
||||
: definition_ast(other.definition_ast ? other.definition_ast->clone() : nullptr)
|
||||
, type(other.type)
|
||||
, column_name(other.column_name)
|
||||
, data_type(other.data_type)
|
||||
{
|
||||
}
|
||||
|
||||
StatisticDescription & StatisticDescription::operator=(const StatisticDescription & other)
|
||||
{
|
||||
if (&other == this)
|
||||
return *this;
|
||||
|
||||
if (other.definition_ast)
|
||||
definition_ast = other.definition_ast->clone();
|
||||
else
|
||||
definition_ast.reset();
|
||||
|
||||
type = other.type;
|
||||
column_name = other.column_name;
|
||||
data_type = other.data_type;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
bool StatisticsDescriptions::has(const String & name) const
|
||||
{
|
||||
for (const auto & statistic : *this)
|
||||
@ -79,31 +74,22 @@ bool StatisticsDescriptions::has(const String & name) const
|
||||
return false;
|
||||
}
|
||||
|
||||
void StatisticsDescriptions::merge(const StatisticsDescriptions & other)
|
||||
{
|
||||
insert(end(), other.begin(), other.end());
|
||||
definition_asts.insert(definition_asts.end(), other.definition_asts.begin(), other.definition_asts.end());
|
||||
}
|
||||
|
||||
String StatisticsDescriptions::toString() const
|
||||
{
|
||||
if (empty())
|
||||
return {};
|
||||
|
||||
ASTExpressionList list;
|
||||
for (const auto & statistic : *this)
|
||||
list.children.push_back(statistic.definition_ast);
|
||||
for (const auto & ast : definition_asts)
|
||||
list.children.push_back(ast);
|
||||
|
||||
return serializeAST(list);
|
||||
}
|
||||
|
||||
StatisticsDescriptions StatisticsDescriptions::parse(const String & str, const ColumnsDescription & columns, ContextPtr context)
|
||||
{
|
||||
StatisticsDescriptions result;
|
||||
if (str.empty())
|
||||
return result;
|
||||
|
||||
ParserStatisticDeclaration parser;
|
||||
ASTPtr list = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
|
||||
|
||||
for (const auto & index : list->children)
|
||||
result.emplace_back(StatisticDescription::getStatisticFromAST(index, columns, context));
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -7,13 +7,15 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Kinds of column statistics that can be declared.
enum StatisticType
|
||||
{
|
||||
/// The only kind defined in this enum; its underlying value is 0.
TDigest = 0,
|
||||
};
|
||||
|
||||
struct StatisticDescription
|
||||
{
|
||||
/// Definition AST of statistic
|
||||
ASTPtr definition_ast;
|
||||
|
||||
/// the type of statistic, right now it's only tdigest.
|
||||
String type;
|
||||
StatisticType type;
|
||||
|
||||
/// Names of statistic columns
|
||||
String column_name;
|
||||
@ -21,24 +23,22 @@ struct StatisticDescription
|
||||
/// Data types of statistic columns
|
||||
DataTypePtr data_type;
|
||||
|
||||
static StatisticDescription getStatisticFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, ContextPtr context);
|
||||
|
||||
StatisticDescription() = default;
|
||||
|
||||
/// We need custom copy constructors because we don't want
|
||||
/// unintentionaly share AST variables and modify them.
|
||||
StatisticDescription(const StatisticDescription & other);
|
||||
StatisticDescription & operator=(const StatisticDescription & other);
|
||||
static StatisticType stringToType(String type);
|
||||
};
|
||||
|
||||
struct StatisticsDescriptions : public std::vector<StatisticDescription>
|
||||
{
|
||||
std::vector<ASTPtr> definition_asts;
|
||||
/// Stat with name exists
|
||||
bool has(const String & name) const;
|
||||
/// merge with other Statistics
|
||||
void merge(const StatisticsDescriptions & other);
|
||||
/// Convert description to string
|
||||
String toString() const;
|
||||
/// Parse description from string
|
||||
static StatisticsDescriptions parse(const String & str, const ColumnsDescription & columns, ContextPtr context);
|
||||
static StatisticsDescriptions getStatisticsFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, ContextPtr context);
|
||||
};
|
||||
|
||||
}
|
||||
|
22
tests/queries/0_stateless/02864_statistic_operate.reference
Normal file
22
tests/queries/0_stateless/02864_statistic_operate.reference
Normal file
@ -0,0 +1,22 @@
|
||||
CREATE TABLE default.t1\n(\n `a` Int64,\n `b` Float64,\n `pk` String,\n STATISTIC a, b TYPE tdigest\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS index_granularity = 8192
|
||||
After insert
|
||||
SELECT count()
|
||||
FROM t1
|
||||
PREWHERE (a < 10) AND (b < 10)
|
||||
10
|
||||
After drop statistic
|
||||
SELECT count()
|
||||
FROM t1
|
||||
PREWHERE (b < 10) AND (a < 10)
|
||||
10
|
||||
After add statistic
|
||||
After materialize statistic
|
||||
SELECT count()
|
||||
FROM t1
|
||||
PREWHERE (a < 10) AND (b < 10)
|
||||
20
|
||||
After merge
|
||||
SELECT count()
|
||||
FROM t1
|
||||
PREWHERE (a < 10) AND (b < 10)
|
||||
20
|
@ -8,8 +8,7 @@ CREATE TABLE t1
|
||||
a Int64,
|
||||
b Float64,
|
||||
pk String,
|
||||
STATISTIC a TYPE tdigest,
|
||||
STATISTIC b TYPE tdigest
|
||||
STATISTIC a, b TYPE tdigest,
|
||||
) Engine = MergeTree() ORDER BY pk;
|
||||
|
||||
SHOW CREATE TABLE t1;
|
||||
@ -20,18 +19,21 @@ SELECT 'After insert';
|
||||
EXPLAIN SYNTAX SELECT count(*) FROM t1 WHERE b < 10 and a < 10;
|
||||
SELECT count(*) FROM t1 WHERE b < 10 and a < 10;
|
||||
|
||||
ALTER TABLE t1 DROP STATISTIC a TYPE tdigest;
|
||||
ALTER TABLE t1 DROP STATISTIC b TYPE tdigest;
|
||||
ALTER TABLE t1 DROP STATISTIC a, b TYPE tdigest;
|
||||
|
||||
SELECT 'After drop statistic';
|
||||
EXPLAIN SYNTAX SELECT count(*) FROM t1 WHERE b < 10 and a < 10;
|
||||
SELECT count(*) FROM t1 WHERE b < 10 and a < 10;
|
||||
|
||||
ALTER TABLE t1 ADD STATISTIC a TYPE tdigest;
|
||||
ALTER TABLE t1 ADD STATISTIC b TYPE tdigest;
|
||||
--SHOW CREATE TABLE t1;
|
||||
|
||||
ALTER TABLE t1 MATERIALIZE STATISTIC a TYPE tdigest;
|
||||
ALTER TABLE t1 MATERIALIZE STATISTIC b TYPE tdigest;
|
||||
ALTER TABLE t1 ADD STATISTIC a, b TYPE tdigest;
|
||||
|
||||
SELECT 'After add statistic';
|
||||
|
||||
--SHOW CREATE TABLE t1;
|
||||
|
||||
ALTER TABLE t1 MATERIALIZE STATISTIC a, b TYPE tdigest;
|
||||
INSERT INTO t1 select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000;
|
||||
|
||||
SELECT 'After materialize statistic';
|
||||
|
Loading…
Reference in New Issue
Block a user