ClickHouse/src/Storages/TTLDescription.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

396 lines
14 KiB
C++
Raw Normal View History

2020-05-28 15:34:33 +00:00
#include <Storages/TTLDescription.h>
2021-01-12 00:40:07 +00:00
#include <AggregateFunctions/AggregateFunctionFactory.h>
2020-05-28 15:34:33 +00:00
#include <Functions/IFunction.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/TreeRewriter.h>
#include <Interpreters/InDepthNodeVisitor.h>
2021-01-12 16:42:49 +00:00
#include <Interpreters/addTypeConversionToAST.h>
2020-05-28 15:34:33 +00:00
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTTTLElement.h>
#include <Parsers/ASTIdentifier.h>
2021-01-12 00:40:07 +00:00
#include <Parsers/ASTAssignment.h>
2020-05-28 15:34:33 +00:00
#include <Storages/ColumnsDescription.h>
2020-08-31 11:35:53 +00:00
#include <Interpreters/Context.h>
2020-05-28 15:34:33 +00:00
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
2022-01-27 08:33:40 +00:00
#include <Interpreters/FunctionNameNormalizer.h>
2022-01-28 11:07:59 +00:00
#include <Parsers/ExpressionListParsers.h>
2022-01-27 08:33:40 +00:00
#include <Parsers/parseQuery.h>
2020-05-28 15:34:33 +00:00
namespace DB
{
/// Error codes used by the TTL parsing/validation code in this file.
/// They are only declared here (extern); the definitions live elsewhere.
namespace ErrorCodes
{
/// Thrown by checkTTLExpression for expressions that use no table columns
/// or that contain non-deterministic functions.
extern const int BAD_ARGUMENTS;
/// Thrown for structurally invalid TTL definitions (wrong result type,
/// bad GROUP BY key, missing/duplicate aggregations, duplicate DELETE TTL).
extern const int BAD_TTL_EXPRESSION;
}
2020-06-12 14:32:47 +00:00
/// Copy constructor.
/// Names are copied as-is; the expression actions are duplicated via clone()
/// so that the new object does not share them with `other`.
TTLAggregateDescription::TTLAggregateDescription(const TTLAggregateDescription & other)
    : column_name(other.column_name)
    , expression_result_column_name(other.expression_result_column_name)
{
    /// `expression` stays default-initialized (null) when the source has none.
    if (!other.expression)
        return;

    expression = other.expression->clone();
}
/// Copy assignment.
/// Self-assignment is a no-op. Otherwise names are copied and the expression
/// actions are either cloned from `other` or cleared when `other` has none.
TTLAggregateDescription & TTLAggregateDescription::operator=(const TTLAggregateDescription & other)
{
    if (this == &other)
        return *this;

    column_name = other.column_name;
    expression_result_column_name = other.expression_result_column_name;

    /// Either take an independent copy of the actions or drop ours.
    expression = other.expression ? other.expression->clone() : nullptr;

    return *this;
}
2020-05-28 15:34:33 +00:00
namespace
{
2023-11-23 15:02:33 +00:00
void checkTTLExpression(const ExpressionActionsPtr & ttl_expression, const String & result_column_name, bool allow_suspicious)
2020-05-28 15:34:33 +00:00
{
2023-11-23 15:02:33 +00:00
/// Do not apply this check in ATTACH queries for compatibility reasons and if explicitly allowed.
if (!allow_suspicious)
2020-05-28 15:34:33 +00:00
{
2023-07-05 18:50:58 +00:00
if (ttl_expression->getRequiredColumns().empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"TTL expression {} does not depend on any of the columns of the table", result_column_name);
for (const auto & action : ttl_expression->getActions())
2020-05-28 15:34:33 +00:00
{
if (action.node->type == ActionsDAG::ActionType::FUNCTION)
{
const IFunctionBase & func = *action.node->function_base;
if (!func.isDeterministic())
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"TTL expression cannot contain non-deterministic functions, but contains function {}",
func.getName());
}
2020-05-28 15:34:33 +00:00
}
}
const auto & result_column = ttl_expression->getSampleBlock().getByName(result_column_name);
if (!typeid_cast<const DataTypeDateTime *>(result_column.type.get())
&& !typeid_cast<const DataTypeDate *>(result_column.type.get()))
{
throw Exception(ErrorCodes::BAD_TTL_EXPRESSION,
"TTL expression result column should have DateTime or Date type, but has {}",
result_column.type->getName());
2020-05-28 15:34:33 +00:00
}
}
class FindAggregateFunctionData
{
public:
using TypeToVisit = ASTFunction;
bool has_aggregate_function = false;
void visit(const ASTFunction & func, ASTPtr &)
{
/// Do not throw if found aggregate function inside another aggregate function,
/// because it will be checked, while creating expressions.
2022-06-16 15:41:04 +00:00
if (AggregateUtils::isAggregateFunction(func))
has_aggregate_function = true;
}
};
using FindAggregateFunctionFinderMatcher = OneTypeMatcher<FindAggregateFunctionData>;
using FindAggregateFunctionVisitor = InDepthNodeVisitor<FindAggregateFunctionFinderMatcher, true>;
2020-05-28 15:34:33 +00:00
}
2020-06-05 17:29:40 +00:00
/// Copy constructor.
///
/// Plain value members are copied directly. Every AST / expression member is
/// duplicated with clone() so that the copy does not share mutable trees with
/// `other`. This includes `recompression_codec`, which is cloned here the same
/// way the copy assignment operator does it, keeping both copy paths consistent.
TTLDescription::TTLDescription(const TTLDescription & other)
    : mode(other.mode)
    , expression_ast(other.expression_ast ? other.expression_ast->clone() : nullptr)
    , result_column(other.result_column)
    , where_result_column(other.where_result_column)
    , group_by_keys(other.group_by_keys)
    , set_parts(other.set_parts)
    , aggregate_descriptions(other.aggregate_descriptions)
    , destination_type(other.destination_type)
    , destination_name(other.destination_name)
    , if_exists(other.if_exists)
    , recompression_codec(other.recompression_codec ? other.recompression_codec->clone() : nullptr)
{
    /// The compiled expressions are left null when the source has none.
    if (other.expression)
        expression = other.expression->clone();

    if (other.where_expression)
        where_expression = other.where_expression->clone();
}
/// Copy assignment.
///
/// Self-assignment is a no-op. Value members are copied; every AST / expression
/// member is replaced by a clone of the corresponding member of `other`, or
/// cleared when `other` does not have it.
TTLDescription & TTLDescription::operator=(const TTLDescription & other)
{
    if (this == &other)
        return *this;

    mode = other.mode;

    expression_ast = other.expression_ast ? other.expression_ast->clone() : nullptr;
    expression = other.expression ? other.expression->clone() : nullptr;
    result_column = other.result_column;

    where_expression = other.where_expression ? other.where_expression->clone() : nullptr;
    where_result_column = other.where_result_column;

    group_by_keys = other.group_by_keys;
    set_parts = other.set_parts;
    aggregate_descriptions = other.aggregate_descriptions;

    destination_type = other.destination_type;
    destination_name = other.destination_name;
    if_exists = other.if_exists;

    recompression_codec = other.recompression_codec ? other.recompression_codec->clone() : nullptr;

    return *this;
}
2020-05-28 15:34:33 +00:00
/// Builds a TTLDescription from one TTL AST node.
///
/// `definition_ast` is either an ASTTTLElement (rows TTL: DELETE [WHERE], move,
/// GROUP BY ... SET, RECOMPRESS) or a bare expression (columns TTL, treated as DELETE).
/// `columns` supplies the physical columns used to analyze expressions,
/// `primary_key` is used to validate and complete GROUP BY TTLs, and `is_attach`
/// (or the allow_suspicious_ttl_expressions setting) relaxes the checks done by
/// checkTTLExpression.
///
/// Throws BAD_TTL_EXPRESSION for an invalid GROUP BY key, an assignment without an
/// aggregate function, several aggregations for one column, or a result type other
/// than Date/DateTime; BAD_ARGUMENTS may be thrown by checkTTLExpression.
TTLDescription TTLDescription::getTTLFromAST(
    const ASTPtr & definition_ast,
    const ColumnsDescription & columns,
    ContextPtr context,
    const KeyDescription & primary_key,
    bool is_attach)
{
    TTLDescription result;
    const auto * ttl_element = definition_ast->as<ASTTTLElement>();

    /// First child is expression: `TTL expr TO DISK`
    if (ttl_element != nullptr)
        result.expression_ast = ttl_element->children.front()->clone();
    else /// It's columns TTL without any additions, just copy it
        result.expression_ast = definition_ast->clone();

    /// Analysis rewrites the tree it is given, so it works on a separate clone and
    /// `result.expression_ast` keeps the original form.
    auto ttl_ast = result.expression_ast->clone();
    auto syntax_analyzer_result = TreeRewriter(context).analyze(ttl_ast, columns.getAllPhysical());
    result.expression = ExpressionAnalyzer(ttl_ast, syntax_analyzer_result, context).getActions(false);
    result.result_column = ttl_ast->getColumnName();

    if (ttl_element == nullptr) /// columns TTL
    {
        result.destination_type = DataDestinationType::DELETE;
        result.mode = TTLMode::DELETE;
    }
    else /// rows TTL
    {
        result.mode = ttl_element->mode;
        result.destination_type = ttl_element->destination_type;
        result.destination_name = ttl_element->destination_name;
        result.if_exists = ttl_element->if_exists;

        if (ttl_element->mode == TTLMode::DELETE)
        {
            if (ASTPtr where_expr_ast = ttl_element->where())
            {
                auto where_syntax_result = TreeRewriter(context).analyze(where_expr_ast, columns.getAllPhysical());
                result.where_expression = ExpressionAnalyzer(where_expr_ast, where_syntax_result, context).getActions(false);
                result.where_result_column = where_expr_ast->getColumnName();
            }
        }
        else if (ttl_element->mode == TTLMode::GROUP_BY)
        {
            const auto & pk_columns = primary_key.column_names;

            if (ttl_element->group_by_key.size() > pk_columns.size())
                throw Exception(ErrorCodes::BAD_TTL_EXPRESSION, "TTL Expression GROUP BY key should be a prefix of primary key");

            NameSet aggregation_columns_set;
            NameSet used_primary_key_columns_set;

            for (size_t i = 0; i < ttl_element->group_by_key.size(); ++i)
            {
                if (ttl_element->group_by_key[i]->getColumnName() != pk_columns[i])
                    throw Exception(ErrorCodes::BAD_TTL_EXPRESSION, "TTL Expression GROUP BY key should be a prefix of primary key");

                used_primary_key_columns_set.insert(pk_columns[i]);
            }

            std::vector<std::pair<String, ASTPtr>> aggregations;
            for (const auto & ast : ttl_element->group_by_assignments)
            {
                /// Bind by reference: plain `auto` would drop the reference and copy the AST node.
                const auto & assignment = ast->as<const ASTAssignment &>();
                auto expression = assignment.expression();

                FindAggregateFunctionVisitor::Data data{false};
                FindAggregateFunctionVisitor(data).visit(expression);

                if (!data.has_aggregate_function)
                    throw Exception(ErrorCodes::BAD_TTL_EXPRESSION,
                        "Invalid expression for assignment of column {}. Should contain an aggregate function", assignment.column_name);

                /// Convert the aggregated value to the declared type of the target column.
                expression = addTypeConversionToAST(std::move(expression), columns.getPhysical(assignment.column_name).type->getName());
                aggregations.emplace_back(assignment.column_name, std::move(expression));
                aggregation_columns_set.insert(assignment.column_name);
            }

            /// The set deduplicates names, so a size mismatch means some column was assigned twice.
            if (aggregation_columns_set.size() != ttl_element->group_by_assignments.size())
                throw Exception(ErrorCodes::BAD_TTL_EXPRESSION, "Multiple aggregations set for one column in TTL Expression");

            result.group_by_keys = Names(pk_columns.begin(), pk_columns.begin() + ttl_element->group_by_key.size());

            const auto & primary_key_expressions = primary_key.expression_list_ast->children;

            /// Wrap with 'any' aggregate function primary key columns,
            /// which are not in 'GROUP BY' key and was not set explicitly.
            /// The separate step, because not all primary key columns are ordinary columns.
            for (size_t i = ttl_element->group_by_key.size(); i < primary_key_expressions.size(); ++i)
            {
                if (!aggregation_columns_set.contains(pk_columns[i]))
                {
                    ASTPtr expr = makeASTFunction("any", primary_key_expressions[i]->clone());
                    aggregations.emplace_back(pk_columns[i], std::move(expr));
                    aggregation_columns_set.insert(pk_columns[i]);
                }
            }

            /// Wrap with 'any' aggregate function other columns, which was not set explicitly.
            for (const auto & column : columns.getOrdinary())
            {
                if (!aggregation_columns_set.contains(column.name) && !used_primary_key_columns_set.contains(column.name))
                {
                    ASTPtr expr = makeASTFunction("any", std::make_shared<ASTIdentifier>(column.name));
                    aggregations.emplace_back(column.name, std::move(expr));
                }
            }

            /// The column list does not change between iterations, so build it once.
            const auto physical_columns = columns.getAllPhysical();

            /// Iterate by reference: `aggregations` is not used afterwards, so letting the
            /// analyzer rewrite the stored AST in place is harmless and avoids copying each pair.
            for (auto & [name, value] : aggregations)
            {
                auto syntax_result = TreeRewriter(context).analyze(value, physical_columns, {}, {}, true);
                auto expr_analyzer = ExpressionAnalyzer(value, syntax_result, context);

                TTLAggregateDescription set_part;
                set_part.column_name = name;
                set_part.expression_result_column_name = value->getColumnName();
                set_part.expression = expr_analyzer.getActions(false);

                result.set_parts.emplace_back(set_part);

                for (const auto & descr : expr_analyzer.getAnalyzedData().aggregate_descriptions)
                    result.aggregate_descriptions.push_back(descr);
            }
        }
        else if (ttl_element->mode == TTLMode::RECOMPRESS)
        {
            result.recompression_codec =
                CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(
                    ttl_element->recompression_codec, {}, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs, context->getSettingsRef().enable_deflate_qpl_codec);
        }
    }

    checkTTLExpression(result.expression, result.result_column, is_attach || context->getSettingsRef().allow_suspicious_ttl_expressions);
    return result;
}
2020-06-05 17:29:40 +00:00
2020-07-08 23:05:56 +00:00
/// Copy constructor.
/// The definition AST is cloned (or left null); the per-mode TTL members are
/// copied with their own copy constructors.
TTLTableDescription::TTLTableDescription(const TTLTableDescription & other)
    : definition_ast(other.definition_ast ? other.definition_ast->clone() : nullptr)
    , rows_ttl(other.rows_ttl)
    , rows_where_ttl(other.rows_where_ttl)
    , move_ttl(other.move_ttl)
    , recompression_ttl(other.recompression_ttl)
    , group_by_ttl(other.group_by_ttl)
{}
/// Copy assignment.
/// Self-assignment is a no-op. The definition AST is replaced by a clone of
/// `other`'s (or cleared when `other` has none); all TTL members are copy-assigned.
TTLTableDescription & TTLTableDescription::operator=(const TTLTableDescription & other)
{
    if (this == &other)
        return *this;

    definition_ast = other.definition_ast ? other.definition_ast->clone() : nullptr;

    rows_ttl = other.rows_ttl;
    rows_where_ttl = other.rows_where_ttl;
    move_ttl = other.move_ttl;
    recompression_ttl = other.recompression_ttl;
    group_by_ttl = other.group_by_ttl;

    return *this;
}
2020-06-05 17:29:40 +00:00
/// Builds the table-level TTL description from a TTL definition AST.
///
/// Each child of `definition_ast` is converted with TTLDescription::getTTLFromAST
/// and stored by mode: RECOMPRESS, GROUP BY, DELETE with WHERE, DELETE without
/// WHERE (`rows_ttl`), and everything else into `move_ttl`.
/// A null `definition_ast` yields an empty description.
///
/// Throws BAD_TTL_EXPRESSION when more than one DELETE TTL without WHERE is present.
TTLTableDescription TTLTableDescription::getTTLForTableFromAST(
    const ASTPtr & definition_ast,
    const ColumnsDescription & columns,
    ContextPtr context,
    const KeyDescription & primary_key,
    bool is_attach)
{
    TTLTableDescription result;
    if (!definition_ast)
        return result;

    result.definition_ast = definition_ast->clone();

    bool seen_plain_delete = false;
    for (const auto & child_ast : definition_ast->children)
    {
        auto ttl = TTLDescription::getTTLFromAST(child_ast, columns, context, primary_key, is_attach);

        if (ttl.mode == TTLMode::RECOMPRESS)
        {
            result.recompression_ttl.emplace_back(std::move(ttl));
            continue;
        }

        if (ttl.mode == TTLMode::GROUP_BY)
        {
            result.group_by_ttl.emplace_back(std::move(ttl));
            continue;
        }

        /// Any remaining mode other than DELETE is stored as a move TTL.
        if (ttl.mode != TTLMode::DELETE)
        {
            result.move_ttl.emplace_back(std::move(ttl));
            continue;
        }

        /// DELETE ... WHERE: any number of these is allowed.
        if (ttl.where_expression)
        {
            result.rows_where_ttl.emplace_back(std::move(ttl));
            continue;
        }

        /// Plain DELETE: at most one per table.
        if (seen_plain_delete)
            throw Exception(ErrorCodes::BAD_TTL_EXPRESSION, "More than one DELETE TTL expression without WHERE expression is not allowed");

        seen_plain_delete = true;
        result.rows_ttl = ttl;
    }

    return result;
}
2022-01-27 08:33:40 +00:00
/// Parses a TTL expression list from its textual form and builds the table TTL
/// description from it. An empty string yields an empty description.
TTLTableDescription TTLTableDescription::parse(const String & str, const ColumnsDescription & columns, ContextPtr context, const KeyDescription & primary_key)
{
    if (str.empty())
        return TTLTableDescription();

    ParserTTLExpressionList ttl_parser;
    ASTPtr parsed_ast = parseQuery(ttl_parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);

    /// Normalize function names in the parsed tree before it is analyzed.
    FunctionNameNormalizer().visit(parsed_ast.get());

    /// NOTE(review): `is_attach` is not passed here, so this relies on the declaration
    /// providing a default for it — confirm against the header.
    return getTTLForTableFromAST(parsed_ast, columns, context, primary_key);
}
2020-05-28 15:34:33 +00:00
}