ClickHouse/src/Interpreters/PredicateRewriteVisitor.cpp

222 lines
8.3 KiB
C++
Raw Normal View History

2019-12-18 03:56:03 +00:00
#include <Interpreters/PredicateRewriteVisitor.h>
#include <Parsers/ASTFunction.h>
2019-12-18 03:56:03 +00:00
#include <Parsers/ASTAsterisk.h>
2020-01-06 03:55:07 +00:00
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTIdentifier.h>
2019-12-18 03:56:03 +00:00
#include <Parsers/ASTColumnsMatcher.h>
#include <Parsers/ASTQualifiedAsterisk.h>
2021-11-01 13:19:31 +00:00
#include <Parsers/ASTSelectIntersectExceptQuery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Interpreters/IdentifierSemantic.h>
2019-12-18 03:56:03 +00:00
#include <Interpreters/getTableExpressions.h>
#include <Interpreters/InterpreterSelectQuery.h>
#include <Interpreters/ExtractExpressionInfoVisitor.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
2019-12-18 03:56:03 +00:00
namespace DB
{
/// Stores everything the visitor needs while pushing predicates into a subquery:
///  - context_        : forwarded to the WithContext base;
///  - predicates_     : the predicates that rewriteSubquery() tries to add to each inner SELECT;
///  - table_columns_  : the outer table (its name and its column list);
///  - optimize_final_ : when false, rewriteSubquery() leaves subqueries with FINAL untouched;
///  - optimize_with_  : when false, rewriteSubquery() leaves subqueries with a WITH clause untouched.
PredicateRewriteVisitorData::PredicateRewriteVisitorData(
    ContextPtr context_,
    const ASTs & predicates_,
    const TableWithColumnNamesAndTypes & table_columns_,
    bool optimize_final_,
    bool optimize_with_)
    : WithContext(context_)
    , predicates(predicates_)
    , table_columns(table_columns_)
    , optimize_final(optimize_final_)
    , optimize_with(optimize_with_)
{
}
/// Walks the select list of a UNION query and dispatches every child by its AST type:
/// nested UNION and INTERSECT/EXCEPT nodes are visited recursively, plain SELECT queries
/// go to visitInternalSelect() together with their position in the list.
/// Children of any other type are left untouched.
void PredicateRewriteVisitorData::visit(ASTSelectWithUnionQuery & union_select_query, ASTPtr &)
{
    auto & selects = union_select_query.list_of_selects->children;

    for (size_t position = 0; position < selects.size(); ++position)
    {
        auto & child = selects[position];

        if (auto * nested_union = child->as<ASTSelectWithUnionQuery>())
            visit(*nested_union, child);
        else if (auto * plain_select = child->as<ASTSelectQuery>())
            visitInternalSelect(position, *plain_select, child);
        else if (auto * nested_intersect_except = child->as<ASTSelectIntersectExceptQuery>())
            visit(*nested_intersect_except, child);
    }
}
/// Routes a plain SELECT according to its position in the enclosing select list:
/// position 0 is handled by visitFirstInternalSelect(), every other position
/// by visitOtherInternalSelect().
void PredicateRewriteVisitorData::visitInternalSelect(size_t index, ASTSelectQuery & select_node, ASTPtr & node)
{
    const bool is_leading_select = (index == 0);

    if (!is_leading_select)
    {
        visitOtherInternalSelect(select_node, node);
        return;
    }

    visitFirstInternalSelect(select_node, node);
}
/// Same dispatch as for UNION queries, applied to the selects of an INTERSECT/EXCEPT query.
/// The result of getListOfSelects() is held in a local variable; the elements are
/// visited in order and each plain SELECT is passed on with its position in that list.
void PredicateRewriteVisitorData::visit(ASTSelectIntersectExceptQuery & intersect_except_query, ASTPtr &)
{
    auto selects = intersect_except_query.getListOfSelects();

    for (size_t position = 0; position < selects.size(); ++position)
    {
        auto & child = selects[position];

        if (auto * nested_union = child->as<ASTSelectWithUnionQuery>())
            visit(*nested_union, child);
        else if (auto * plain_select = child->as<ASTSelectQuery>())
            visitInternalSelect(position, *plain_select, child);
        else if (auto * nested_intersect_except = child->as<ASTSelectIntersectExceptQuery>())
            visit(*nested_intersect_except, child);
    }
}
/// Pushes the predicates into the first SELECT of the list and records whether anything changed.
void PredicateRewriteVisitorData::visitFirstInternalSelect(ASTSelectQuery & select_query, ASTPtr &)
{
    /// For the first select the inner column names are taken to be the outer ones,
    /// i.e. the column names of table_columns.
    const Names outer_names = table_columns.columns.getNames();

    if (rewriteSubquery(select_query, outer_names))
        is_rewrite = true;
}
/// Pushes the predicates into a SELECT that is not the first one of its list.
/// The work is done on a clone; the SELECT and HAVING expressions of the clone are moved
/// back into `select_query` only if rewriteSubquery() reports a change.
void PredicateRewriteVisitorData::visitOtherInternalSelect(ASTSelectQuery & select_query, ASTPtr &)
{
    ASTPtr cloned_select_ast = select_query.clone();
    auto * cloned_select = cloned_select_ast->as<ASTSelectQuery>();

    /// Aliases of a non-first select carry no meaning any more, so every unaliased
    /// expression gets a temporary, numbered alias. Asterisks, column matchers and
    /// plain identifiers are skipped.
    size_t next_alias_id = 0;
    for (auto & expression : cloned_select->refSelect()->children)
    {
        const bool is_asterisk_matcher_or_identifier
            = expression->as<ASTAsterisk>()
            || expression->as<ASTQualifiedAsterisk>()
            || expression->as<ASTColumnsListMatcher>()
            || expression->as<ASTColumnsRegexpMatcher>()
            || expression->as<ASTIdentifier>();

        if (is_asterisk_matcher_or_identifier)
            continue;

        if (expression->tryGetAlias().empty())
            expression->setAlias("--predicate_optimizer_" + toString(next_alias_id++));
    }

    /// Column names of the clone, taken from the sample block of an analyze-only interpreter.
    const Names cloned_columns = InterpreterSelectQuery(
        cloned_select_ast,
        const_pointer_cast<Context>(getContext()),
        SelectQueryOptions().analyze()).getSampleBlock().getNames();

    if (!rewriteSubquery(*cloned_select, cloned_columns))
        return;

    is_rewrite = true;
    select_query.setExpression(ASTSelectQuery::Expression::SELECT, std::move(cloned_select->refSelect()));
    select_query.setExpression(ASTSelectQuery::Expression::HAVING, std::move(cloned_select->refHaving()));
}
/// Recursively removes aliases from `predicate` and appends every ASTIdentifier found
/// in it to `identifiers`. Children are processed before the node itself.
static void cleanAliasAndCollectIdentifiers(ASTPtr & predicate, std::vector<ASTIdentifier *> & identifiers)
{
    /// Do not descend into subqueries: skip WHERE x in (SELECT ...)
    const bool is_subquery = predicate->as<ASTSubquery>() != nullptr;
    if (!is_subquery)
    {
        for (auto & child : predicate->children)
            cleanAliasAndCollectIdentifiers(child, identifiers);
    }

    if (!predicate->tryGetAlias().empty())
        predicate->setAlias({});

    if (auto * identifier = predicate->as<ASTIdentifier>())
        identifiers.emplace_back(identifier);
}
/// Clean aliases and use aliased name
/// Transforms `(a = b as c) AND (x = y)` to `(a = c) AND (x = y)`
static void useAliasInsteadOfIdentifier(const ASTPtr & predicate)
{
if (!predicate->as<ASTSubquery>())
{
for (auto & children : predicate->children)
useAliasInsteadOfIdentifier(children);
}
if (const auto alias = predicate->tryGetAlias(); !alias.empty())
{
if (ASTIdentifier * identifier = predicate->as<ASTIdentifier>())
identifier->setShortName(alias);
predicate->setAlias({});
}
}
static void getConjunctionHashesFrom(const ASTPtr & ast, std::set<IAST::Hash> & hashes)
{
for (const auto & pred : splitConjunctionsAst(ast))
{
/// Clone not to modify `ast`
ASTPtr pred_copy = pred->clone();
useAliasInsteadOfIdentifier(pred_copy);
hashes.emplace(pred_copy->getTreeHash());
}
}
/// Tries to push every stored predicate into the HAVING clause of `subquery`.
///
/// `inner_columns` are the subquery's column names; they are matched by position with the
/// outer column names from table_columns, and identifiers in the pushed predicates are
/// renamed from the outer name to the inner one.
///
/// Returns true if at least one predicate was added to the subquery.
bool PredicateRewriteVisitorData::rewriteSubquery(ASTSelectQuery & subquery, const Names & inner_columns)
{
    /// Subquery shapes that are left untouched.
    const bool is_not_rewritable
        = (!optimize_final && subquery.final())
        || (!optimize_with && subquery.with())
        || subquery.withFill()
        || subquery.limitBy() || subquery.limitLength() || subquery.limitByLength() || subquery.limitByOffset()
        || hasNonRewritableFunction(subquery.select(), getContext())
        || (subquery.orderBy() && subquery.limitOffset());

    if (is_not_rewritable)
        return false;

    Names outer_columns = table_columns.columns.getNames();

    /// Do not add same conditions twice to avoid extra rewrites with exponential blowup
    /// (e.g. in case of deep complex query with lots of JOINs)
    std::set<IAST::Hash> seen_hashes;
    getConjunctionHashesFrom(subquery.where(), seen_hashes);
    getConjunctionHashesFrom(subquery.having(), seen_hashes);

    bool is_changed = false;

    for (const auto & predicate : predicates)
    {
        std::vector<ASTIdentifier *> identifiers;
        ASTPtr pushed_predicate = predicate->clone();
        cleanAliasAndCollectIdentifiers(pushed_predicate, identifiers);

        /// emplace() reports whether the hash was new; an already known predicate is skipped.
        if (!seen_hashes.emplace(pushed_predicate->getTreeHash()).second)
            continue;

        is_changed = true;

        for (auto * identifier : identifiers)
        {
            IdentifierSemantic::setColumnShortName(*identifier, table_columns.table);
            const auto & column_name = identifier->name();

            /// For lambda functions, we can't always find them in the list of columns
            /// For example: SELECT * FROM system.one WHERE arrayMap(x -> x, [dummy]) = [0]
            const auto found = std::find(outer_columns.begin(), outer_columns.end(), column_name);
            if (found == outer_columns.end())
                continue;

            /// Rename to the inner column that sits at the same position as the outer one.
            identifier->setShortName(inner_columns[found - outer_columns.begin()]);
        }

        /// We only need to push all the predicates to subquery having
        /// The subquery optimizer will move the appropriate predicates from having to where
        ASTPtr combined_having;
        if (subquery.having())
            combined_having = makeASTFunction("and", pushed_predicate, subquery.having());
        else
            combined_having = pushed_predicate;

        subquery.setExpression(ASTSelectQuery::Expression::HAVING, std::move(combined_having));
    }

    return is_changed;
}
}