ClickHouse/src/Interpreters/PredicateRewriteVisitor.cpp

143 lines
5.5 KiB
C++
Raw Normal View History

2019-12-18 03:56:03 +00:00
#include <Interpreters/PredicateRewriteVisitor.h>
#include <Parsers/ASTFunction.h>
2019-12-18 03:56:03 +00:00
#include <Parsers/ASTAsterisk.h>
2020-01-06 03:55:07 +00:00
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTIdentifier.h>
2019-12-18 03:56:03 +00:00
#include <Parsers/ASTColumnsMatcher.h>
#include <Parsers/ASTQualifiedAsterisk.h>
#include <Interpreters/IdentifierSemantic.h>
2019-12-18 03:56:03 +00:00
#include <Interpreters/getTableExpressions.h>
#include <Interpreters/InterpreterSelectQuery.h>
#include <Interpreters/ExtractExpressionInfoVisitor.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
2019-12-18 03:56:03 +00:00
namespace DB
{
/// Initializes the visitor state from its arguments:
///  - context_          is handed to the WithContext base;
///  - predicates_       are the predicates that rewriteSubquery() pushes into subqueries;
///  - table_columns_    describes the outer table expression whose column names the predicates use;
///  - optimize_final_   when false, rewriteSubquery() refuses subqueries for which final() is set;
///  - optimize_with_    when false, rewriteSubquery() refuses subqueries that have a WITH section.
PredicateRewriteVisitorData::PredicateRewriteVisitorData(
    ContextPtr context_,
    const ASTs & predicates_,
    const TableWithColumnNamesAndTypes & table_columns_,
    bool optimize_final_,
    bool optimize_with_)
    : WithContext(context_)
    , predicates(predicates_)
    , table_columns(table_columns_)
    , optimize_final(optimize_final_)
    , optimize_with(optimize_with_)
{
}
/// Walks every element of the UNION's select list.
/// A nested union is visited recursively. Any other element is treated as a plain SELECT and is
/// dispatched to visitFirstInternalSelect() when it is element 0 of this list, and to
/// visitOtherInternalSelect() otherwise.
void PredicateRewriteVisitorData::visit(ASTSelectWithUnionQuery & union_select_query, ASTPtr &)
{
    auto & selects = union_select_query.list_of_selects->children;

    for (size_t position = 0; position < selects.size(); ++position)
    {
        ASTPtr & current = selects[position];

        if (auto * nested_union = current->as<ASTSelectWithUnionQuery>())
        {
            visit(*nested_union, current);
            continue;
        }

        /// Not a nested union: the element is expected to be an ASTSelectQuery.
        auto & plain_select = *current->as<ASTSelectQuery>();
        if (position == 0)
            visitFirstInternalSelect(plain_select, current);
        else
            visitOtherInternalSelect(plain_select, current);
    }
}
/// Pushes the predicates into the first SELECT of a union.
/// Sets is_rewrite when rewriteSubquery() reports that the predicates were pushed.
void PredicateRewriteVisitorData::visitFirstInternalSelect(ASTSelectQuery & select_query, ASTPtr &)
{
    /// For the first select the inner column names are the same as the outer ones taken from table_columns.
    const Names first_select_columns = table_columns.columns.getNames();

    if (rewriteSubquery(select_query, first_select_columns))
        is_rewrite = true;
}
/// Pushes the predicates into a non-first SELECT of a union.
/// The rewrite is performed on a clone; the SELECT and HAVING sections of the clone are moved back
/// into select_query only if rewriteSubquery() succeeded.
void PredicateRewriteVisitorData::visitOtherInternalSelect(ASTSelectQuery & select_query, ASTPtr &)
{
    ASTPtr cloned_select = select_query.clone();
    auto * cloned_query = cloned_select->as<ASTSelectQuery>();

    /// The aliases of a non-first select carry no meaning for the union result,
    /// so every unnamed expression can be given a temporary alias.
    size_t next_alias_id = 0;
    for (auto & element : cloned_query->refSelect()->children)
    {
        /// Asterisks, column matchers and bare identifiers are left as they are.
        const bool keeps_own_name = element->as<ASTAsterisk>() || element->as<ASTQualifiedAsterisk>()
            || element->as<ASTColumnsMatcher>() || element->as<ASTIdentifier>();
        if (keeps_own_name)
            continue;

        if (element->tryGetAlias().empty())
            element->setAlias("--predicate_optimizer_" + toString(next_alias_id++));
    }

    /// Analyze the clone to learn the names of the columns it produces.
    const Names internal_columns = InterpreterSelectQuery(
        cloned_select,
        const_pointer_cast<Context>(getContext()),
        SelectQueryOptions().analyze()).getSampleBlock().getNames();

    if (!rewriteSubquery(*cloned_query, internal_columns))
        return;

    is_rewrite = true;

    select_query.setExpression(ASTSelectQuery::Expression::SELECT, std::move(cloned_query->refSelect()));
    select_query.setExpression(ASTSelectQuery::Expression::HAVING, std::move(cloned_query->refHaving()));
}
/// Walks the predicate tree bottom-up: clears every non-empty alias it meets and appends a pointer
/// to each ASTIdentifier node to `identifiers`.
/// The children of an ASTSubquery node are not visited, e.g. WHERE x IN (SELECT ...).
static void cleanAliasAndCollectIdentifiers(ASTPtr & predicate, std::vector<ASTIdentifier *> & identifiers)
{
    const bool is_subquery = predicate->as<ASTSubquery>() != nullptr;
    if (!is_subquery)
    {
        for (auto & child : predicate->children)
            cleanAliasAndCollectIdentifiers(child, identifiers);
    }

    /// setAlias() is called only for nodes that currently carry an alias.
    if (!predicate->tryGetAlias().empty())
        predicate->setAlias({});

    if (auto * as_identifier = predicate->as<ASTIdentifier>())
        identifiers.push_back(as_identifier);
}
/// Tries to push all `predicates` into `subquery`.
///
/// @param subquery       the SELECT that receives the predicates; modified only when true is returned.
/// @param inner_columns  names of the columns produced by `subquery`, positionally matching the
///                       outer column names from table_columns.
/// @return false, leaving `subquery` untouched, when pushing down is not allowed: FINAL or WITH is
///         present and the corresponding optimize_* flag is off, or the subquery uses WITH FILL,
///         LIMIT BY, LIMIT, OFFSET, or its SELECT list contains a non-rewritable function.
///         Otherwise true, after every predicate has been AND-ed into the subquery's HAVING.
bool PredicateRewriteVisitorData::rewriteSubquery(ASTSelectQuery & subquery, const Names & inner_columns)
{
    /// Filtering below a row-limiting clause changes which rows survive it, so such subqueries are skipped.
    /// OFFSET is checked separately from LIMIT because filtering before the offset changes which rows are skipped.
    /// NOTE(review): assumes an OFFSET without LIMIT is exposed via limitOffset() while limitLength()
    /// stays unset - confirm against the parser.
    if ((!optimize_final && subquery.final())
        || (!optimize_with && subquery.with())
        || subquery.withFill()
        || subquery.limitBy() || subquery.limitLength() || subquery.limitOffset()
        || hasNonRewritableFunction(subquery.select(), getContext()))
        return false;

    const Names outer_columns = table_columns.columns.getNames();

    for (const auto & predicate : predicates)
    {
        /// Work on a copy so that the caller's predicate is left intact.
        std::vector<ASTIdentifier *> identifiers;
        ASTPtr optimize_predicate = predicate->clone();
        cleanAliasAndCollectIdentifiers(optimize_predicate, identifiers);

        for (const auto & identifier : identifiers)
        {
            IdentifierSemantic::setColumnShortName(*identifier, table_columns.table);
            const auto & column_name = identifier->name();

            /// For lambda functions, we can't always find them in the list of columns
            /// For example: SELECT * FROM system.one WHERE arrayMap(x -> x, [dummy]) = [0]
            const auto outer_column_it = std::find(outer_columns.begin(), outer_columns.end(), column_name);
            if (outer_column_it != outer_columns.end())
            {
                /// Rename the outer column to the inner column at the same position.
                identifier->setShortName(inner_columns[outer_column_it - outer_columns.begin()]);
            }
        }

        /// We only need to push all the predicates to subquery having
        /// The subquery optimizer will move the appropriate predicates from having to where
        subquery.setExpression(ASTSelectQuery::Expression::HAVING,
            subquery.having() ? makeASTFunction("and", optimize_predicate, subquery.having()) : optimize_predicate);
    }

    return true;
}
}