Try to fix failing test & fix build failure

This commit is contained in:
zhang2014 2020-01-04 12:31:45 +08:00
parent 9364397597
commit 6a5e3e2dd9
8 changed files with 115 additions and 98 deletions

View File

@ -55,5 +55,19 @@ void ExpressionInfoMatcher::visit(const ASTIdentifier & identifier, const ASTPtr
}
}
bool hasStatefulFunction(const ASTPtr & node, const Context & context)
{
for (const auto & select_expression : node->children)
{
ExpressionInfoVisitor::Data expression_info{.context = context, .tables = {}};
ExpressionInfoVisitor(expression_info).visit(select_expression);
if (expression_info.is_stateful_function)
return true;
}
return false;
}
}

View File

@ -21,7 +21,7 @@ struct ExpressionInfoMatcher
bool is_array_join = false;
bool is_stateful_function = false;
bool is_aggregate_function = false;
std::unordered_set<size_t> unique_reference_tables_pos;
std::unordered_set<size_t> unique_reference_tables_pos = {};
};
static void visit(const ASTPtr & ast, Data & data);
@ -33,7 +33,8 @@ struct ExpressionInfoMatcher
static void visit(const ASTIdentifier & identifier, const ASTPtr &, Data & data);
};
using ExpressionInfoVisitor = ConstInDepthNodeVisitor<ExpressionInfoMatcher, true>;
bool hasStatefulFunction(const ASTPtr & node, const Context & context);
}

View File

@ -1,29 +1,13 @@
#include <iostream>
#include <Common/typeid_cast.h>
#include <Storages/IStorage.h>
#include <Interpreters/IdentifierSemantic.h>
#include <Interpreters/InterpreterSelectQuery.h>
#include <Interpreters/PredicateExpressionsOptimizer.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <Parsers/IAST.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/queryToString.h>
#include <Interpreters/Context.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/QueryNormalizer.h>
#include <Interpreters/QueryAliasesVisitor.h>
#include <Interpreters/MarkTableIdentifiersVisitor.h>
#include <Interpreters/TranslateQualifiedNamesVisitor.h>
#include <Interpreters/PredicateRewriteVisitor.h>
#include <Interpreters/getTableExpressions.h>
#include <Interpreters/PredicateRewriteVisitor.h>
#include <Interpreters/ExtractExpressionInfoVisitor.h>
#include <Functions/FunctionFactory.h>
namespace DB
@ -43,7 +27,13 @@ PredicateExpressionsOptimizer::PredicateExpressionsOptimizer(
bool PredicateExpressionsOptimizer::optimize(ASTSelectQuery & select_query)
{
if (!settings.enable_optimize_predicate_expression || !select_query.tables() || select_query.tables()->children.empty())
if (!settings.enable_optimize_predicate_expression)
return false;
if (select_query.having() && (!select_query.group_by_with_cube && !select_query.group_by_with_rollup && !select_query.group_by_with_totals))
tryMovePredicatesFromHavingToWhere(select_query);
if (!select_query.tables() || select_query.tables()->children.empty())
return false;
if ((!select_query.where() && !select_query.prewhere()) || select_query.array_join_expression_list())
@ -165,14 +155,61 @@ bool PredicateExpressionsOptimizer::tryRewritePredicatesToTable(ASTPtr & table_e
{
if (!table_predicates.empty())
{
PredicateRewriteVisitor::Data data(
context, table_predicates, table_column, settings.enable_optimize_predicate_expression_to_final_subquery);
auto optimize_final = settings.enable_optimize_predicate_expression_to_final_subquery;
PredicateRewriteVisitor::Data data(context, table_predicates, table_column, optimize_final);
PredicateRewriteVisitor(data).visit(table_element);
return data.isRewrite();
return data.is_rewrite;
}
return false;
}
/// Redistributes the predicates of the HAVING expression of `select_query`.
/// The HAVING expression is split with splitConjunctionPredicate; every resulting predicate
/// for which ExpressionInfoVisitor reports an aggregate function stays in HAVING, all others
/// are appended to WHERE (combined with the existing WHERE through "and", if there is one).
/// Returns false, leaving the query untouched, if any predicate is reported as containing
/// a stateful function; returns true otherwise.
bool PredicateExpressionsOptimizer::tryMovePredicatesFromHavingToWhere(ASTSelectQuery & select_query)
{
    /// Folds a non-empty list of predicates into a left-nested chain of "and" functions.
    const auto combine_with_and = [](const ASTs & predicates)
    {
        ASTPtr combined = predicates[0];
        for (size_t i = 1; i < predicates.size(); ++i)
            combined = makeASTFunction("and", combined, predicates[i]);
        return combined;
    };

    ASTs kept_in_having;
    ASTs moved_to_where;

    for (const auto & predicate : splitConjunctionPredicate({select_query.having()}))
    {
        ExpressionInfoVisitor::Data info{.context = context, .tables = {}};
        ExpressionInfoVisitor(info).visit(predicate);

        /// TODO: If there is no group by, where, and prewhere expression, we can push down the stateful function
        if (info.is_stateful_function)
            return false;

        ASTs & destination = info.is_aggregate_function ? kept_in_having : moved_to_where;
        destination.emplace_back(predicate);
    }

    /// HAVING is rebuilt from the predicates that stay; it is reset to an empty expression when none remain.
    if (!kept_in_having.empty())
        select_query.setExpression(ASTSelectQuery::Expression::HAVING, combine_with_and(kept_in_having));
    else
        select_query.setExpression(ASTSelectQuery::Expression::HAVING, {});

    if (moved_to_where.empty())
        return true;

    /// The existing WHERE (if any) goes first, the moved predicates follow it.
    ASTPtr new_where = combine_with_and(moved_to_where);
    if (select_query.where())
        new_where = makeASTFunction("and", select_query.where(), new_where);

    select_query.setExpression(ASTSelectQuery::Expression::WHERE, std::move(new_where));
    return true;
}
}

View File

@ -7,18 +7,13 @@ namespace DB
{
class Context;
class Settings;
struct Settings;
/** This class provides functions for Push-Down predicate expressions
*
* The Example:
* - Query before optimization :
* SELECT id_1, name_1 FROM (SELECT id_1, name_1 FROM table_a UNION ALL SELECT id_2, name_2 FROM table_b)
* WHERE id_1 = 1
* - Query after optimization :
* SELECT id_1, name_1 FROM (SELECT id_1, name_1 FROM table_a WHERE id_1 = 1 UNION ALL SELECT id_2, name_2 FROM table_b WHERE id_2 = 1)
* WHERE id_1 = 1
/** Predicate optimization based on AST rewriting rules
* For more details : https://github.com/ClickHouse/ClickHouse/pull/2015#issuecomment-374283452
* The optimizer performs two different optimizations:
* - Move predicates from HAVING to WHERE
* - Push predicates down from the current query to the HAVING of the subquery
*/
class PredicateExpressionsOptimizer
{
@ -51,6 +46,8 @@ private:
bool tryRewritePredicatesToTables(ASTs & tables_element, const std::vector<ASTs> & tables_predicates);
bool tryRewritePredicatesToTable(ASTPtr & table_element, const ASTs & table_predicates, const Names & table_column) const;
bool tryMovePredicatesFromHavingToWhere(ASTSelectQuery & select_query);
};
}

View File

@ -1,23 +1,23 @@
#include <Interpreters/PredicateRewriteVisitor.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTAsterisk.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTColumnsMatcher.h>
#include <Parsers/ASTQualifiedAsterisk.h>
#include <Interpreters/IdentifierSemantic.h>
#include <Interpreters/getTableExpressions.h>
#include <Interpreters/InterpreterSelectQuery.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Interpreters/IdentifierSemantic.h>
#include <Interpreters/ExtractExpressionInfoVisitor.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
namespace DB
{
PredicateRewriteVisitorData::PredicateRewriteVisitorData(
const Context & context_, const ASTs & predicates_, const Names & colunm_names_, bool optimize_final_)
: context(context_), predicates(predicates_), column_names(colunm_names_), optimize_final(optimize_final_)
const Context & context_, const ASTs & predicates_, const Names & column_names_, bool optimize_final_)
: context(context_), predicates(predicates_), column_names(column_names_), optimize_final(optimize_final_)
{
}
@ -44,9 +44,10 @@ void PredicateRewriteVisitorData::visitOtherInternalSelect(ASTSelectQuery & sele
ASTSelectQuery * temp_select_query = temp_internal_select->as<ASTSelectQuery>();
size_t alias_index = 0;
for (const auto ref_select : temp_select_query->refSelect()->children)
for (auto & ref_select : temp_select_query->refSelect()->children)
{
if (!ref_select->as<ASTAsterisk>() && !ref_select->as<ASTQualifiedAsterisk>() && !ref_select->as<ASTColumnsMatcher>())
if (!ref_select->as<ASTAsterisk>() && !ref_select->as<ASTQualifiedAsterisk>() && !ref_select->as<ASTColumnsMatcher>() &&
!ref_select->as<ASTIdentifier>())
{
if (const auto & alias = ref_select->tryGetAlias(); alias.empty())
ref_select->setAlias("--predicate_optimizer_" + toString(alias_index++));
@ -56,15 +57,11 @@ void PredicateRewriteVisitorData::visitOtherInternalSelect(ASTSelectQuery & sele
const Names & internal_columns = InterpreterSelectQuery(
temp_internal_select, context, SelectQueryOptions().analyze()).getSampleBlock().getNames();
if ((is_rewrite |= rewriteSubquery(*temp_select_query, column_names, internal_columns)))
if (rewriteSubquery(*temp_select_query, column_names, internal_columns))
{
is_rewrite |= true;
select_query.setExpression(ASTSelectQuery::Expression::SELECT, std::move(temp_select_query->refSelect()));
if (temp_select_query->where())
select_query.setExpression(ASTSelectQuery::Expression::WHERE, std::move(temp_select_query->refWhere()));
if (temp_select_query->having())
select_query.setExpression(ASTSelectQuery::Expression::HAVING, std::move(temp_select_query->refHaving()));
select_query.setExpression(ASTSelectQuery::Expression::HAVING, std::move(temp_select_query->refHaving()));
}
}
@ -80,32 +77,12 @@ static void cleanAliasAndCollectIdentifiers(ASTPtr & predicate, std::vector<ASTI
identifiers.emplace_back(identifier);
}
bool PredicateRewriteVisitorData::allowPushDown(const ASTSelectQuery &subquery, NameSet & aggregate_column)
{
if ((!optimize_final && subquery.final())
|| subquery.limitBy() || subquery.limitLength()
|| subquery.with() || subquery.withFill())
return false;
for (const auto & select_expression : subquery.select()->children)
{
ExpressionInfoVisitor::Data expression_info{.context = context, .tables = {}};
ExpressionInfoVisitor(expression_info).visit(select_expression);
if (expression_info.is_stateful_function)
return false;
else if (expression_info.is_aggregate_function)
aggregate_column.emplace(select_expression->getAliasOrColumnName());
}
return true;
}
bool PredicateRewriteVisitorData::rewriteSubquery(ASTSelectQuery & subquery, const Names & outer_columns, const Names & inner_columns)
{
NameSet aggregate_columns;
if (!allowPushDown(subquery, aggregate_columns))
if ((!optimize_final && subquery.final())
|| subquery.with() || subquery.withFill()
|| subquery.limitBy() || subquery.limitLength()
|| hasStatefulFunction(subquery.select(), context))
return false;
for (const auto & predicate : predicates)
@ -114,27 +91,23 @@ bool PredicateRewriteVisitorData::rewriteSubquery(ASTSelectQuery & subquery, con
ASTPtr optimize_predicate = predicate->clone();
cleanAliasAndCollectIdentifiers(optimize_predicate, identifiers);
ASTSelectQuery::Expression rewrite_to = ASTSelectQuery::Expression::WHERE;
for (size_t index = 0; index < identifiers.size(); ++index)
{
const auto & column_name = IdentifierSemantic::getColumnName(*identifiers[index]);
const auto & column_name = identifiers[index]->shortName();
const auto & outer_column_iterator = std::find(outer_columns.begin(), outer_columns.end(), column_name);
const auto & iterator = std::find(outer_columns.begin(), outer_columns.end(), column_name);
if (outer_column_iterator == outer_columns.end())
throw Exception("LOGICAL ERROR: the column " + column_name + " does not exists.", ErrorCodes::LOGICAL_ERROR);
if (iterator == outer_columns.end())
throw Exception("", ErrorCodes::LOGICAL_ERROR);
if (aggregate_columns.count(*column_name))
rewrite_to = ASTSelectQuery::Expression::HAVING;
identifiers[index]->setShortName(inner_columns[iterator - outer_columns.begin()]);
identifiers[index]->setShortName(inner_columns[outer_column_iterator - outer_columns.begin()]);
}
ASTPtr optimize_expression = subquery.getExpression(rewrite_to, false);
subquery.setExpression(rewrite_to,
optimize_expression ? makeASTFunction("and", optimize_predicate, optimize_expression) : optimize_predicate);
/// We only need to push all the predicates to subquery having
/// The subquery optimizer will move the appropriate predicates from having to where
subquery.setExpression(ASTSelectQuery::Expression::HAVING,
subquery.having() ? makeASTFunction("and", optimize_predicate, subquery.having()) : optimize_predicate);
}
return true;
}

View File

@ -11,28 +11,23 @@ namespace DB
class PredicateRewriteVisitorData
{
public:
bool is_rewrite = false;
using TypeToVisit = ASTSelectWithUnionQuery;
bool isRewrite() const { return is_rewrite; }
void visit(ASTSelectWithUnionQuery & union_select_query, ASTPtr &);
PredicateRewriteVisitorData(const Context & context_, const ASTs & predicates_, const Names & colunm_names_, bool optimize_final_);
PredicateRewriteVisitorData(const Context & context_, const ASTs & predicates_, const Names & column_names_, bool optimize_final_);
private:
const Context & context;
const ASTs & predicates;
const Names & column_names;
bool optimize_final;
bool is_rewrite = false;
void visitFirstInternalSelect(ASTSelectQuery & select_query, ASTPtr &);
void visitOtherInternalSelect(ASTSelectQuery & select_query, ASTPtr &);
bool allowPushDown(const ASTSelectQuery & subquery, NameSet & aggregate_column);
bool rewriteSubquery(ASTSelectQuery & subquery, const Names & outer_columns, const Names & inner_columns);
};

View File

@ -920,6 +920,9 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
if (select_query)
{
/// Push the predicate expression down to the subqueries.
result.rewrite_subqueries = PredicateExpressionsOptimizer(context, tables_with_columns, settings).optimize(*select_query);
/// GROUP BY injective function elimination.
optimizeGroupBy(select_query, source_columns_set, context);
@ -935,9 +938,6 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
/// array_join_alias_to_name, array_join_result_to_source.
getArrayJoinedColumns(query, result, select_query, result.source_columns, source_columns_set);
/// Push the predicate expression down to the subqueries.
result.rewrite_subqueries = PredicateExpressionsOptimizer(context, tables_with_columns, settings).optimize(*select_query);
setJoinStrictness(*select_query, settings.join_default_strictness, settings.any_join_distinct_right_table_keys,
result.analyzed_join->table_join);
collectJoinedColumns(*result.analyzed_join, *select_query, tables_with_columns, result.aliases);

View File

@ -13,7 +13,7 @@ SELECT \n a, \n b\nFROM \n(\n SELECT \n 1 AS a, \n 1 AS b
-------Need push down-------
SELECT toString(value) AS value\nFROM \n(\n SELECT 1 AS value\n)
1
SELECT id\nFROM \n(\n SELECT 1 AS id\n UNION ALL\n SELECT 2 AS `2`\n WHERE 0\n)\nWHERE id = 1
SELECT id\nFROM \n(\n SELECT 1 AS id\n UNION ALL\n SELECT 2 AS `--predicate_optimizer_0`\n WHERE 0\n)\nWHERE id = 1
1
SELECT id\nFROM \n(\n SELECT arrayJoin([1, 2, 3]) AS id\n WHERE id = 1\n)\nWHERE id = 1
1