mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
ISSUES-3105 fix some bugs in the predicate optimizer
This commit is contained in:
parent
94183c7af1
commit
25f79593a2
@ -67,6 +67,8 @@
|
||||
#include <Parsers/parseQuery.h>
|
||||
#include <Parsers/queryToString.h>
|
||||
#include <Interpreters/evaluateQualified.h>
|
||||
#include <Interpreters/QueryNormalizer.h>
|
||||
#include <Interpreters/getQueryAliases.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -226,7 +228,7 @@ ExpressionAnalyzer::ExpressionAnalyzer(
|
||||
LogicalExpressionsOptimizer(select_query, settings).perform();
|
||||
|
||||
/// Creates a dictionary `aliases`: alias -> ASTPtr
|
||||
addASTAliases(query);
|
||||
getQueryAliases(query, aliases);
|
||||
|
||||
/// Common subexpression elimination. Rewrite rules.
|
||||
normalizeTree();
|
||||
@ -860,277 +862,23 @@ static NamesAndTypesList::iterator findColumn(const String & name, NamesAndTypes
|
||||
}
|
||||
|
||||
|
||||
/// ignore_levels - aliases in how many upper levels of the subtree should be ignored.
|
||||
/// For example, with ignore_levels=1 ast can not be put in the dictionary, but its children can.
|
||||
void ExpressionAnalyzer::addASTAliases(ASTPtr & ast, int ignore_levels)
|
||||
{
|
||||
/// Bottom-up traversal. We do not go into subqueries.
|
||||
for (auto & child : ast->children)
|
||||
{
|
||||
int new_ignore_levels = std::max(0, ignore_levels - 1);
|
||||
|
||||
/// The top-level aliases in the ARRAY JOIN section have a special meaning, we will not add them
|
||||
/// (skip the expression list itself and its children).
|
||||
if (typeid_cast<ASTArrayJoin *>(ast.get()))
|
||||
new_ignore_levels = 3;
|
||||
|
||||
/// Don't descend into table functions and subqueries.
|
||||
if (!typeid_cast<ASTTableExpression *>(child.get())
|
||||
&& !typeid_cast<ASTSelectWithUnionQuery *>(child.get()))
|
||||
addASTAliases(child, new_ignore_levels);
|
||||
}
|
||||
|
||||
if (ignore_levels > 0)
|
||||
return;
|
||||
|
||||
String alias = ast->tryGetAlias();
|
||||
if (!alias.empty())
|
||||
{
|
||||
if (aliases.count(alias) && ast->getTreeHash() != aliases[alias]->getTreeHash())
|
||||
{
|
||||
std::stringstream message;
|
||||
message << "Different expressions with the same alias " << backQuoteIfNeed(alias) << ":\n";
|
||||
formatAST(*ast, message, false, true);
|
||||
message << "\nand\n";
|
||||
formatAST(*aliases[alias], message, false, true);
|
||||
message << "\n";
|
||||
|
||||
throw Exception(message.str(), ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS);
|
||||
}
|
||||
|
||||
aliases[alias] = ast;
|
||||
}
|
||||
else if (auto subquery = typeid_cast<ASTSubquery *>(ast.get()))
|
||||
{
|
||||
/// Set unique aliases for all subqueries. This is needed, because content of subqueries could change after recursive analysis,
|
||||
/// and auto-generated column names could become incorrect.
|
||||
|
||||
if (subquery->alias.empty())
|
||||
{
|
||||
size_t subquery_index = 1;
|
||||
while (true)
|
||||
{
|
||||
alias = "_subquery" + toString(subquery_index);
|
||||
if (!aliases.count("_subquery" + toString(subquery_index)))
|
||||
break;
|
||||
++subquery_index;
|
||||
}
|
||||
|
||||
subquery->setAlias(alias);
|
||||
subquery->prefer_alias_to_column_name = true;
|
||||
aliases[alias] = ast;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void ExpressionAnalyzer::normalizeTree()
|
||||
{
|
||||
SetOfASTs tmp_set;
|
||||
MapOfASTs tmp_map;
|
||||
normalizeTreeImpl(query, tmp_map, tmp_set, "", 0);
|
||||
Names all_columns_name;
|
||||
|
||||
try
|
||||
auto columns_name = storage ? storage->getColumns().ordinary.getNames() : source_columns.getNames();
|
||||
all_columns_name.insert(all_columns_name.begin(), columns_name.begin(), columns_name.end());
|
||||
|
||||
if (!settings.asterisk_left_columns_only)
|
||||
{
|
||||
query->checkSize(settings.max_expanded_ast_elements);
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
e.addMessage("(after expansion of aliases)");
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// finished_asts - already processed vertices (and what they were replaced with)
|
||||
/// current_asts - vertices in the current call stack of this method
|
||||
/// current_alias - the alias referencing to the ancestor of ast (the deepest ancestor with aliases)
|
||||
void ExpressionAnalyzer::normalizeTreeImpl(
|
||||
ASTPtr & ast, MapOfASTs & finished_asts, SetOfASTs & current_asts, std::string current_alias, size_t level)
|
||||
{
|
||||
if (level > settings.max_ast_depth)
|
||||
throw Exception("Normalized AST is too deep. Maximum: "
|
||||
+ settings.max_ast_depth.toString(), ErrorCodes::TOO_DEEP_AST);
|
||||
|
||||
if (finished_asts.count(ast))
|
||||
{
|
||||
ast = finished_asts[ast];
|
||||
return;
|
||||
auto columns_from_joined_table = analyzed_join.getColumnsFromJoinedTable(context, select_query).getNames();
|
||||
all_columns_name.insert(all_columns_name.end(), columns_from_joined_table.begin(), columns_from_joined_table.end());
|
||||
}
|
||||
|
||||
ASTPtr initial_ast = ast;
|
||||
current_asts.insert(initial_ast.get());
|
||||
if (all_columns_name.empty())
|
||||
throw Exception("Logical error: an asterisk cannot be replaced with empty columns.", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
String my_alias = ast->tryGetAlias();
|
||||
if (!my_alias.empty())
|
||||
current_alias = my_alias;
|
||||
|
||||
/// rewrite rules that act when you go from top to bottom.
|
||||
bool replaced = false;
|
||||
|
||||
ASTIdentifier * identifier_node = nullptr;
|
||||
ASTFunction * func_node = nullptr;
|
||||
|
||||
if ((func_node = typeid_cast<ASTFunction *>(ast.get())))
|
||||
{
|
||||
/// `IN t` can be specified, where t is a table, which is equivalent to `IN (SELECT * FROM t)`.
|
||||
if (functionIsInOrGlobalInOperator(func_node->name))
|
||||
if (ASTIdentifier * right = typeid_cast<ASTIdentifier *>(func_node->arguments->children.at(1).get()))
|
||||
if (!aliases.count(right->name))
|
||||
right->kind = ASTIdentifier::Table;
|
||||
|
||||
/// Special cases for count function.
|
||||
String func_name_lowercase = Poco::toLower(func_node->name);
|
||||
if (startsWith(func_name_lowercase, "count"))
|
||||
{
|
||||
/// Select implementation of countDistinct based on settings.
|
||||
/// Important that it is done as query rewrite. It means rewritten query
|
||||
/// will be sent to remote servers during distributed query execution,
|
||||
/// and on all remote servers, function implementation will be same.
|
||||
if (endsWith(func_node->name, "Distinct") && func_name_lowercase == "countdistinct")
|
||||
func_node->name = settings.count_distinct_implementation;
|
||||
|
||||
/// As special case, treat count(*) as count(), not as count(list of all columns).
|
||||
if (func_name_lowercase == "count" && func_node->arguments->children.size() == 1
|
||||
&& typeid_cast<const ASTAsterisk *>(func_node->arguments->children[0].get()))
|
||||
{
|
||||
func_node->arguments->children.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
else if ((identifier_node = typeid_cast<ASTIdentifier *>(ast.get())))
|
||||
{
|
||||
if (identifier_node->kind == ASTIdentifier::Column)
|
||||
{
|
||||
/// If it is an alias, but not a parent alias (for constructs like "SELECT column + 1 AS column").
|
||||
auto it_alias = aliases.find(identifier_node->name);
|
||||
if (it_alias != aliases.end() && current_alias != identifier_node->name)
|
||||
{
|
||||
/// Let's replace it with the corresponding tree node.
|
||||
if (current_asts.count(it_alias->second.get()))
|
||||
throw Exception("Cyclic aliases", ErrorCodes::CYCLIC_ALIASES);
|
||||
|
||||
if (!my_alias.empty() && my_alias != it_alias->second->getAliasOrColumnName())
|
||||
{
|
||||
/// Avoid infinite recursion here
|
||||
auto replace_to_identifier = typeid_cast<ASTIdentifier *>(it_alias->second.get());
|
||||
bool is_cycle = replace_to_identifier &&
|
||||
replace_to_identifier->kind == ASTIdentifier::Column &&
|
||||
replace_to_identifier->name == identifier_node->name;
|
||||
|
||||
if (!is_cycle)
|
||||
{
|
||||
/// In a construct like "a AS b", where a is an alias, you must set alias b to the result of substituting alias a.
|
||||
ast = it_alias->second->clone();
|
||||
ast->setAlias(my_alias);
|
||||
replaced = true;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
ast = it_alias->second;
|
||||
replaced = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (ASTExpressionList * expr_list = typeid_cast<ASTExpressionList *>(ast.get()))
|
||||
{
|
||||
/// Replace * with a list of columns.
|
||||
ASTs & asts = expr_list->children;
|
||||
for (int i = static_cast<int>(asts.size()) - 1; i >= 0; --i)
|
||||
{
|
||||
if (typeid_cast<ASTAsterisk *>(asts[i].get()))
|
||||
{
|
||||
Names all_columns_name;
|
||||
|
||||
auto columns_name = storage ? storage->getColumns().ordinary.getNames() : source_columns.getNames();
|
||||
all_columns_name.insert(all_columns_name.begin(), columns_name.begin(), columns_name.end());
|
||||
|
||||
if (!settings.asterisk_left_columns_only)
|
||||
{
|
||||
auto columns_from_joined_table = analyzed_join.getColumnsFromJoinedTable(context, select_query).getNames();
|
||||
all_columns_name.insert(all_columns_name.end(), columns_from_joined_table.begin(), columns_from_joined_table.end());
|
||||
}
|
||||
|
||||
asts.erase(asts.begin() + i);
|
||||
for (size_t idx = 0; idx < all_columns_name.size(); idx++)
|
||||
asts.insert(asts.begin() + idx + i, std::make_shared<ASTIdentifier>(all_columns_name[idx]));
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (ASTTablesInSelectQueryElement * tables_elem = typeid_cast<ASTTablesInSelectQueryElement *>(ast.get()))
|
||||
{
|
||||
if (tables_elem->table_expression)
|
||||
{
|
||||
auto & database_and_table_name = static_cast<ASTTableExpression &>(*tables_elem->table_expression).database_and_table_name;
|
||||
if (database_and_table_name)
|
||||
{
|
||||
if (ASTIdentifier * right = typeid_cast<ASTIdentifier *>(database_and_table_name.get()))
|
||||
{
|
||||
right->kind = ASTIdentifier::Table;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// If we replace the root of the subtree, we will be called again for the new root, in case the alias is replaced by an alias.
|
||||
if (replaced)
|
||||
{
|
||||
normalizeTreeImpl(ast, finished_asts, current_asts, current_alias, level + 1);
|
||||
current_asts.erase(initial_ast.get());
|
||||
current_asts.erase(ast.get());
|
||||
finished_asts[initial_ast] = ast;
|
||||
return;
|
||||
}
|
||||
|
||||
/// Recurring calls. Don't go into subqueries. Don't go into components of compound identifiers.
|
||||
/// We also do not go to the left argument of lambda expressions, so as not to replace the formal parameters
|
||||
/// on aliases in expressions of the form 123 AS x, arrayMap(x -> 1, [2]).
|
||||
|
||||
if (func_node && func_node->name == "lambda")
|
||||
{
|
||||
/// We skip the first argument. We also assume that the lambda function can not have parameters.
|
||||
for (size_t i = 1, size = func_node->arguments->children.size(); i < size; ++i)
|
||||
{
|
||||
auto & child = func_node->arguments->children[i];
|
||||
|
||||
if (typeid_cast<const ASTSelectQuery *>(child.get())
|
||||
|| typeid_cast<const ASTTableExpression *>(child.get()))
|
||||
continue;
|
||||
|
||||
normalizeTreeImpl(child, finished_asts, current_asts, current_alias, level + 1);
|
||||
}
|
||||
}
|
||||
else if (identifier_node)
|
||||
{
|
||||
}
|
||||
else
|
||||
{
|
||||
for (auto & child : ast->children)
|
||||
{
|
||||
if (typeid_cast<const ASTSelectQuery *>(child.get())
|
||||
|| typeid_cast<const ASTTableExpression *>(child.get()))
|
||||
continue;
|
||||
|
||||
normalizeTreeImpl(child, finished_asts, current_asts, current_alias, level + 1);
|
||||
}
|
||||
}
|
||||
|
||||
/// If the WHERE clause or HAVING consists of a single alias, the reference must be replaced not only in children, but also in where_expression and having_expression.
|
||||
if (ASTSelectQuery * select = typeid_cast<ASTSelectQuery *>(ast.get()))
|
||||
{
|
||||
if (select->prewhere_expression)
|
||||
normalizeTreeImpl(select->prewhere_expression, finished_asts, current_asts, current_alias, level + 1);
|
||||
if (select->where_expression)
|
||||
normalizeTreeImpl(select->where_expression, finished_asts, current_asts, current_alias, level + 1);
|
||||
if (select->having_expression)
|
||||
normalizeTreeImpl(select->having_expression, finished_asts, current_asts, current_alias, level + 1);
|
||||
}
|
||||
|
||||
current_asts.erase(initial_ast.get());
|
||||
current_asts.erase(ast.get());
|
||||
finished_asts[initial_ast] = ast;
|
||||
QueryNormalizer(query, aliases, settings, all_columns_name).perform();
|
||||
}
|
||||
|
||||
|
||||
|
@ -310,15 +310,10 @@ private:
|
||||
/// Parse JOIN ON expression and collect ASTs for joined columns.
|
||||
void collectJoinedColumnsFromJoinOnExpr();
|
||||
|
||||
/** Create a dictionary of aliases.
|
||||
*/
|
||||
void addASTAliases(ASTPtr & ast, int ignore_levels = 0);
|
||||
|
||||
/** For star nodes(`*`), expand them to a list of all columns.
|
||||
* For literal nodes, substitute aliases.
|
||||
*/
|
||||
void normalizeTree();
|
||||
void normalizeTreeImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOfASTs & current_asts, std::string current_alias, size_t level);
|
||||
|
||||
/// Eliminates injective function calls and constant expressions from group by statement
|
||||
void optimizeGroupBy();
|
||||
|
@ -7,6 +7,8 @@
|
||||
#include <Parsers/ASTAsterisk.h>
|
||||
#include <Parsers/ASTQualifiedAsterisk.h>
|
||||
#include <Parsers/queryToString.h>
|
||||
#include <Interpreters/QueryNormalizer.h>
|
||||
#include <Interpreters/getQueryAliases.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -50,6 +52,9 @@ bool PredicateExpressionsOptimizer::optimizeImpl(
|
||||
bool is_rewrite_subquery = false;
|
||||
for (const auto & outer_predicate : outer_predicate_expressions)
|
||||
{
|
||||
if (isArrayJoinFunction(outer_predicate))
|
||||
continue;
|
||||
|
||||
IdentifiersWithQualifiedNameSet outer_predicate_dependencies;
|
||||
getDependenciesAndQualifiedOfExpression(outer_predicate, outer_predicate_dependencies, database_and_table_with_aliases);
|
||||
|
||||
@ -181,6 +186,21 @@ bool PredicateExpressionsOptimizer::cannotPushDownOuterPredicate(
|
||||
return false;
|
||||
}
|
||||
|
||||
bool PredicateExpressionsOptimizer::isArrayJoinFunction(const ASTPtr & node)
|
||||
{
|
||||
if (auto function = typeid_cast<ASTFunction *>(node.get()))
|
||||
{
|
||||
if (function->name == "arrayJoin")
|
||||
return true;
|
||||
}
|
||||
|
||||
for (auto & child : node->children)
|
||||
if (isArrayJoinFunction(child))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool PredicateExpressionsOptimizer::isAggregateFunction(ASTPtr & node)
|
||||
{
|
||||
if (auto function = typeid_cast<ASTFunction *>(node.get()))
|
||||
@ -210,7 +230,12 @@ void PredicateExpressionsOptimizer::cloneOuterPredicateForInnerPredicate(
|
||||
for (auto projection : projection_columns)
|
||||
{
|
||||
if (require.second == projection.second)
|
||||
require.first->name = projection.first->getAliasOrColumnName();
|
||||
{
|
||||
ASTPtr & ast = projection.first;
|
||||
if (!typeid_cast<ASTIdentifier *>(ast.get()) && ast->tryGetAlias().empty())
|
||||
ast->setAlias(ast->getColumnName());
|
||||
require.first->name = ast->getAliasOrColumnName();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -275,6 +300,11 @@ void PredicateExpressionsOptimizer::getSubqueryProjectionColumns(SubqueriesProje
|
||||
|
||||
ASTs PredicateExpressionsOptimizer::getSelectQueryProjectionColumns(ASTPtr & ast)
|
||||
{
|
||||
/// The query tree must be normalized first.
|
||||
std::unordered_map<String, ASTPtr> aliases;
|
||||
getQueryAliases(ast, aliases, 0);
|
||||
QueryNormalizer(ast, aliases, settings, {}).perform();
|
||||
|
||||
ASTs projection_columns;
|
||||
auto select_query = static_cast<ASTSelectQuery *>(ast.get());
|
||||
|
||||
|
@ -61,6 +61,8 @@ private:
|
||||
|
||||
bool isAggregateFunction(ASTPtr & node);
|
||||
|
||||
bool isArrayJoinFunction(const ASTPtr & node);
|
||||
|
||||
PredicateExpressions splitConjunctionPredicate(ASTPtr & predicate_expression);
|
||||
|
||||
void getDependenciesAndQualifiedOfExpression(const ASTPtr & expression, IdentifiersWithQualifiedNameSet & dependencies_and_qualified,
|
||||
|
231
dbms/src/Interpreters/QueryNormalizer.cpp
Normal file
231
dbms/src/Interpreters/QueryNormalizer.cpp
Normal file
@ -0,0 +1,231 @@
|
||||
#include <Core/Names.h>
|
||||
#include <Interpreters/QueryNormalizer.h>
|
||||
#include <Parsers/ASTAsterisk.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
#include <Parsers/ASTTablesInSelectQuery.h>
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Poco/String.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int TOO_DEEP_AST;
|
||||
extern const int CYCLIC_ALIASES;
|
||||
}
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
bool functionIsInOrGlobalInOperator(const String & name)
|
||||
{
|
||||
return name == "in" || name == "notIn" || name == "globalIn" || name == "globalNotIn";
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/// Binds the normalizer to the query, the alias dictionary, the settings and the list of column names
/// used for asterisk expansion. No work is done here; the rewrite happens in perform().
QueryNormalizer::QueryNormalizer(
    ASTPtr & query_, const Aliases & aliases_, const Settings & settings_, const Names & all_columns_name_)
    : query(query_)
    , aliases(aliases_)
    , settings(settings_)
    , all_columns_name(all_columns_name_)
{
}
|
||||
|
||||
void QueryNormalizer::perform()
|
||||
{
|
||||
SetOfASTs tmp_set;
|
||||
MapOfASTs tmp_map;
|
||||
performImpl(query, tmp_map, tmp_set, "", 0);
|
||||
|
||||
try
|
||||
{
|
||||
query->checkSize(settings.max_expanded_ast_elements);
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
e.addMessage("(after expansion of aliases)");
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
/// finished_asts - already processed vertices (and by what they replaced)
|
||||
/// current_asts - vertices in the current call stack of this method
|
||||
/// current_alias - the alias referencing to the ancestor of ast (the deepest ancestor with aliases)
|
||||
void QueryNormalizer::performImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOfASTs & current_asts, std::string current_alias, size_t level)
|
||||
{
|
||||
if (level > settings.max_ast_depth)
|
||||
throw Exception("Normalized AST is too deep. Maximum: " + settings.max_ast_depth.toString(), ErrorCodes::TOO_DEEP_AST);
|
||||
|
||||
if (finished_asts.count(ast))
|
||||
{
|
||||
ast = finished_asts[ast];
|
||||
return;
|
||||
}
|
||||
|
||||
ASTPtr initial_ast = ast;
|
||||
current_asts.insert(initial_ast.get());
|
||||
|
||||
String my_alias = ast->tryGetAlias();
|
||||
if (!my_alias.empty())
|
||||
current_alias = my_alias;
|
||||
|
||||
/// rewrite rules that act when you go from top to bottom.
|
||||
bool replaced = false;
|
||||
|
||||
ASTIdentifier * identifier_node = nullptr;
|
||||
ASTFunction * func_node = nullptr;
|
||||
|
||||
if ((func_node = typeid_cast<ASTFunction *>(ast.get())))
|
||||
{
|
||||
/// `IN t` can be specified, where t is a table, which is equivalent to `IN (SELECT * FROM t)`.
|
||||
if (functionIsInOrGlobalInOperator(func_node->name))
|
||||
if (ASTIdentifier * right = typeid_cast<ASTIdentifier *>(func_node->arguments->children.at(1).get()))
|
||||
if (!aliases.count(right->name))
|
||||
right->kind = ASTIdentifier::Table;
|
||||
|
||||
/// Special cases for count function.
|
||||
String func_name_lowercase = Poco::toLower(func_node->name);
|
||||
if (startsWith(func_name_lowercase, "count"))
|
||||
{
|
||||
/// Select implementation of countDistinct based on settings.
|
||||
/// Important that it is done as query rewrite. It means rewritten query
|
||||
/// will be sent to remote servers during distributed query execution,
|
||||
/// and on all remote servers, function implementation will be same.
|
||||
if (endsWith(func_node->name, "Distinct") && func_name_lowercase == "countdistinct")
|
||||
func_node->name = settings.count_distinct_implementation;
|
||||
|
||||
/// As special case, treat count(*) as count(), not as count(list of all columns).
|
||||
if (func_name_lowercase == "count" && func_node->arguments->children.size() == 1
|
||||
&& typeid_cast<const ASTAsterisk *>(func_node->arguments->children[0].get()))
|
||||
{
|
||||
func_node->arguments->children.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
else if ((identifier_node = typeid_cast<ASTIdentifier *>(ast.get())))
|
||||
{
|
||||
if (identifier_node->kind == ASTIdentifier::Column)
|
||||
{
|
||||
/// If it is an alias, but not a parent alias (for constructs like "SELECT column + 1 AS column").
|
||||
auto it_alias = aliases.find(identifier_node->name);
|
||||
if (it_alias != aliases.end() && current_alias != identifier_node->name)
|
||||
{
|
||||
/// Let's replace it with the corresponding tree node.
|
||||
if (current_asts.count(it_alias->second.get()))
|
||||
throw Exception("Cyclic aliases", ErrorCodes::CYCLIC_ALIASES);
|
||||
|
||||
if (!my_alias.empty() && my_alias != it_alias->second->getAliasOrColumnName())
|
||||
{
|
||||
/// Avoid infinite recursion here
|
||||
auto replace_to_identifier = typeid_cast<ASTIdentifier *>(it_alias->second.get());
|
||||
bool is_cycle = replace_to_identifier && replace_to_identifier->kind == ASTIdentifier::Column
|
||||
&& replace_to_identifier->name == identifier_node->name;
|
||||
|
||||
if (!is_cycle)
|
||||
{
|
||||
/// In a construct like "a AS b", where a is an alias, you must set alias b to the result of substituting alias a.
|
||||
ast = it_alias->second->clone();
|
||||
ast->setAlias(my_alias);
|
||||
replaced = true;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
ast = it_alias->second;
|
||||
replaced = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (ASTExpressionList * expr_list = typeid_cast<ASTExpressionList *>(ast.get()))
|
||||
{
|
||||
/// Replace * with a list of columns.
|
||||
ASTs & asts = expr_list->children;
|
||||
for (int i = static_cast<int>(asts.size()) - 1; i >= 0; --i)
|
||||
{
|
||||
if (typeid_cast<ASTAsterisk *>(asts[i].get()) && !all_columns_name.empty())
|
||||
{
|
||||
asts.erase(asts.begin() + i);
|
||||
|
||||
for (size_t idx = 0; idx < all_columns_name.size(); idx++)
|
||||
asts.insert(asts.begin() + idx + i, std::make_shared<ASTIdentifier>(all_columns_name[idx]));
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (ASTTablesInSelectQueryElement * tables_elem = typeid_cast<ASTTablesInSelectQueryElement *>(ast.get()))
|
||||
{
|
||||
if (tables_elem->table_expression)
|
||||
{
|
||||
auto & database_and_table_name = static_cast<ASTTableExpression &>(*tables_elem->table_expression).database_and_table_name;
|
||||
if (database_and_table_name)
|
||||
{
|
||||
if (ASTIdentifier * right = typeid_cast<ASTIdentifier *>(database_and_table_name.get()))
|
||||
{
|
||||
right->kind = ASTIdentifier::Table;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// If we replace the root of the subtree, we will be called again for the new root, in case the alias is replaced by an alias.
|
||||
if (replaced)
|
||||
{
|
||||
performImpl(ast, finished_asts, current_asts, current_alias, level + 1);
|
||||
current_asts.erase(initial_ast.get());
|
||||
current_asts.erase(ast.get());
|
||||
finished_asts[initial_ast] = ast;
|
||||
return;
|
||||
}
|
||||
|
||||
/// Recurring calls. Don't go into subqueries. Don't go into components of compound identifiers.
|
||||
/// We also do not go to the left argument of lambda expressions, so as not to replace the formal parameters
|
||||
/// on aliases in expressions of the form 123 AS x, arrayMap(x -> 1, [2]).
|
||||
|
||||
if (func_node && func_node->name == "lambda")
|
||||
{
|
||||
/// We skip the first argument. We also assume that the lambda function can not have parameters.
|
||||
for (size_t i = 1, size = func_node->arguments->children.size(); i < size; ++i)
|
||||
{
|
||||
auto & child = func_node->arguments->children[i];
|
||||
|
||||
if (typeid_cast<const ASTSelectQuery *>(child.get()) || typeid_cast<const ASTTableExpression *>(child.get()))
|
||||
continue;
|
||||
|
||||
performImpl(child, finished_asts, current_asts, current_alias, level + 1);
|
||||
}
|
||||
}
|
||||
else if (identifier_node)
|
||||
{
|
||||
}
|
||||
else
|
||||
{
|
||||
for (auto & child : ast->children)
|
||||
{
|
||||
if (typeid_cast<const ASTSelectQuery *>(child.get()) || typeid_cast<const ASTTableExpression *>(child.get()))
|
||||
continue;
|
||||
|
||||
performImpl(child, finished_asts, current_asts, current_alias, level + 1);
|
||||
}
|
||||
}
|
||||
|
||||
/// If the WHERE clause or HAVING consists of a single alias, the reference must be replaced not only in children, but also in where_expression and having_expression.
|
||||
if (ASTSelectQuery * select = typeid_cast<ASTSelectQuery *>(ast.get()))
|
||||
{
|
||||
if (select->prewhere_expression)
|
||||
performImpl(select->prewhere_expression, finished_asts, current_asts, current_alias, level + 1);
|
||||
if (select->where_expression)
|
||||
performImpl(select->where_expression, finished_asts, current_asts, current_alias, level + 1);
|
||||
if (select->having_expression)
|
||||
performImpl(select->having_expression, finished_asts, current_asts, current_alias, level + 1);
|
||||
}
|
||||
|
||||
current_asts.erase(initial_ast.get());
|
||||
current_asts.erase(ast.get());
|
||||
finished_asts[initial_ast] = ast;
|
||||
}
|
||||
|
||||
}
|
30
dbms/src/Interpreters/QueryNormalizer.h
Normal file
30
dbms/src/Interpreters/QueryNormalizer.h
Normal file
@ -0,0 +1,30 @@
|
||||
#pragma once
|
||||
|
||||
#include <Parsers/IAST.h>
|
||||
#include "Settings.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class QueryNormalizer
|
||||
{
|
||||
public:
|
||||
using Aliases = std::unordered_map<String, ASTPtr>;
|
||||
|
||||
QueryNormalizer(ASTPtr & query, const Aliases & aliases, const Settings & settings, const Names & all_columns_name);
|
||||
|
||||
void perform();
|
||||
|
||||
private:
|
||||
using SetOfASTs = std::set<const IAST *>;
|
||||
using MapOfASTs = std::map<ASTPtr, ASTPtr>;
|
||||
|
||||
ASTPtr & query;
|
||||
const Aliases & aliases;
|
||||
const Settings & settings;
|
||||
const Names & all_columns_name;
|
||||
|
||||
void performImpl(ASTPtr &ast, MapOfASTs &finished_asts, SetOfASTs ¤t_asts, std::string current_alias, size_t level);
|
||||
};
|
||||
|
||||
}
|
83
dbms/src/Interpreters/getQueryAliases.cpp
Normal file
83
dbms/src/Interpreters/getQueryAliases.cpp
Normal file
@ -0,0 +1,83 @@
|
||||
#include <ostream>
|
||||
#include <sstream>
|
||||
#include <Interpreters/getQueryAliases.h>
|
||||
#include <Parsers/ASTTablesInSelectQuery.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Parsers/ASTSelectWithUnionQuery.h>
|
||||
#include <Parsers/formatAST.h>
|
||||
#include <Parsers/ASTSubquery.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int MULTIPLE_EXPRESSIONS_FOR_ALIAS;
|
||||
}
|
||||
|
||||
/// ignore_levels - aliases in how many upper levels of the subtree should be ignored.
|
||||
/// For example, with ignore_levels=1 ast can not be put in the dictionary, but its children can.
|
||||
void getQueryAliases(ASTPtr & ast, Aliases & aliases, int ignore_levels)
|
||||
{
|
||||
|
||||
/// Bottom-up traversal. We do not go into subqueries.
|
||||
for (auto & child : ast->children)
|
||||
{
|
||||
int new_ignore_levels = std::max(0, ignore_levels - 1);
|
||||
|
||||
/// The top-level aliases in the ARRAY JOIN section have a special meaning, we will not add them
|
||||
/// (skip the expression list itself and its children).
|
||||
if (typeid_cast<ASTArrayJoin *>(ast.get()))
|
||||
new_ignore_levels = 3;
|
||||
|
||||
/// Don't descent into table functions and subqueries.
|
||||
if (!typeid_cast<ASTTableExpression *>(child.get())
|
||||
&& !typeid_cast<ASTSelectWithUnionQuery *>(child.get()))
|
||||
getQueryAliases(child, aliases, new_ignore_levels);
|
||||
}
|
||||
|
||||
if (ignore_levels > 0)
|
||||
return;
|
||||
|
||||
String alias = ast->tryGetAlias();
|
||||
if (!alias.empty())
|
||||
{
|
||||
if (aliases.count(alias) && ast->getTreeHash() != aliases[alias]->getTreeHash())
|
||||
{
|
||||
std::stringstream message;
|
||||
message << "Different expressions with the same alias " << backQuoteIfNeed(alias) << ":\n";
|
||||
formatAST(*ast, message, false, true);
|
||||
message << "\nand\n";
|
||||
formatAST(*aliases[alias], message, false, true);
|
||||
message << "\n";
|
||||
|
||||
throw Exception(message.str(), ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS);
|
||||
}
|
||||
|
||||
aliases[alias] = ast;
|
||||
}
|
||||
else if (auto subquery = typeid_cast<ASTSubquery *>(ast.get()))
|
||||
{
|
||||
/// Set unique aliases for all subqueries. This is needed, because content of subqueries could change after recursive analysis,
|
||||
/// and auto-generated column names could become incorrect.
|
||||
|
||||
if (subquery->alias.empty())
|
||||
{
|
||||
size_t subquery_index = 1;
|
||||
while (true)
|
||||
{
|
||||
alias = "_subquery" + toString(subquery_index);
|
||||
if (!aliases.count("_subquery" + toString(subquery_index)))
|
||||
break;
|
||||
++subquery_index;
|
||||
}
|
||||
|
||||
subquery->setAlias(alias);
|
||||
subquery->prefer_alias_to_column_name = true;
|
||||
aliases[alias] = ast;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
13
dbms/src/Interpreters/getQueryAliases.h
Normal file
13
dbms/src/Interpreters/getQueryAliases.h
Normal file
@ -0,0 +1,13 @@
|
||||
#pragma once
|
||||
|
||||
#include <Parsers/IAST.h>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Dictionary of query aliases: alias name -> the AST node that carries this alias.
using Aliases = std::unordered_map<String, ASTPtr>;
|
||||
|
||||
/// Collects the aliases found in the tree rooted at `ast` into `aliases`.
/// `ignore_levels` is the number of upper levels of the subtree whose aliases are ignored
/// (0, the default, means the alias of `ast` itself is collected too).
void getQueryAliases(ASTPtr & ast, Aliases & aliases, int ignore_levels = 0);
|
||||
|
||||
}
|
@ -9,6 +9,10 @@
|
||||
#include <DataStreams/MaterializingBlockInputStream.h>
|
||||
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Interpreters/PredicateExpressionsOptimizer.h>
|
||||
#include <Parsers/ASTAsterisk.h>
|
||||
#include <iostream>
|
||||
#include <Parsers/queryToString.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -45,16 +49,21 @@ BlockInputStreams StorageView::read(
|
||||
|
||||
BlockInputStreams res;
|
||||
|
||||
ASTPtr & current_inner_query = inner_query;
|
||||
|
||||
if (context.getSettings().enable_optimize_predicate_expression)
|
||||
{
|
||||
replaceTableNameWithSubquery(typeid_cast<ASTSelectQuery *>(query_info.query.get()), inner_query);
|
||||
auto res_io = InterpreterSelectQuery(query_info.query, context, column_names, processed_stage).execute();
|
||||
auto new_inner_query = inner_query->clone();
|
||||
auto new_outer_query = query_info.query->clone();
|
||||
auto new_outer_select = typeid_cast<ASTSelectQuery *>(new_outer_query.get());
|
||||
|
||||
res.emplace_back(res_io.in);
|
||||
return res;
|
||||
replaceTableNameWithSubquery(new_outer_select, new_inner_query);
|
||||
|
||||
if (PredicateExpressionsOptimizer(new_outer_select, context.getSettings(), context).optimize())
|
||||
current_inner_query = new_inner_query;
|
||||
}
|
||||
|
||||
res = InterpreterSelectWithUnionQuery(inner_query, context, column_names).executeWithMultipleStreams();
|
||||
res = InterpreterSelectWithUnionQuery(current_inner_query, context, column_names).executeWithMultipleStreams();
|
||||
|
||||
/// It's expected that the columns read from storage are not constant.
|
||||
/// Because method 'getSampleBlockForColumns' is used to obtain a structure of result in InterpreterSelectQuery.
|
||||
@ -76,9 +85,12 @@ void StorageView::replaceTableNameWithSubquery(ASTSelectQuery * select_query, AS
|
||||
if (!table_expression->database_and_table_name)
|
||||
throw Exception("Logical error: incorrect table expression", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
const auto alias = table_expression->database_and_table_name->tryGetAlias();
|
||||
table_expression->database_and_table_name = {};
|
||||
table_expression->subquery = std::make_shared<ASTSubquery>();
|
||||
table_expression->subquery->children.push_back(subquery->clone());
|
||||
table_expression->subquery->children.push_back(subquery);
|
||||
if (!alias.empty())
|
||||
table_expression->subquery->setAlias(alias);
|
||||
}
|
||||
|
||||
|
||||
|
@ -1,4 +1,3 @@
|
||||
-------Query that previously worked but now doesn\'t work.-------
|
||||
-------Not need optimize predicate, but it works.-------
|
||||
1
|
||||
1
|
||||
@ -6,7 +5,12 @@
|
||||
-------Need push down-------
|
||||
1
|
||||
1
|
||||
1
|
||||
1 1
|
||||
3 3
|
||||
3 3
|
||||
2000-01-01 1 test string 1 1
|
||||
3 3
|
||||
2000-01-01 1 test string 1 1
|
||||
2000-01-01 1 test string 1 1
|
||||
2000-01-01 1 test string 1 1
|
||||
@ -21,5 +25,7 @@
|
||||
2000-01-01 1 test string 1 1
|
||||
1 2000-01-01 2000-01-01 1 test string 1 1
|
||||
2000-01-01 1 test string 1 1
|
||||
1
|
||||
1
|
||||
-------Push to having expression, need check.-------
|
||||
-------Compatibility test-------
|
||||
|
@ -11,18 +11,21 @@ INSERT INTO test.test VALUES('2000-01-01', 2, 'test string 2', 2);
|
||||
|
||||
SET enable_optimize_predicate_expression = 1;
|
||||
|
||||
SELECT '-------Query that previously worked but now doesn\'t work.-------';
|
||||
SELECT * FROM (SELECT 1) WHERE `1` = 1; -- { serverError 47 }
|
||||
|
||||
SELECT '-------Not need optimize predicate, but it works.-------';
|
||||
SELECT 1;
|
||||
SELECT 1 AS id WHERE id = 1;
|
||||
SELECT arrayJoin([1,2,3]) AS id WHERE id = 1;
|
||||
|
||||
SELECT '-------Need push down-------';
|
||||
SELECT * FROM (SELECT 1 AS id UNION ALL SELECT 2) WHERE id = 1;
|
||||
SELECT * FROM (SELECT arrayJoin([1, 2, 3]) AS id) WHERE id = 1;
|
||||
SELECT id FROM (SELECT arrayJoin([1, 2, 3]) AS id) WHERE id = 1;
|
||||
SELECT date, id, name, value FROM (SELECT date, name, value,min(id) AS id FROM test.test GROUP BY date, name, value) WHERE id = 1;
|
||||
|
||||
SELECT * FROM (SELECT 1 AS id, (SELECT 1)) WHERE _subquery1 = 1;
|
||||
SELECT * FROM (SELECT toUInt64(b) AS a, sum(id) AS b FROM test.test) WHERE a = 3;
|
||||
SELECT * FROM (SELECT toUInt64(b), sum(id) AS b FROM test.test) WHERE `toUInt64(sum(id))` = 3;
|
||||
SELECT date, id, name, value FROM (SELECT date, name, value, min(id) AS id FROM test.test GROUP BY date, name, value) WHERE id = 1;
|
||||
SELECT * FROM (SELECT toUInt64(table_alias.b) AS a, sum(id) AS b FROM test.test AS table_alias) AS outer_table_alias WHERE outer_table_alias.b = 3;
|
||||
|
||||
SET force_primary_key = 1;
|
||||
|
||||
@ -54,9 +57,14 @@ SELECT * FROM (SELECT 1 AS id, toDate('2000-01-01') AS date FROM system.numbers
|
||||
|
||||
-- Optimize predicate expression with view
|
||||
SELECT * FROM test.test_view WHERE id = 1;
|
||||
SELECT id FROM test.test_view WHERE id = 1;
|
||||
SELECT s.id FROM test.test_view AS s WHERE id = 1;
|
||||
|
||||
SELECT '-------Push to having expression, need check.-------';
|
||||
SELECT id FROM (SELECT min(id) AS id FROM test.test) WHERE id = 1; -- { serverError 277 }
|
||||
SELECT * FROM (SELECT toUInt64(b) AS a, sum(id) AS b FROM test.test) WHERE a = 3; -- { serverError 277 }
|
||||
SELECT * FROM (SELECT toUInt64(b), sum(id) AS b FROM test.test) WHERE `toUInt64(sum(id))` = 3; -- { serverError 277 }
|
||||
SELECT * FROM (SELECT toUInt64(table_alias.b) AS a, sum(id) AS b FROM test.test AS table_alias) AS outer_table_alias WHERE outer_table_alias.b = 3; -- { serverError 277 }
|
||||
|
||||
SELECT '-------Compatibility test-------';
|
||||
SELECT * FROM (SELECT 1 AS id, toDate('2000-01-01') AS date FROM system.numbers LIMIT 1) ANY LEFT JOIN (SELECT * FROM test.test) AS b USING date WHERE b.date = toDate('2000-01-01'); -- {serverError 47}
|
||||
|
Loading…
Reference in New Issue
Block a user