ISSUES-3105 fix some bugs in the predicate optimizer

This commit is contained in:
zhang2014 2018-09-12 13:41:09 +08:00
parent 94183c7af1
commit 25f79593a2
11 changed files with 440 additions and 282 deletions

View File

@ -67,6 +67,8 @@
#include <Parsers/parseQuery.h>
#include <Parsers/queryToString.h>
#include <Interpreters/evaluateQualified.h>
#include <Interpreters/QueryNormalizer.h>
#include <Interpreters/getQueryAliases.h>
namespace DB
@ -226,7 +228,7 @@ ExpressionAnalyzer::ExpressionAnalyzer(
LogicalExpressionsOptimizer(select_query, settings).perform();
/// Creates a dictionary `aliases`: alias -> ASTPtr
addASTAliases(query);
getQueryAliases(query, aliases);
/// Common subexpression elimination. Rewrite rules.
normalizeTree();
@ -860,277 +862,23 @@ static NamesAndTypesList::iterator findColumn(const String & name, NamesAndTypes
}
/// Registers the aliases found in the subtree of `ast` in the `aliases` dictionary.
/// ignore_levels - how many upper levels of the subtree must not contribute their aliases.
/// For example, with ignore_levels=1 `ast` itself is not put in the dictionary, but its children can be.
void ExpressionAnalyzer::addASTAliases(ASTPtr & ast, int ignore_levels)
{
    /// Children are handled before the node itself. Subqueries are not entered.
    for (auto & child : ast->children)
    {
        /// Do not descend into table functions and subqueries.
        if (typeid_cast<ASTTableExpression *>(child.get()) || typeid_cast<ASTSelectWithUnionQuery *>(child.get()))
            continue;

        /// The top-level aliases in the ARRAY JOIN section have a special meaning and are not registered
        /// (the expression list itself and its children are skipped).
        const int child_ignore_levels = typeid_cast<ASTArrayJoin *>(ast.get()) ? 3 : std::max(0, ignore_levels - 1);
        addASTAliases(child, child_ignore_levels);
    }

    if (ignore_levels > 0)
        return;

    String alias = ast->tryGetAlias();

    if (alias.empty())
    {
        /// Only subqueries without an alias get one generated; other unnamed nodes are left as they are.
        auto * subquery = typeid_cast<ASTSubquery *>(ast.get());
        if (!subquery || !subquery->alias.empty())
            return;

        /// Set unique aliases for all subqueries. This is needed because the content of subqueries could change
        /// after recursive analysis, and auto-generated column names could become incorrect.
        for (size_t subquery_index = 1;; ++subquery_index)
        {
            alias = "_subquery" + toString(subquery_index);
            if (!aliases.count(alias))
                break;
        }

        subquery->setAlias(alias);
        subquery->prefer_alias_to_column_name = true;
        aliases[alias] = ast;
        return;
    }

    /// The same alias may be met several times, but only for identical expressions.
    auto known = aliases.find(alias);
    if (known != aliases.end() && ast->getTreeHash() != known->second->getTreeHash())
    {
        std::stringstream message;
        message << "Different expressions with the same alias " << backQuoteIfNeed(alias) << ":\n";
        formatAST(*ast, message, false, true);
        message << "\nand\n";
        formatAST(*known->second, message, false, true);
        message << "\n";

        throw Exception(message.str(), ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS);
    }

    aliases[alias] = ast;
}
void ExpressionAnalyzer::normalizeTree()
{
SetOfASTs tmp_set;
MapOfASTs tmp_map;
normalizeTreeImpl(query, tmp_map, tmp_set, "", 0);
Names all_columns_name;
try
auto columns_name = storage ? storage->getColumns().ordinary.getNames() : source_columns.getNames();
all_columns_name.insert(all_columns_name.begin(), columns_name.begin(), columns_name.end());
if (!settings.asterisk_left_columns_only)
{
query->checkSize(settings.max_expanded_ast_elements);
}
catch (Exception & e)
{
e.addMessage("(after expansion of aliases)");
throw;
}
}
/// finished_asts - already processed vertices (and by what they replaced)
/// current_asts - vertices in the current call stack of this method
/// current_alias - the alias referencing to the ancestor of ast (the deepest ancestor with aliases)
void ExpressionAnalyzer::normalizeTreeImpl(
ASTPtr & ast, MapOfASTs & finished_asts, SetOfASTs & current_asts, std::string current_alias, size_t level)
{
if (level > settings.max_ast_depth)
throw Exception("Normalized AST is too deep. Maximum: "
+ settings.max_ast_depth.toString(), ErrorCodes::TOO_DEEP_AST);
if (finished_asts.count(ast))
{
ast = finished_asts[ast];
return;
auto columns_from_joined_table = analyzed_join.getColumnsFromJoinedTable(context, select_query).getNames();
all_columns_name.insert(all_columns_name.end(), columns_from_joined_table.begin(), columns_from_joined_table.end());
}
ASTPtr initial_ast = ast;
current_asts.insert(initial_ast.get());
if (all_columns_name.empty())
throw Exception("Logical error: an asterisk cannot be replaced with empty columns.", ErrorCodes::LOGICAL_ERROR);
String my_alias = ast->tryGetAlias();
if (!my_alias.empty())
current_alias = my_alias;
/// rewrite rules that act when you go from top to bottom.
bool replaced = false;
ASTIdentifier * identifier_node = nullptr;
ASTFunction * func_node = nullptr;
if ((func_node = typeid_cast<ASTFunction *>(ast.get())))
{
/// `IN t` can be specified, where t is a table, which is equivalent to `IN (SELECT * FROM t)`.
if (functionIsInOrGlobalInOperator(func_node->name))
if (ASTIdentifier * right = typeid_cast<ASTIdentifier *>(func_node->arguments->children.at(1).get()))
if (!aliases.count(right->name))
right->kind = ASTIdentifier::Table;
/// Special cases for count function.
String func_name_lowercase = Poco::toLower(func_node->name);
if (startsWith(func_name_lowercase, "count"))
{
/// Select implementation of countDistinct based on settings.
/// Important that it is done as query rewrite. It means rewritten query
/// will be sent to remote servers during distributed query execution,
/// and on all remote servers, function implementation will be same.
if (endsWith(func_node->name, "Distinct") && func_name_lowercase == "countdistinct")
func_node->name = settings.count_distinct_implementation;
/// As special case, treat count(*) as count(), not as count(list of all columns).
if (func_name_lowercase == "count" && func_node->arguments->children.size() == 1
&& typeid_cast<const ASTAsterisk *>(func_node->arguments->children[0].get()))
{
func_node->arguments->children.clear();
}
}
}
else if ((identifier_node = typeid_cast<ASTIdentifier *>(ast.get())))
{
if (identifier_node->kind == ASTIdentifier::Column)
{
/// If it is an alias, but not a parent alias (for constructs like "SELECT column + 1 AS column").
auto it_alias = aliases.find(identifier_node->name);
if (it_alias != aliases.end() && current_alias != identifier_node->name)
{
/// Let's replace it with the corresponding tree node.
if (current_asts.count(it_alias->second.get()))
throw Exception("Cyclic aliases", ErrorCodes::CYCLIC_ALIASES);
if (!my_alias.empty() && my_alias != it_alias->second->getAliasOrColumnName())
{
/// Avoid infinite recursion here
auto replace_to_identifier = typeid_cast<ASTIdentifier *>(it_alias->second.get());
bool is_cycle = replace_to_identifier &&
replace_to_identifier->kind == ASTIdentifier::Column &&
replace_to_identifier->name == identifier_node->name;
if (!is_cycle)
{
/// In a construct like "a AS b", where a is an alias, you must set alias b to the result of substituting alias a.
ast = it_alias->second->clone();
ast->setAlias(my_alias);
replaced = true;
}
}
else
{
ast = it_alias->second;
replaced = true;
}
}
}
}
else if (ASTExpressionList * expr_list = typeid_cast<ASTExpressionList *>(ast.get()))
{
/// Replace * with a list of columns.
ASTs & asts = expr_list->children;
for (int i = static_cast<int>(asts.size()) - 1; i >= 0; --i)
{
if (typeid_cast<ASTAsterisk *>(asts[i].get()))
{
Names all_columns_name;
auto columns_name = storage ? storage->getColumns().ordinary.getNames() : source_columns.getNames();
all_columns_name.insert(all_columns_name.begin(), columns_name.begin(), columns_name.end());
if (!settings.asterisk_left_columns_only)
{
auto columns_from_joined_table = analyzed_join.getColumnsFromJoinedTable(context, select_query).getNames();
all_columns_name.insert(all_columns_name.end(), columns_from_joined_table.begin(), columns_from_joined_table.end());
}
asts.erase(asts.begin() + i);
for (size_t idx = 0; idx < all_columns_name.size(); idx++)
asts.insert(asts.begin() + idx + i, std::make_shared<ASTIdentifier>(all_columns_name[idx]));
}
}
}
else if (ASTTablesInSelectQueryElement * tables_elem = typeid_cast<ASTTablesInSelectQueryElement *>(ast.get()))
{
if (tables_elem->table_expression)
{
auto & database_and_table_name = static_cast<ASTTableExpression &>(*tables_elem->table_expression).database_and_table_name;
if (database_and_table_name)
{
if (ASTIdentifier * right = typeid_cast<ASTIdentifier *>(database_and_table_name.get()))
{
right->kind = ASTIdentifier::Table;
}
}
}
}
/// If we replace the root of the subtree, we will be called again for the new root, in case the alias is replaced by an alias.
if (replaced)
{
normalizeTreeImpl(ast, finished_asts, current_asts, current_alias, level + 1);
current_asts.erase(initial_ast.get());
current_asts.erase(ast.get());
finished_asts[initial_ast] = ast;
return;
}
/// Recurring calls. Don't go into subqueries. Don't go into components of compound identifiers.
/// We also do not go to the left argument of lambda expressions, so as not to replace the formal parameters
/// on aliases in expressions of the form 123 AS x, arrayMap(x -> 1, [2]).
if (func_node && func_node->name == "lambda")
{
/// We skip the first argument. We also assume that the lambda function can not have parameters.
for (size_t i = 1, size = func_node->arguments->children.size(); i < size; ++i)
{
auto & child = func_node->arguments->children[i];
if (typeid_cast<const ASTSelectQuery *>(child.get())
|| typeid_cast<const ASTTableExpression *>(child.get()))
continue;
normalizeTreeImpl(child, finished_asts, current_asts, current_alias, level + 1);
}
}
else if (identifier_node)
{
}
else
{
for (auto & child : ast->children)
{
if (typeid_cast<const ASTSelectQuery *>(child.get())
|| typeid_cast<const ASTTableExpression *>(child.get()))
continue;
normalizeTreeImpl(child, finished_asts, current_asts, current_alias, level + 1);
}
}
/// If the WHERE clause or HAVING consists of a single alias, the reference must be replaced not only in children, but also in where_expression and having_expression.
if (ASTSelectQuery * select = typeid_cast<ASTSelectQuery *>(ast.get()))
{
if (select->prewhere_expression)
normalizeTreeImpl(select->prewhere_expression, finished_asts, current_asts, current_alias, level + 1);
if (select->where_expression)
normalizeTreeImpl(select->where_expression, finished_asts, current_asts, current_alias, level + 1);
if (select->having_expression)
normalizeTreeImpl(select->having_expression, finished_asts, current_asts, current_alias, level + 1);
}
current_asts.erase(initial_ast.get());
current_asts.erase(ast.get());
finished_asts[initial_ast] = ast;
QueryNormalizer(query, aliases, settings, all_columns_name).perform();
}

View File

@ -310,15 +310,10 @@ private:
/// Parse JOIN ON expression and collect ASTs for joined columns.
void collectJoinedColumnsFromJoinOnExpr();
/** Create a dictionary of aliases.
*/
void addASTAliases(ASTPtr & ast, int ignore_levels = 0);
/** For star nodes(`*`), expand them to a list of all columns.
* For literal nodes, substitute aliases.
*/
void normalizeTree();
void normalizeTreeImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOfASTs & current_asts, std::string current_alias, size_t level);
/// Eliminates injective function calls and constant expressions from group by statement
void optimizeGroupBy();

View File

@ -7,6 +7,8 @@
#include <Parsers/ASTAsterisk.h>
#include <Parsers/ASTQualifiedAsterisk.h>
#include <Parsers/queryToString.h>
#include <Interpreters/QueryNormalizer.h>
#include <Interpreters/getQueryAliases.h>
namespace DB
{
@ -50,6 +52,9 @@ bool PredicateExpressionsOptimizer::optimizeImpl(
bool is_rewrite_subquery = false;
for (const auto & outer_predicate : outer_predicate_expressions)
{
if (isArrayJoinFunction(outer_predicate))
continue;
IdentifiersWithQualifiedNameSet outer_predicate_dependencies;
getDependenciesAndQualifiedOfExpression(outer_predicate, outer_predicate_dependencies, database_and_table_with_aliases);
@ -181,6 +186,21 @@ bool PredicateExpressionsOptimizer::cannotPushDownOuterPredicate(
return false;
}
/// Returns true if `node` itself, or any node in its subtree, is a call of the function `arrayJoin`.
bool PredicateExpressionsOptimizer::isArrayJoinFunction(const ASTPtr & node)
{
    const auto * function = typeid_cast<ASTFunction *>(node.get());
    if (function && function->name == "arrayJoin")
        return true;

    /// Not an arrayJoin call itself: search the children depth-first.
    for (const auto & child : node->children)
    {
        if (isArrayJoinFunction(child))
            return true;
    }

    return false;
}
bool PredicateExpressionsOptimizer::isAggregateFunction(ASTPtr & node)
{
if (auto function = typeid_cast<ASTFunction *>(node.get()))
@ -210,7 +230,12 @@ void PredicateExpressionsOptimizer::cloneOuterPredicateForInnerPredicate(
for (auto projection : projection_columns)
{
if (require.second == projection.second)
require.first->name = projection.first->getAliasOrColumnName();
{
ASTPtr & ast = projection.first;
if (!typeid_cast<ASTIdentifier *>(ast.get()) && ast->tryGetAlias().empty())
ast->setAlias(ast->getColumnName());
require.first->name = ast->getAliasOrColumnName();
}
}
}
}
@ -275,6 +300,11 @@ void PredicateExpressionsOptimizer::getSubqueryProjectionColumns(SubqueriesProje
ASTs PredicateExpressionsOptimizer::getSelectQueryProjectionColumns(ASTPtr & ast)
{
/// first should normalize query tree.
std::unordered_map<String, ASTPtr> aliases;
getQueryAliases(ast, aliases, 0);
QueryNormalizer(ast, aliases, settings, {}).perform();
ASTs projection_columns;
auto select_query = static_cast<ASTSelectQuery *>(ast.get());

View File

@ -61,6 +61,8 @@ private:
bool isAggregateFunction(ASTPtr & node);
bool isArrayJoinFunction(const ASTPtr & node);
PredicateExpressions splitConjunctionPredicate(ASTPtr & predicate_expression);
void getDependenciesAndQualifiedOfExpression(const ASTPtr & expression, IdentifiersWithQualifiedNameSet & dependencies_and_qualified,

View File

@ -0,0 +1,231 @@
#include <Core/Names.h>
#include <Interpreters/QueryNormalizer.h>
#include <Parsers/ASTAsterisk.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/typeid_cast.h>
#include <Poco/String.h>
namespace DB
{
namespace ErrorCodes
{
extern const int TOO_DEEP_AST;
extern const int CYCLIC_ALIASES;
}
namespace
{

/// Tells whether `name` is one of the IN operators: in, notIn, globalIn, globalNotIn.
bool functionIsInOrGlobalInOperator(const String & name)
{
    if (name == "in" || name == "notIn")
        return true;

    if (name == "globalIn" || name == "globalNotIn")
        return true;

    return false;
}

}
/// Remembers the query to rewrite, the alias dictionary, the settings and the column names
/// that are substituted for `*`. No work is done here; the rewrite itself happens in perform().
QueryNormalizer::QueryNormalizer(
    ASTPtr & query,
    const Aliases & aliases,
    const Settings & settings,
    const Names & all_columns_name)
    : query(query)
    , aliases(aliases)
    , settings(settings)
    , all_columns_name(all_columns_name)
{
}
void QueryNormalizer::perform()
{
SetOfASTs tmp_set;
MapOfASTs tmp_map;
performImpl(query, tmp_map, tmp_set, "", 0);
try
{
query->checkSize(settings.max_expanded_ast_elements);
}
catch (Exception & e)
{
e.addMessage("(after expansion of aliases)");
throw;
}
}
/// Recursively rewrites the subtree rooted at `ast`; `ast` itself may be replaced by another node.
/// finished_asts - already processed vertices (and what they were replaced by)
/// current_asts - vertices in the current call stack of this method
/// current_alias - the alias of the deepest ancestor of ast that has an alias (passed by value, so changes stay local to this call)
/// level - current recursion depth, checked against the max_ast_depth setting
/// Throws TOO_DEEP_AST when the depth limit is exceeded and CYCLIC_ALIASES when an alias refers to a vertex of the current call stack.
void QueryNormalizer::performImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOfASTs & current_asts, std::string current_alias, size_t level)
{
if (level > settings.max_ast_depth)
throw Exception("Normalized AST is too deep. Maximum: " + settings.max_ast_depth.toString(), ErrorCodes::TOO_DEEP_AST);
/// The vertex was already processed: reuse the stored result instead of processing it again.
if (finished_asts.count(ast))
{
ast = finished_asts[ast];
return;
}
/// Remember the vertex we started from: `ast` may be reassigned below.
ASTPtr initial_ast = ast;
current_asts.insert(initial_ast.get());
String my_alias = ast->tryGetAlias();
if (!my_alias.empty())
current_alias = my_alias;
/// rewrite rules that act when you go from top to bottom.
bool replaced = false;
ASTIdentifier * identifier_node = nullptr;
ASTFunction * func_node = nullptr;
if ((func_node = typeid_cast<ASTFunction *>(ast.get())))
{
/// `IN t` can be specified, where t is a table, which is equivalent to `IN (SELECT * FROM t)`.
/// The right argument is marked as a table only if its name is not a known alias.
if (functionIsInOrGlobalInOperator(func_node->name))
if (ASTIdentifier * right = typeid_cast<ASTIdentifier *>(func_node->arguments->children.at(1).get()))
if (!aliases.count(right->name))
right->kind = ASTIdentifier::Table;
/// Special cases for count function.
String func_name_lowercase = Poco::toLower(func_node->name);
if (startsWith(func_name_lowercase, "count"))
{
/// Select implementation of countDistinct based on settings.
/// Important that it is done as query rewrite. It means rewritten query
/// will be sent to remote servers during distributed query execution,
/// and on all remote servers, function implementation will be same.
if (endsWith(func_node->name, "Distinct") && func_name_lowercase == "countdistinct")
func_node->name = settings.count_distinct_implementation;
/// As special case, treat count(*) as count(), not as count(list of all columns).
if (func_name_lowercase == "count" && func_node->arguments->children.size() == 1
&& typeid_cast<const ASTAsterisk *>(func_node->arguments->children[0].get()))
{
func_node->arguments->children.clear();
}
}
}
else if ((identifier_node = typeid_cast<ASTIdentifier *>(ast.get())))
{
if (identifier_node->kind == ASTIdentifier::Column)
{
/// If it is an alias, but not a parent alias (for constructs like "SELECT column + 1 AS column").
auto it_alias = aliases.find(identifier_node->name);
if (it_alias != aliases.end() && current_alias != identifier_node->name)
{
/// Let's replace it with the corresponding tree node.
if (current_asts.count(it_alias->second.get()))
throw Exception("Cyclic aliases", ErrorCodes::CYCLIC_ALIASES);
if (!my_alias.empty() && my_alias != it_alias->second->getAliasOrColumnName())
{
/// Avoid infinite recursion here
auto replace_to_identifier = typeid_cast<ASTIdentifier *>(it_alias->second.get());
bool is_cycle = replace_to_identifier && replace_to_identifier->kind == ASTIdentifier::Column
&& replace_to_identifier->name == identifier_node->name;
if (!is_cycle)
{
/// In a construct like "a AS b", where a is an alias, you must set alias b to the result of substituting alias a.
/// A clone is used so that setting alias b does not modify the node stored in the alias dictionary.
ast = it_alias->second->clone();
ast->setAlias(my_alias);
replaced = true;
}
}
else
{
/// No own alias to preserve: the node from the alias dictionary is used directly, without cloning.
ast = it_alias->second;
replaced = true;
}
}
}
}
else if (ASTExpressionList * expr_list = typeid_cast<ASTExpressionList *>(ast.get()))
{
/// Replace * with a list of columns.
ASTs & asts = expr_list->children;
/// Go from the end to the beginning, so that inserted columns do not shift the positions that are not visited yet.
for (int i = static_cast<int>(asts.size()) - 1; i >= 0; --i)
{
/// An asterisk is left in place when no column names were supplied.
if (typeid_cast<ASTAsterisk *>(asts[i].get()) && !all_columns_name.empty())
{
asts.erase(asts.begin() + i);
for (size_t idx = 0; idx < all_columns_name.size(); idx++)
asts.insert(asts.begin() + idx + i, std::make_shared<ASTIdentifier>(all_columns_name[idx]));
}
}
}
else if (ASTTablesInSelectQueryElement * tables_elem = typeid_cast<ASTTablesInSelectQueryElement *>(ast.get()))
{
/// An identifier that names a table in the tables list is marked as a table identifier.
if (tables_elem->table_expression)
{
auto & database_and_table_name = static_cast<ASTTableExpression &>(*tables_elem->table_expression).database_and_table_name;
if (database_and_table_name)
{
if (ASTIdentifier * right = typeid_cast<ASTIdentifier *>(database_and_table_name.get()))
{
right->kind = ASTIdentifier::Table;
}
}
}
}
/// If we replace the root of the subtree, we will be called again for the new root, in case the alias is replaced by an alias.
if (replaced)
{
performImpl(ast, finished_asts, current_asts, current_alias, level + 1);
/// Leave the call stack and remember what the initial vertex was replaced by.
current_asts.erase(initial_ast.get());
current_asts.erase(ast.get());
finished_asts[initial_ast] = ast;
return;
}
/// Recurring calls. Don't go into subqueries. Don't go into components of compound identifiers.
/// We also do not go to the left argument of lambda expressions, so as not to replace the formal parameters
/// on aliases in expressions of the form 123 AS x, arrayMap(x -> 1, [2]).
if (func_node && func_node->name == "lambda")
{
/// We skip the first argument. We also assume that the lambda function can not have parameters.
for (size_t i = 1, size = func_node->arguments->children.size(); i < size; ++i)
{
auto & child = func_node->arguments->children[i];
if (typeid_cast<const ASTSelectQuery *>(child.get()) || typeid_cast<const ASTTableExpression *>(child.get()))
continue;
performImpl(child, finished_asts, current_asts, current_alias, level + 1);
}
}
else if (identifier_node)
{
/// Intentionally empty: children of identifiers are not visited (see the comment above).
}
else
{
for (auto & child : ast->children)
{
if (typeid_cast<const ASTSelectQuery *>(child.get()) || typeid_cast<const ASTTableExpression *>(child.get()))
continue;
performImpl(child, finished_asts, current_asts, current_alias, level + 1);
}
}
/// If the WHERE clause or HAVING consists of a single alias, the reference must be replaced not only in children, but also in where_expression and having_expression.
/// The same is done for prewhere_expression.
if (ASTSelectQuery * select = typeid_cast<ASTSelectQuery *>(ast.get()))
{
if (select->prewhere_expression)
performImpl(select->prewhere_expression, finished_asts, current_asts, current_alias, level + 1);
if (select->where_expression)
performImpl(select->where_expression, finished_asts, current_asts, current_alias, level + 1);
if (select->having_expression)
performImpl(select->having_expression, finished_asts, current_asts, current_alias, level + 1);
}
/// Leave the call stack and remember the result for the initial vertex.
current_asts.erase(initial_ast.get());
current_asts.erase(ast.get());
finished_asts[initial_ast] = ast;
}
}

View File

@ -0,0 +1,30 @@
#pragma once
#include <Parsers/IAST.h>
#include "Settings.h"
namespace DB
{
/** Rewrites a query AST in place:
  *  - substitutes column identifiers that are aliases with the aliased expressions;
  *  - replaces `*` in expression lists with the supplied list of column names (if it is not empty);
  *  - applies the rewrites for the IN operators and the count functions.
  * An instance keeps references to `query`, `aliases` and `settings`,
  *  so these objects must outlive the instance.
  */
class QueryNormalizer
{
public:
    /// Alias name -> AST node that carries this alias.
    using Aliases = std::unordered_map<String, ASTPtr>;

    QueryNormalizer(ASTPtr & query, const Aliases & aliases, const Settings & settings, const Names & all_columns_name);

    /// Performs the rewrite of `query`. May throw if the resulting AST is too deep or too big, or on cyclic aliases.
    void perform();

private:
    using SetOfASTs = std::set<const IAST *>;
    using MapOfASTs = std::map<ASTPtr, ASTPtr>;

    ASTPtr & query;
    const Aliases & aliases;
    const Settings & settings;

    /// Kept by value: callers are allowed to pass a temporary (for example `{}`),
    /// and a stored reference to it would dangle as soon as the constructing expression ends.
    const Names all_columns_name;

    void performImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOfASTs & current_asts, std::string current_alias, size_t level);
};
}

View File

@ -0,0 +1,83 @@
#include <ostream>
#include <sstream>
#include <Interpreters/getQueryAliases.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Common/typeid_cast.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/formatAST.h>
#include <Parsers/ASTSubquery.h>
#include <IO/WriteHelpers.h>
namespace DB
{
namespace ErrorCodes
{
extern const int MULTIPLE_EXPRESSIONS_FOR_ALIAS;
}
/// Fills `aliases` with the aliases found in the subtree of `ast`.
/// ignore_levels - the number of upper levels of the subtree whose aliases must be ignored.
/// For example, with ignore_levels=1 `ast` itself cannot be put in the dictionary, but its children can.
void getQueryAliases(ASTPtr & ast, Aliases & aliases, int ignore_levels)
{
    /// Bottom-up traversal: children go first. We do not go into subqueries.
    for (auto & child : ast->children)
    {
        /// Table functions and subqueries are not descended into.
        const bool skip_child = typeid_cast<ASTTableExpression *>(child.get())
            || typeid_cast<ASTSelectWithUnionQuery *>(child.get());
        if (skip_child)
            continue;

        /// The top-level aliases in the ARRAY JOIN section have a special meaning, so they are not added
        /// (the expression list itself and its children are skipped).
        int levels_for_child = std::max(0, ignore_levels - 1);
        if (typeid_cast<ASTArrayJoin *>(ast.get()))
            levels_for_child = 3;

        getQueryAliases(child, aliases, levels_for_child);
    }

    if (ignore_levels > 0)
        return;

    String alias = ast->tryGetAlias();

    if (!alias.empty())
    {
        /// An alias may repeat only if it names the very same expression.
        const auto previous = aliases.find(alias);
        if (previous != aliases.end() && ast->getTreeHash() != previous->second->getTreeHash())
        {
            std::stringstream message;
            message << "Different expressions with the same alias " << backQuoteIfNeed(alias) << ":\n";
            formatAST(*ast, message, false, true);
            message << "\nand\n";
            formatAST(*previous->second, message, false, true);
            message << "\n";

            throw Exception(message.str(), ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS);
        }

        aliases[alias] = ast;
        return;
    }

    auto * subquery = typeid_cast<ASTSubquery *>(ast.get());
    if (!subquery || !subquery->alias.empty())
        return;

    /// Set unique aliases for all subqueries. This is needed because the content of subqueries could change
    /// after recursive analysis, and auto-generated column names could become incorrect.
    size_t subquery_index = 1;
    alias = "_subquery" + toString(subquery_index);
    while (aliases.count(alias))
    {
        ++subquery_index;
        alias = "_subquery" + toString(subquery_index);
    }

    subquery->setAlias(alias);
    subquery->prefer_alias_to_column_name = true;
    aliases[alias] = ast;
}
}

View File

@ -0,0 +1,13 @@
#pragma once
#include <Parsers/IAST.h>
#include <unordered_map>
namespace DB
{
/// Dictionary of aliases: alias name -> AST node that carries this alias.
using Aliases = std::unordered_map<String, ASTPtr>;
/// Collects the aliases of the subtree of `ast` into `aliases`, without descending into table expressions and subqueries.
/// Subqueries that have no alias are given a generated one of the form `_subqueryN`.
/// ignore_levels - aliases in how many upper levels of the subtree should be ignored.
/// Throws MULTIPLE_EXPRESSIONS_FOR_ALIAS if the same alias is used for two different expressions.
void getQueryAliases(ASTPtr & ast, Aliases & aliases, int ignore_levels = 0);
}

View File

@ -9,6 +9,10 @@
#include <DataStreams/MaterializingBlockInputStream.h>
#include <Common/typeid_cast.h>
#include <Interpreters/PredicateExpressionsOptimizer.h>
#include <Parsers/ASTAsterisk.h>
#include <iostream>
#include <Parsers/queryToString.h>
namespace DB
{
@ -45,16 +49,21 @@ BlockInputStreams StorageView::read(
BlockInputStreams res;
ASTPtr & current_inner_query = inner_query;
if (context.getSettings().enable_optimize_predicate_expression)
{
replaceTableNameWithSubquery(typeid_cast<ASTSelectQuery *>(query_info.query.get()), inner_query);
auto res_io = InterpreterSelectQuery(query_info.query, context, column_names, processed_stage).execute();
auto new_inner_query = inner_query->clone();
auto new_outer_query = query_info.query->clone();
auto new_outer_select = typeid_cast<ASTSelectQuery *>(new_outer_query.get());
res.emplace_back(res_io.in);
return res;
replaceTableNameWithSubquery(new_outer_select, new_inner_query);
if (PredicateExpressionsOptimizer(new_outer_select, context.getSettings(), context).optimize())
current_inner_query = new_inner_query;
}
res = InterpreterSelectWithUnionQuery(inner_query, context, column_names).executeWithMultipleStreams();
res = InterpreterSelectWithUnionQuery(current_inner_query, context, column_names).executeWithMultipleStreams();
/// It's expected that the columns read from storage are not constant.
/// Because method 'getSampleBlockForColumns' is used to obtain a structure of result in InterpreterSelectQuery.
@ -76,9 +85,12 @@ void StorageView::replaceTableNameWithSubquery(ASTSelectQuery * select_query, AS
if (!table_expression->database_and_table_name)
throw Exception("Logical error: incorrect table expression", ErrorCodes::LOGICAL_ERROR);
const auto alias = table_expression->database_and_table_name->tryGetAlias();
table_expression->database_and_table_name = {};
table_expression->subquery = std::make_shared<ASTSubquery>();
table_expression->subquery->children.push_back(subquery->clone());
table_expression->subquery->children.push_back(subquery);
if (!alias.empty())
table_expression->subquery->setAlias(alias);
}

View File

@ -1,4 +1,3 @@
-------Query that previously worked but now doesn\'t work.-------
-------Not need optimize predicate, but it works.-------
1
1
@ -6,7 +5,12 @@
-------Need push down-------
1
1
1
1 1
3 3
3 3
2000-01-01 1 test string 1 1
3 3
2000-01-01 1 test string 1 1
2000-01-01 1 test string 1 1
2000-01-01 1 test string 1 1
@ -21,5 +25,7 @@
2000-01-01 1 test string 1 1
1 2000-01-01 2000-01-01 1 test string 1 1
2000-01-01 1 test string 1 1
1
1
-------Push to having expression, need check.-------
-------Compatibility test-------

View File

@ -11,18 +11,21 @@ INSERT INTO test.test VALUES('2000-01-01', 2, 'test string 2', 2);
SET enable_optimize_predicate_expression = 1;
SELECT '-------Query that previously worked but now doesn\'t work.-------';
SELECT * FROM (SELECT 1) WHERE `1` = 1; -- { serverError 47 }
SELECT '-------Not need optimize predicate, but it works.-------';
SELECT 1;
SELECT 1 AS id WHERE id = 1;
SELECT arrayJoin([1,2,3]) AS id WHERE id = 1;
SELECT '-------Need push down-------';
SELECT * FROM (SELECT 1 AS id UNION ALL SELECT 2) WHERE id = 1;
SELECT * FROM (SELECT arrayJoin([1, 2, 3]) AS id) WHERE id = 1;
SELECT id FROM (SELECT arrayJoin([1, 2, 3]) AS id) WHERE id = 1;
SELECT date, id, name, value FROM (SELECT date, name, value,min(id) AS id FROM test.test GROUP BY date, name, value) WHERE id = 1;
SELECT * FROM (SELECT 1 AS id, (SELECT 1)) WHERE _subquery1 = 1;
SELECT * FROM (SELECT toUInt64(b) AS a, sum(id) AS b FROM test.test) WHERE a = 3;
SELECT * FROM (SELECT toUInt64(b), sum(id) AS b FROM test.test) WHERE `toUInt64(sum(id))` = 3;
SELECT date, id, name, value FROM (SELECT date, name, value, min(id) AS id FROM test.test GROUP BY date, name, value) WHERE id = 1;
SELECT * FROM (SELECT toUInt64(table_alias.b) AS a, sum(id) AS b FROM test.test AS table_alias) AS outer_table_alias WHERE outer_table_alias.b = 3;
SET force_primary_key = 1;
@ -54,9 +57,14 @@ SELECT * FROM (SELECT 1 AS id, toDate('2000-01-01') AS date FROM system.numbers
-- Optimize predicate expression with view
SELECT * FROM test.test_view WHERE id = 1;
SELECT id FROM test.test_view WHERE id = 1;
SELECT s.id FROM test.test_view AS s WHERE id = 1;
SELECT '-------Push to having expression, need check.-------';
SELECT id FROM (SELECT min(id) AS id FROM test.test) WHERE id = 1; -- { serverError 277 }
SELECT * FROM (SELECT toUInt64(b) AS a, sum(id) AS b FROM test.test) WHERE a = 3; -- { serverError 277 }
SELECT * FROM (SELECT toUInt64(b), sum(id) AS b FROM test.test) WHERE `toUInt64(sum(id))` = 3; -- { serverError 277 }
SELECT * FROM (SELECT toUInt64(table_alias.b) AS a, sum(id) AS b FROM test.test AS table_alias) AS outer_table_alias WHERE outer_table_alias.b = 3; -- { serverError 277 }
SELECT '-------Compatibility test-------';
SELECT * FROM (SELECT 1 AS id, toDate('2000-01-01') AS date FROM system.numbers LIMIT 1) ANY LEFT JOIN (SELECT * FROM test.test) AS b USING date WHERE b.date = toDate('2000-01-01'); -- {serverError 47}