refactoring: unify QueryAliasesVisitor with other AST visitors

This commit is contained in:
chertus 2018-11-02 18:09:15 +03:00
parent 5a06d4953d
commit 50d9a6e5d5
4 changed files with 99 additions and 68 deletions

View File

@ -205,8 +205,8 @@ ExpressionAnalyzer::ExpressionAnalyzer(
/// Creates a dictionary `aliases`: alias -> ASTPtr
{
LogAST log;
QueryAliasesVisitor query_aliases_visitor(log.stream());
query_aliases_visitor.visit(query, aliases);
QueryAliasesVisitor query_aliases_visitor(aliases, log.stream());
query_aliases_visitor.visit(query);
}
/// Common subexpression elimination. Rewrite rules.

View File

@ -302,8 +302,8 @@ ASTs PredicateExpressionsOptimizer::getSelectQueryProjectionColumns(ASTPtr & ast
{
/// first should normalize query tree.
std::unordered_map<String, ASTPtr> aliases;
QueryAliasesVisitor query_aliases_visitor;
query_aliases_visitor.visit(ast, aliases, 0);
QueryAliasesVisitor query_aliases_visitor(aliases);
query_aliases_visitor.visit(ast);
QueryNormalizer(ast, aliases, settings, {}, {}).perform();
ASTs projection_columns;

View File

@ -6,7 +6,6 @@
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/formatAST.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/DumpASTNode.h>
#include <IO/WriteHelpers.h>
namespace DB
@ -17,76 +16,85 @@ namespace ErrorCodes
extern const int MULTIPLE_EXPRESSIONS_FOR_ALIAS;
}
/// ignore_levels - aliases in how many upper levels of the subtree should be ignored.
/// For example, with ignore_levels=1 ast can not be put in the dictionary, but its children can.
void QueryAliasesVisitor::getQueryAliases(const ASTPtr & ast, Aliases & aliases, int ignore_levels) const
void QueryAliasesVisitor::visit(const ASTPtr & ast) const
{
DumpASTNode dump(*ast, ostr, visit_depth, "getQueryAliases");
/// Bottom-up traversal. We do not go into subqueries.
for (auto & child : ast->children)
visitChildren(ast);
if (!tryVisit<ASTSubquery>(ast))
{
int new_ignore_levels = std::max(0, ignore_levels - 1);
/// The top-level aliases in the ARRAY JOIN section have a special meaning, we will not add them
/// (skip the expression list itself and its children).
if (typeid_cast<ASTArrayJoin *>(ast.get()))
new_ignore_levels = 3;
/// Don't descent into table functions and subqueries.
if (!typeid_cast<ASTTableExpression *>(child.get())
&& !typeid_cast<ASTSelectWithUnionQuery *>(child.get()))
getQueryAliases(child, aliases, new_ignore_levels);
DumpASTNode dump(*ast, ostr, visit_depth, "getQueryAliases");
visitOther(ast);
}
if (ignore_levels > 0)
return;
getNodeAlias(ast, aliases, dump);
}
void QueryAliasesVisitor::getNodeAlias(const ASTPtr & ast, Aliases & aliases, const DumpASTNode & dump) const
/// The top-level aliases in the ARRAY JOIN section have a special meaning, we will not add them
/// (skip the expression list itself and its children).
void QueryAliasesVisitor::visit(const ASTArrayJoin &, const ASTPtr & ast) const
{
for (auto & child1 : ast->children)
for (auto & child2 : child1->children)
for (auto & child3 : child2->children)
visit(child3);
}
/// set unique aliases for all subqueries. this is needed, because:
/// 1) content of subqueries could change after recursive analysis, and auto-generated column names could become incorrect
/// 2) result of different scalar subqueries can be cached inside expressions compilation cache and must have different names
void QueryAliasesVisitor::visit(ASTSubquery & subquery, const ASTPtr & ast) const
{
static std::atomic_uint64_t subquery_index = 0;
if (subquery.alias.empty())
{
String alias;
do
{
alias = "_subquery" + std::to_string(++subquery_index);
}
while (aliases.count(alias));
subquery.setAlias(alias);
subquery.prefer_alias_to_column_name = true;
aliases[alias] = ast;
}
else
visitOther(ast);
}
void QueryAliasesVisitor::visitOther(const ASTPtr & ast) const
{
String alias = ast->tryGetAlias();
if (!alias.empty())
{
if (aliases.count(alias) && ast->getTreeHash() != aliases[alias]->getTreeHash())
throw Exception(wrongAliasMessage(ast, alias), ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS);
aliases[alias] = ast;
}
}
void QueryAliasesVisitor::visitChildren(const ASTPtr & ast) const
{
for (auto & child : ast->children)
{
/// Don't descent into table functions and subqueries and special case for ArrayJoin.
if (!tryVisit<ASTTableExpression>(ast) &&
!tryVisit<ASTSelectWithUnionQuery>(ast) &&
!tryVisit<ASTArrayJoin>(ast))
visit(child);
}
}
String QueryAliasesVisitor::wrongAliasMessage(const ASTPtr & ast, const String & alias) const
{
std::stringstream message;
message << "Different expressions with the same alias " << backQuoteIfNeed(alias) << ":\n";
message << "Different expressions with the same alias " << backQuoteIfNeed(alias) << ":" << std::endl;
formatAST(*ast, message, false, true);
message << "\nand\n";
message << std::endl << "and" << std::endl;
formatAST(*aliases[alias], message, false, true);
message << "\n";
throw Exception(message.str(), ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS);
}
aliases[alias] = ast;
dump.print(visit_action, alias);
}
else if (auto subquery = typeid_cast<ASTSubquery *>(ast.get()))
{
/// Set unique aliases for all subqueries. This is needed, because:
/// 1) content of subqueries could change after recursive analysis, and auto-generated column names could become incorrect
/// 2) result of different scalar subqueries can be cached inside expressions compilation cache and must have different names
if (subquery->alias.empty())
{
static std::atomic_uint64_t subquery_index = 1;
while (true)
{
alias = "_subquery" + std::to_string(subquery_index++);
if (!aliases.count(alias))
break;
}
subquery->setAlias(alias);
subquery->prefer_alias_to_column_name = true;
aliases[alias] = ast;
dump.print(visit_action, alias);
}
}
message << std::endl;
return message.str();
}
}

View File

@ -6,29 +6,52 @@
namespace DB
{
class ASTSelectWithUnionQuery;
class ASTSubquery;
struct ASTTableExpression;
struct ASTArrayJoin;
using Aliases = std::unordered_map<String, ASTPtr>;
/// Visits AST nodes and collect their aliases in one map (with links to source nodes).
class QueryAliasesVisitor
{
public:
QueryAliasesVisitor(std::ostream * ostr_ = nullptr)
: visit_depth(0),
QueryAliasesVisitor(Aliases & aliases_, std::ostream * ostr_ = nullptr)
: aliases(aliases_),
visit_depth(0),
ostr(ostr_)
{}
void visit(const ASTPtr & ast, Aliases & aliases, int ignore_levels = 0) const
{
getQueryAliases(ast, aliases, ignore_levels);
}
void visit(const ASTPtr & ast) const;
private:
static constexpr const char * visit_action = "addAlias";
Aliases & aliases;
mutable size_t visit_depth;
std::ostream * ostr;
void getQueryAliases(const ASTPtr & ast, Aliases & aliases, int ignore_levels) const;
void getNodeAlias(const ASTPtr & ast, Aliases & aliases, const DumpASTNode & dump) const;
void visit(const ASTTableExpression &, const ASTPtr & ) const {}
void visit(const ASTSelectWithUnionQuery &, const ASTPtr & ) const {}
void visit(ASTSubquery & subquery, const ASTPtr & ast) const;
void visit(const ASTArrayJoin &, const ASTPtr & ast) const;
void visitOther(const ASTPtr & ast) const;
void visitChildren(const ASTPtr & ast) const;
template <typename T>
bool tryVisit(const ASTPtr & ast) const
{
if (T * t = typeid_cast<T *>(ast.get()))
{
DumpASTNode dump(*ast, ostr, visit_depth, "getQueryAliases");
visit(*t, ast);
return true;
}
return false;
}
String wrongAliasMessage(const ASTPtr & ast, const String & alias) const;
};
}