From 6fad51d64207bdc82f2844af0ced0dfa09360550 Mon Sep 17 00:00:00 2001 From: chertus Date: Thu, 6 Dec 2018 22:02:42 +0300 Subject: [PATCH] QueryAliasesMatcher via InDepthNodeVisitor (bottom to top) CLICKHOUSE-3996 --- dbms/src/Interpreters/InDepthNodeVisitor.h | 2 +- .../PredicateExpressionsOptimizer.cpp | 4 +- dbms/src/Interpreters/QueryAliasesVisitor.cpp | 84 +++++++++++-------- dbms/src/Interpreters/QueryAliasesVisitor.h | 55 ++++-------- dbms/src/Interpreters/SyntaxAnalyzer.cpp | 4 +- 5 files changed, 69 insertions(+), 80 deletions(-) diff --git a/dbms/src/Interpreters/InDepthNodeVisitor.h b/dbms/src/Interpreters/InDepthNodeVisitor.h index 997013aff1f..4292da7fbdb 100644 --- a/dbms/src/Interpreters/InDepthNodeVisitor.h +++ b/dbms/src/Interpreters/InDepthNodeVisitor.h @@ -38,7 +38,7 @@ public: } private: - MatcherData & data; + Data & data; size_t visit_depth; std::ostream * ostr; diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp index 84ca8b0a088..8e95773f72c 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -313,8 +313,8 @@ ASTs PredicateExpressionsOptimizer::getSelectQueryProjectionColumns(ASTPtr & ast TranslateQualifiedNamesMatcher::Data qn_visitor_data{{}, tables}; TranslateQualifiedNamesVisitor(qn_visitor_data).visit(ast); - QueryAliasesVisitor query_aliases_visitor(aliases); - query_aliases_visitor.visit(ast); + QueryAliasesMatcher::Data query_aliases_data{aliases}; + QueryAliasesVisitor(query_aliases_data).visit(ast); QueryNormalizer(ast, aliases, settings, {}, {}).perform(); for (const auto & projection_column : select_query->select_expression_list->children) diff --git a/dbms/src/Interpreters/QueryAliasesVisitor.cpp b/dbms/src/Interpreters/QueryAliasesVisitor.cpp index 22818f96ffd..cd7baba0061 100644 --- a/dbms/src/Interpreters/QueryAliasesVisitor.cpp +++ b/dbms/src/Interpreters/QueryAliasesVisitor.cpp @@ -1,5 +1,6 @@ #include #include + #include #include #include @@ -16,33 +17,62 @@ namespace ErrorCodes extern const int MULTIPLE_EXPRESSIONS_FOR_ALIAS; } -void QueryAliasesVisitor::visit(const ASTPtr & ast) const +static String wrongAliasMessage(const ASTPtr & ast, const ASTPtr & prev_ast, const String & alias) { - /// Bottom-up traversal. We do not go into subqueries. - visitChildren(ast); + std::stringstream message; + message << "Different expressions with the same alias " << backQuoteIfNeed(alias) << ":" << std::endl; + formatAST(*ast, message, false, true); + message << std::endl << "and" << std::endl; + formatAST(*prev_ast, message, false, true); + message << std::endl; + return message.str(); +} - if (!tryVisit(ast)) - { - DumpASTNode dump(*ast, ostr, visit_depth, "getQueryAliases"); - visitOther(ast); - } + +bool QueryAliasesMatcher::needChildVisit(ASTPtr & node, const ASTPtr &) +{ + /// Don't descent into table functions and subqueries and special case for ArrayJoin. + if (typeid_cast(node.get()) || + typeid_cast(node.get()) || + typeid_cast(node.get())) + return false; + return true; +} + +std::vector QueryAliasesMatcher::visit(ASTPtr & ast, Data & data) +{ + if (auto * t = typeid_cast(ast.get())) + return visit(*t, ast, data); + if (auto * t = typeid_cast(ast.get())) + return visit(*t, ast, data); + + visitOther(ast, data); + return {}; } /// The top-level aliases in the ARRAY JOIN section have a special meaning, we will not add them /// (skip the expression list itself and its children). -void QueryAliasesVisitor::visit(const ASTArrayJoin &, const ASTPtr & ast) const +std::vector QueryAliasesMatcher::visit(const ASTArrayJoin &, const ASTPtr & ast, Data & data) { + visitOther(ast, data); + + /// @warning It breaks botom-to-top order (childs processed after node here), could lead to some effects. + /// It's possible to add ast back to result vec to save order. It will need two phase ASTArrayJoin visit (setting phase in data). + std::vector out; for (auto & child1 : ast->children) for (auto & child2 : child1->children) for (auto & child3 : child2->children) - visit(child3); + out.push_back(child3); + return out; } /// set unique aliases for all subqueries. this is needed, because: /// 1) content of subqueries could change after recursive analysis, and auto-generated column names could become incorrect /// 2) result of different scalar subqueries can be cached inside expressions compilation cache and must have different names -void QueryAliasesVisitor::visit(ASTSubquery & subquery, const ASTPtr & ast) const +std::vector QueryAliasesMatcher::visit(ASTSubquery & subquery, const ASTPtr & ast, Data & data) { + Aliases & aliases = data.aliases; + static std::atomic_uint64_t subquery_index = 0; if (subquery.alias.empty()) @@ -59,42 +89,22 @@ void QueryAliasesVisitor::visit(ASTSubquery & subquery, const ASTPtr & ast) cons aliases[alias] = ast; } else - visitOther(ast); + visitOther(ast, data); + return {}; } -void QueryAliasesVisitor::visitOther(const ASTPtr & ast) const +void QueryAliasesMatcher::visitOther(const ASTPtr & ast, Data & data) { + Aliases & aliases = data.aliases; + String alias = ast->tryGetAlias(); if (!alias.empty()) { if (aliases.count(alias) && ast->getTreeHash() != aliases[alias]->getTreeHash()) - throw Exception(wrongAliasMessage(ast, alias), ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS); + throw Exception(wrongAliasMessage(ast, aliases[alias], alias), ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS); aliases[alias] = ast; } } -void QueryAliasesVisitor::visitChildren(const ASTPtr & ast) const -{ - for (auto & child : ast->children) - { - /// Don't descent into table functions and subqueries and special case for ArrayJoin. - if (!tryVisit(ast) && - !tryVisit(ast) && - !tryVisit(ast)) - visit(child); - } -} - -String QueryAliasesVisitor::wrongAliasMessage(const ASTPtr & ast, const String & alias) const -{ - std::stringstream message; - message << "Different expressions with the same alias " << backQuoteIfNeed(alias) << ":" << std::endl; - formatAST(*ast, message, false, true); - message << std::endl << "and" << std::endl; - formatAST(*aliases[alias], message, false, true); - message << std::endl; - return message.str(); -} - } diff --git a/dbms/src/Interpreters/QueryAliasesVisitor.h b/dbms/src/Interpreters/QueryAliasesVisitor.h index cb8548bd3cf..aae211e6e83 100644 --- a/dbms/src/Interpreters/QueryAliasesVisitor.h +++ b/dbms/src/Interpreters/QueryAliasesVisitor.h @@ -1,8 +1,7 @@ #pragma once -#include -#include #include +#include namespace DB { @@ -14,47 +13,27 @@ struct ASTArrayJoin; using Aliases = std::unordered_map; -/// Visitors consist of functions with unified interface 'void visit(Casted & x, ASTPtr & y)', there x is y, successfully casted to Casted. -/// Both types and fuction could have const specifiers. The second argument is used by visitor to replaces AST node (y) if needed. - -/// Visits AST nodes and collect their aliases in one map (with links to source nodes). -class QueryAliasesVisitor +/// Visits AST node to collect aliases. +class QueryAliasesMatcher { public: - QueryAliasesVisitor(Aliases & aliases_, std::ostream * ostr_ = nullptr) - : aliases(aliases_), - visit_depth(0), - ostr(ostr_) - {} + struct Data + { + Aliases & aliases; + }; - void visit(const ASTPtr & ast) const; + static constexpr const char * label = __FILE__; + + static std::vector visit(ASTPtr & ast, Data & data); + static bool needChildVisit(ASTPtr & node, const ASTPtr & child); private: - Aliases & aliases; - mutable size_t visit_depth; - std::ostream * ostr; - - void visit(const ASTTableExpression &, const ASTPtr &) const {} - void visit(const ASTSelectWithUnionQuery &, const ASTPtr &) const {} - - void visit(ASTSubquery & subquery, const ASTPtr & ast) const; - void visit(const ASTArrayJoin &, const ASTPtr & ast) const; - void visitOther(const ASTPtr & ast) const; - void visitChildren(const ASTPtr & ast) const; - - template - bool tryVisit(const ASTPtr & ast) const - { - if (T * t = typeid_cast(ast.get())) - { - DumpASTNode dump(*ast, ostr, visit_depth, "getQueryAliases"); - visit(*t, ast); - return true; - } - return false; - } - - String wrongAliasMessage(const ASTPtr & ast, const String & alias) const; + static std::vector visit(ASTSubquery & subquery, const ASTPtr & ast, Data & data); + static std::vector visit(const ASTArrayJoin &, const ASTPtr & ast, Data & data); + static void visitOther(const ASTPtr & ast, Data & data); }; +/// Visits AST nodes and collect their aliases in one map (with links to source nodes). +using QueryAliasesVisitor = InDepthNodeVisitor; + } diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index c6a15058b5f..bc7fce2a165 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -134,8 +134,8 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( /// Creates a dictionary `aliases`: alias -> ASTPtr { LogAST log; - QueryAliasesVisitor query_aliases_visitor(result.aliases, log.stream()); - query_aliases_visitor.visit(query); + QueryAliasesMatcher::Data query_aliases_data{result.aliases}; + QueryAliasesVisitor(query_aliases_data, log.stream()).visit(query); } /// Common subexpression elimination. Rewrite rules.