From 731faeadbfa9effe9583cc4f244c3c748da7967e Mon Sep 17 00:00:00 2001 From: feng lv Date: Thu, 25 Feb 2021 07:47:08 +0000 Subject: [PATCH 1/3] union distinct improve --- .../InterpreterSelectWithUnionQuery.cpp | 117 ------------------ .../NormalizeSelectWithUnionQueryVisitor.cpp | 107 ++++++++++++++++ .../NormalizeSelectWithUnionQueryVisitor.h | 34 +++++ src/Interpreters/executeQuery.cpp | 28 +++-- 4 files changed, 162 insertions(+), 124 deletions(-) create mode 100644 src/Interpreters/NormalizeSelectWithUnionQueryVisitor.cpp create mode 100644 src/Interpreters/NormalizeSelectWithUnionQueryVisitor.h diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index 59fcff61936..62dc843f982 100644 --- a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -24,110 +24,8 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int UNION_ALL_RESULT_STRUCTURES_MISMATCH; - extern const int EXPECTED_ALL_OR_DISTINCT; } -struct CustomizeASTSelectWithUnionQueryNormalize -{ - using TypeToVisit = ASTSelectWithUnionQuery; - - const UnionMode & union_default_mode; - - static void getSelectsFromUnionListNode(ASTPtr & ast_select, ASTs & selects) - { - if (auto * inner_union = ast_select->as()) - { - for (auto & child : inner_union->list_of_selects->children) - getSelectsFromUnionListNode(child, selects); - - return; - } - - selects.push_back(std::move(ast_select)); - } - - void visit(ASTSelectWithUnionQuery & ast, ASTPtr &) const - { - auto & union_modes = ast.list_of_modes; - ASTs selects; - auto & select_list = ast.list_of_selects->children; - - int i; - for (i = union_modes.size() - 1; i >= 0; --i) - { - /// Rewrite UNION Mode - if (union_modes[i] == ASTSelectWithUnionQuery::Mode::Unspecified) - { - if (union_default_mode == UnionMode::ALL) - union_modes[i] = ASTSelectWithUnionQuery::Mode::ALL; - else if (union_default_mode == UnionMode::DISTINCT) - union_modes[i] = ASTSelectWithUnionQuery::Mode::DISTINCT; - else - throw Exception( - "Expected ALL or DISTINCT in SelectWithUnion query, because setting (union_default_mode) is empty", - DB::ErrorCodes::EXPECTED_ALL_OR_DISTINCT); - } - - if (union_modes[i] == ASTSelectWithUnionQuery::Mode::ALL) - { - if (auto * inner_union = select_list[i + 1]->as()) - { - /// Inner_union is an UNION ALL list, just lift up - for (auto child = inner_union->list_of_selects->children.rbegin(); - child != inner_union->list_of_selects->children.rend(); - ++child) - selects.push_back(std::move(*child)); - } - else - selects.push_back(std::move(select_list[i + 1])); - } - /// flatten all left nodes and current node to a UNION DISTINCT list - else if (union_modes[i] == ASTSelectWithUnionQuery::Mode::DISTINCT) - { - auto distinct_list = std::make_shared(); - distinct_list->list_of_selects = std::make_shared(); - distinct_list->children.push_back(distinct_list->list_of_selects); - - for (int j = 0; j <= i + 1; ++j) - { - getSelectsFromUnionListNode(select_list[j], distinct_list->list_of_selects->children); - } - - distinct_list->union_mode = ASTSelectWithUnionQuery::Mode::DISTINCT; - distinct_list->is_normalized = true; - selects.push_back(std::move(distinct_list)); - break; - } - } - - /// No UNION DISTINCT or only one child in select_list - if (i == -1) - { - if (auto * inner_union = select_list[0]->as()) - { - /// Inner_union is an UNION ALL list, just lift it up - for (auto child = inner_union->list_of_selects->children.rbegin(); child != inner_union->list_of_selects->children.rend(); - ++child) - selects.push_back(std::move(*child)); - } - else - selects.push_back(std::move(select_list[0])); - } - - // reverse children list - std::reverse(selects.begin(), selects.end()); - - ast.is_normalized = true; - ast.union_mode = ASTSelectWithUnionQuery::Mode::ALL; - - ast.list_of_selects->children = std::move(selects); - } -}; - -/// We need normalize children first, so we should visit AST tree bottom up -using CustomizeASTSelectWithUnionQueryNormalizeVisitor - = InDepthNodeVisitor, false>; - InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( const ASTPtr & query_ptr_, const Context & context_, const SelectQueryOptions & options_, const Names & required_result_column_names) : IInterpreterUnionOrSelectQuery(query_ptr_, context_, options_) @@ -138,21 +36,6 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( if (options.subquery_depth == 0 && (settings.limit > 0 || settings.offset > 0)) settings_limit_offset_needed = true; - /// Normalize AST Tree - if (!ast->is_normalized) - { - CustomizeASTSelectWithUnionQueryNormalizeVisitor::Data union_default_mode{settings.union_default_mode}; - CustomizeASTSelectWithUnionQueryNormalizeVisitor(union_default_mode).visit(query_ptr); - - /// After normalization, if it only has one ASTSelectWithUnionQuery child, - /// we can lift it up, this can reduce one unnecessary recursion later. - if (ast->list_of_selects->children.size() == 1 && ast->list_of_selects->children.at(0)->as()) - { - query_ptr = std::move(ast->list_of_selects->children.at(0)); - ast = query_ptr->as(); - } - } - size_t num_children = ast->list_of_selects->children.size(); if (!num_children) throw Exception("Logical error: no children in ASTSelectWithUnionQuery", ErrorCodes::LOGICAL_ERROR); diff --git a/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.cpp b/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.cpp new file mode 100644 index 00000000000..31d33d0781a --- /dev/null +++ b/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.cpp @@ -0,0 +1,107 @@ +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int EXPECTED_ALL_OR_DISTINCT; +} + +void NormalizeSelectWithUnionQueryMatcher::getSelectsFromUnionListNode(ASTPtr & ast_select, ASTs & selects) +{ + if (auto * inner_union = ast_select->as()) + { + for (auto & child : inner_union->list_of_selects->children) + getSelectsFromUnionListNode(child, selects); + + return; + } + + selects.push_back(std::move(ast_select)); +} + +void NormalizeSelectWithUnionQueryMatcher::visit(ASTPtr & ast, Data & data) +{ + if (auto * select_union = ast->as()) + visit(*select_union, data); +} + +void NormalizeSelectWithUnionQueryMatcher::visit(ASTSelectWithUnionQuery & ast, Data & data) +{ + auto & union_modes = ast.list_of_modes; + ASTs selects; + auto & select_list = ast.list_of_selects->children; + + int i; + for (i = union_modes.size() - 1; i >= 0; --i) + { + /// Rewrite UNION Mode + if (union_modes[i] == ASTSelectWithUnionQuery::Mode::Unspecified) + { + if (data.union_default_mode == UnionMode::ALL) + union_modes[i] = ASTSelectWithUnionQuery::Mode::ALL; + else if (data.union_default_mode == UnionMode::DISTINCT) + union_modes[i] = ASTSelectWithUnionQuery::Mode::DISTINCT; + else + throw Exception( + "Expected ALL or DISTINCT in SelectWithUnion query, because setting (union_default_mode) is empty", + DB::ErrorCodes::EXPECTED_ALL_OR_DISTINCT); + } + + if (union_modes[i] == ASTSelectWithUnionQuery::Mode::ALL) + { + if (auto * inner_union = select_list[i + 1]->as()) + { + /// Inner_union is an UNION ALL list, just lift up + for (auto child = inner_union->list_of_selects->children.rbegin(); child != inner_union->list_of_selects->children.rend(); + ++child) + selects.push_back(std::move(*child)); + } + else + selects.push_back(std::move(select_list[i + 1])); + } + /// flatten all left nodes and current node to a UNION DISTINCT list + else if (union_modes[i] == ASTSelectWithUnionQuery::Mode::DISTINCT) + { + auto distinct_list = std::make_shared(); + distinct_list->list_of_selects = std::make_shared(); + distinct_list->children.push_back(distinct_list->list_of_selects); + + for (int j = 0; j <= i + 1; ++j) + { + getSelectsFromUnionListNode(select_list[j], distinct_list->list_of_selects->children); + } + + distinct_list->union_mode = ASTSelectWithUnionQuery::Mode::DISTINCT; + distinct_list->is_normalized = true; + selects.push_back(std::move(distinct_list)); + break; + } + } + + /// No UNION DISTINCT or only one child in select_list + if (i == -1) + { + if (auto * inner_union = select_list[0]->as()) + { + /// Inner_union is an UNION ALL list, just lift it up + for (auto child = inner_union->list_of_selects->children.rbegin(); child != inner_union->list_of_selects->children.rend(); + ++child) + selects.push_back(std::move(*child)); + } + else + selects.push_back(std::move(select_list[0])); + } + + // reverse children list + std::reverse(selects.begin(), selects.end()); + + ast.is_normalized = true; + ast.union_mode = ASTSelectWithUnionQuery::Mode::ALL; + + ast.list_of_selects->children = std::move(selects); +} +} diff --git a/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.h b/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.h new file mode 100644 index 00000000000..cec2e4265e2 --- /dev/null +++ b/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.h @@ -0,0 +1,34 @@ +#pragma once + +#include + +#include +#include + +#include +#include + +namespace DB +{ + +class ASTFunction; + +class NormalizeSelectWithUnionQueryMatcher +{ +public: + struct Data + { + const UnionMode & union_default_mode; + }; + + static void getSelectsFromUnionListNode(ASTPtr & ast_select, ASTs & selects); + + static void visit(ASTPtr & ast, Data &); + static void visit(ASTSelectWithUnionQuery &, Data &); + static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return true; } +}; + +/// We need normalize children first, so we should visit AST tree bottom up +using NormalizeSelectWithUnionQueryVisitor + = InDepthNodeVisitor; +} diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index d786e1146be..3d6332b45a0 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -39,16 +39,17 @@ #include #include -#include -#include -#include -#include -#include #include +#include +#include +#include +#include +#include +#include +#include #include #include #include -#include #include #include @@ -475,6 +476,19 @@ static std::tuple executeQueryImpl( query = serializeAST(*ast); } + /// Normalize SelectWithUnionQuery + NormalizeSelectWithUnionQueryVisitor::Data data{context.getSettingsRef().union_default_mode}; + NormalizeSelectWithUnionQueryVisitor{data}.visit(ast); + + /// After normalization, if it only has one ASTSelectWithUnionQuery child, + /// we can lift it up, this can reduce one unnecessary recursion later in interpreter phase + auto select_union = ast->as(); + if (select_union && select_union->list_of_selects->children.size() == 1 + && select_union->list_of_selects->children.at(0)->as()) + ast = std::move(select_union->list_of_selects->children.at(0)); + + query = serializeAST(*ast); + /// Check the limits. checkASTSizeLimits(*ast, settings); @@ -874,7 +888,7 @@ static std::tuple executeQueryImpl( LOG_DEBUG(&Poco::Logger::get("executeQuery"), "Query pipeline:\n{}", msg_buf.str()); } } - } + } catch (...) { if (!internal) From 4c30c1009206de439d363c6562b24adf0a1e2fc3 Mon Sep 17 00:00:00 2001 From: feng lv Date: Fri, 26 Feb 2021 12:04:11 +0000 Subject: [PATCH 2/3] add test fix --- .../NormalizeSelectWithUnionQueryVisitor.cpp | 23 +++-- src/Interpreters/executeQuery.cpp | 9 +- src/Interpreters/ya.make | 1 + ...01732_explain_syntax_union_query.reference | 66 ++++++++++++++ .../01732_explain_syntax_union_query.sql | 86 +++++++++++++++++++ 5 files changed, 170 insertions(+), 15 deletions(-) create mode 100644 tests/queries/0_stateless/01732_explain_syntax_union_query.reference create mode 100644 tests/queries/0_stateless/01732_explain_syntax_union_query.sql diff --git a/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.cpp b/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.cpp index 31d33d0781a..d65755f98ba 100644 --- a/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.cpp +++ b/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.cpp @@ -20,7 +20,7 @@ void NormalizeSelectWithUnionQueryMatcher::getSelectsFromUnionListNode(ASTPtr & return; } - selects.push_back(std::move(ast_select)); + selects.push_back(ast_select); } void NormalizeSelectWithUnionQueryMatcher::visit(ASTPtr & ast, Data & data) @@ -53,15 +53,16 @@ void NormalizeSelectWithUnionQueryMatcher::visit(ASTSelectWithUnionQuery & ast, if (union_modes[i] == ASTSelectWithUnionQuery::Mode::ALL) { - if (auto * inner_union = select_list[i + 1]->as()) + if (auto * inner_union = select_list[i + 1]->as(); + inner_union && inner_union->union_mode == ASTSelectWithUnionQuery::Mode::ALL) { /// Inner_union is an UNION ALL list, just lift up for (auto child = inner_union->list_of_selects->children.rbegin(); child != inner_union->list_of_selects->children.rend(); ++child) - selects.push_back(std::move(*child)); + selects.push_back(*child); } else - selects.push_back(std::move(select_list[i + 1])); + selects.push_back(select_list[i + 1]); } /// flatten all left nodes and current node to a UNION DISTINCT list else if (union_modes[i] == ASTSelectWithUnionQuery::Mode::DISTINCT) @@ -85,15 +86,23 @@ void NormalizeSelectWithUnionQueryMatcher::visit(ASTSelectWithUnionQuery & ast, /// No UNION DISTINCT or only one child in select_list if (i == -1) { - if (auto * inner_union = select_list[0]->as()) + if (auto * inner_union = select_list[0]->as(); + inner_union && inner_union->union_mode == ASTSelectWithUnionQuery::Mode::ALL) { /// Inner_union is an UNION ALL list, just lift it up for (auto child = inner_union->list_of_selects->children.rbegin(); child != inner_union->list_of_selects->children.rend(); ++child) - selects.push_back(std::move(*child)); + selects.push_back(*child); } else - selects.push_back(std::move(select_list[0])); + selects.push_back(select_list[0]); + } + + /// Just one union type child, lift it up + if (selects.size() == 1 && selects[0]->as()) + { + ast = *(selects[0]->as()); + return; } // reverse children list diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 3d6332b45a0..62986793376 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -480,13 +480,6 @@ static std::tuple executeQueryImpl( NormalizeSelectWithUnionQueryVisitor::Data data{context.getSettingsRef().union_default_mode}; NormalizeSelectWithUnionQueryVisitor{data}.visit(ast); - /// After normalization, if it only has one ASTSelectWithUnionQuery child, - /// we can lift it up, this can reduce one unnecessary recursion later in interpreter phase - auto select_union = ast->as(); - if (select_union && select_union->list_of_selects->children.size() == 1 - && select_union->list_of_selects->children.at(0)->as()) - ast = std::move(select_union->list_of_selects->children.at(0)); - query = serializeAST(*ast); /// Check the limits. @@ -888,7 +881,7 @@ static std::tuple executeQueryImpl( LOG_DEBUG(&Poco::Logger::get("executeQuery"), "Query pipeline:\n{}", msg_buf.str()); } } - } + } catch (...) { if (!internal) diff --git a/src/Interpreters/ya.make b/src/Interpreters/ya.make index 879333db507..3eab077df86 100644 --- a/src/Interpreters/ya.make +++ b/src/Interpreters/ya.make @@ -111,6 +111,7 @@ SRCS( MetricLog.cpp MutationsInterpreter.cpp MySQL/InterpretersMySQLDDLQuery.cpp + NormalizeSelectWithUnionQueryVisitor.cpp NullableUtils.cpp OpenTelemetrySpanLog.cpp OptimizeIfChains.cpp diff --git a/tests/queries/0_stateless/01732_explain_syntax_union_query.reference b/tests/queries/0_stateless/01732_explain_syntax_union_query.reference new file mode 100644 index 00000000000..fe5eb01a7ed --- /dev/null +++ b/tests/queries/0_stateless/01732_explain_syntax_union_query.reference @@ -0,0 +1,66 @@ +SELECT 1 +UNION ALL +SELECT 1 +UNION ALL +SELECT 1 +UNION ALL +SELECT 1 +UNION ALL +SELECT 1 + +SELECT 1 +UNION ALL +( + SELECT 1 + UNION DISTINCT + SELECT 1 + UNION DISTINCT + SELECT 1 +) +UNION ALL +SELECT 1 + +SELECT x +FROM +( + SELECT 1 AS x + UNION ALL + ( + SELECT 1 + UNION DISTINCT + SELECT 1 + UNION DISTINCT + SELECT 1 + ) + UNION ALL + SELECT 1 +) + +SELECT x +FROM +( + SELECT 1 AS x + UNION ALL + SELECT 1 + UNION ALL + SELECT 1 +) + +SELECT 1 +UNION DISTINCT +SELECT 1 +UNION DISTINCT +SELECT 1 + +SELECT 1 + + +( + SELECT 1 + UNION DISTINCT + SELECT 1 + UNION DISTINCT + SELECT 1 +) +UNION ALL +SELECT 1 diff --git a/tests/queries/0_stateless/01732_explain_syntax_union_query.sql b/tests/queries/0_stateless/01732_explain_syntax_union_query.sql new file mode 100644 index 00000000000..0dd1e19e765 --- /dev/null +++ b/tests/queries/0_stateless/01732_explain_syntax_union_query.sql @@ -0,0 +1,86 @@ +EXPLAIN SYNTAX +SELECT 1 +UNION ALL +( + SELECT 1 + UNION ALL + ( + SELECT 1 + UNION ALL + SELECT 1 + ) + UNION ALL + SELECT 1 +); + +SELECT ' '; + +EXPLAIN SYNTAX +SELECT 1 +UNION ALL +( + SELECT 1 + UNION DISTINCT + ( + SELECT 1 + UNION ALL + SELECT 1 + ) + UNION ALL + SELECT 1 +); + +SELECT ' '; + +EXPLAIN SYNTAX +SELECT x +FROM +( + SELECT 1 AS x + UNION ALL + ( + SELECT 1 + UNION DISTINCT + ( + SELECT 1 + UNION ALL + SELECT 1 + ) + UNION ALL + SELECT 1 + ) +); + +SELECT ' '; + +EXPLAIN SYNTAX +SELECT x +FROM +( + SELECT 1 AS x + UNION ALL + ( + SELECT 1 + UNION ALL + SELECT 1 + ) +); + +SELECT ' '; + +EXPLAIN SYNTAX +SELECT 1 +UNION ALL +SELECT 1 +UNION DISTINCT +SELECT 1; + +SELECT ' '; + +EXPLAIN SYNTAX +(((((((((((((((SELECT 1))))))))))))))); + +SELECT ' '; + +EXPLAIN SYNTAX +(((((((((((((((SELECT 1 UNION DISTINCT SELECT 1))) UNION DISTINCT SELECT 1)))) UNION ALL SELECT 1)))))))); From 59a2c45555ed7e1bceaf8b31bf0e5fa39cd60ab7 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 2 Mar 2021 13:09:29 +0300 Subject: [PATCH 3/3] Update executeQuery.cpp --- src/Interpreters/executeQuery.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 62986793376..1a0aa031d6f 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -473,15 +473,12 @@ static std::tuple executeQueryImpl( if (settings.enable_global_with_statement) { ApplyWithGlobalVisitor().visit(ast); - query = serializeAST(*ast); } /// Normalize SelectWithUnionQuery NormalizeSelectWithUnionQueryVisitor::Data data{context.getSettingsRef().union_default_mode}; NormalizeSelectWithUnionQueryVisitor{data}.visit(ast); - query = serializeAST(*ast); - /// Check the limits. checkASTSizeLimits(*ast, settings);