Merge pull request #21246 from ucasFL/union-distinct-improve

Improve Normalization of ASTSelectWithUnionQuery
This commit is contained in:
Nikolai Kochetov 2021-03-02 21:37:07 +03:00 committed by GitHub
commit 8e6fa404c4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 314 additions and 124 deletions

View File

@ -24,110 +24,8 @@ namespace ErrorCodes
{ {
extern const int LOGICAL_ERROR; extern const int LOGICAL_ERROR;
extern const int UNION_ALL_RESULT_STRUCTURES_MISMATCH; extern const int UNION_ALL_RESULT_STRUCTURES_MISMATCH;
extern const int EXPECTED_ALL_OR_DISTINCT;
} }
/// Matcher that rewrites one ASTSelectWithUnionQuery node in place:
/// every Unspecified union mode is replaced using `union_default_mode`, nested union
/// nodes joined with ALL are lifted into the parent list, and the first DISTINCT found
/// when scanning the modes from right to left collapses everything to its left into a
/// single UNION DISTINCT child.
struct CustomizeASTSelectWithUnionQueryNormalize
{
using TypeToVisit = ASTSelectWithUnionQuery;
/// Mode substituted for ASTSelectWithUnionQuery::Mode::Unspecified entries.
const UnionMode & union_default_mode;
/// Recursively appends every non-union node reachable from `ast_select` to `selects`,
/// preserving left-to-right order. Leaf pointers are moved out of `ast_select`,
/// so the source slot is left in a moved-from state.
static void getSelectsFromUnionListNode(ASTPtr & ast_select, ASTs & selects)
{
if (auto * inner_union = ast_select->as<ASTSelectWithUnionQuery>())
{
for (auto & child : inner_union->list_of_selects->children)
getSelectsFromUnionListNode(child, selects);
return;
}
selects.push_back(std::move(ast_select));
}
/// Normalizes `ast` in place. The second parameter is unused.
/// Throws EXPECTED_ALL_OR_DISTINCT when a mode is Unspecified and
/// `union_default_mode` is neither ALL nor DISTINCT.
void visit(ASTSelectWithUnionQuery & ast, ASTPtr &) const
{
auto & union_modes = ast.list_of_modes;
/// Collected in reverse order (right-most select first); reversed before being stored.
ASTs selects;
auto & select_list = ast.list_of_selects->children;
/// Declared outside the loop because its final value is inspected after the loop.
int i;
/// union_modes[i] is the mode between select_list[i] and select_list[i + 1];
/// the modes are scanned from right to left.
for (i = union_modes.size() - 1; i >= 0; --i)
{
/// Replace an unspecified mode with the configured default.
if (union_modes[i] == ASTSelectWithUnionQuery::Mode::Unspecified)
{
if (union_default_mode == UnionMode::ALL)
union_modes[i] = ASTSelectWithUnionQuery::Mode::ALL;
else if (union_default_mode == UnionMode::DISTINCT)
union_modes[i] = ASTSelectWithUnionQuery::Mode::DISTINCT;
else
throw Exception(
"Expected ALL or DISTINCT in SelectWithUnion query, because setting (union_default_mode) is empty",
DB::ErrorCodes::EXPECTED_ALL_OR_DISTINCT);
}
if (union_modes[i] == ASTSelectWithUnionQuery::Mode::ALL)
{
if (auto * inner_union = select_list[i + 1]->as<ASTSelectWithUnionQuery>())
{
/// The right operand is itself a union node: lift its children up.
/// They are appended back to front because `selects` is built reversed.
for (auto child = inner_union->list_of_selects->children.rbegin();
child != inner_union->list_of_selects->children.rend();
++child)
selects.push_back(std::move(*child));
}
else
selects.push_back(std::move(select_list[i + 1]));
}
/// Flatten the current node and every node to its left into one UNION DISTINCT list.
else if (union_modes[i] == ASTSelectWithUnionQuery::Mode::DISTINCT)
{
auto distinct_list = std::make_shared<ASTSelectWithUnionQuery>();
distinct_list->list_of_selects = std::make_shared<ASTExpressionList>();
distinct_list->children.push_back(distinct_list->list_of_selects);
for (int j = 0; j <= i + 1; ++j)
{
getSelectsFromUnionListNode(select_list[j], distinct_list->list_of_selects->children);
}
distinct_list->union_mode = ASTSelectWithUnionQuery::Mode::DISTINCT;
distinct_list->is_normalized = true;
selects.push_back(std::move(distinct_list));
/// Everything to the left has been consumed, so the scan stops here.
break;
}
}
/// The loop ran to completion (no DISTINCT was met, or there were no modes at all),
/// so select_list[0] has not been collected yet.
if (i == -1)
{
if (auto * inner_union = select_list[0]->as<ASTSelectWithUnionQuery>())
{
/// The first operand is itself a union node: lift its children up (back to front).
for (auto child = inner_union->list_of_selects->children.rbegin(); child != inner_union->list_of_selects->children.rend();
++child)
selects.push_back(std::move(*child));
}
else
selects.push_back(std::move(select_list[0]));
}
/// Restore left-to-right order.
std::reverse(selects.begin(), selects.end());
ast.is_normalized = true;
ast.union_mode = ASTSelectWithUnionQuery::Mode::ALL;
ast.list_of_selects->children = std::move(selects);
}
};
/// Children have to be normalized before their parent, so the AST must be visited bottom-up.
/// NOTE(review): presumably the trailing `false` template argument is what selects
/// bottom-up traversal — confirm against InDepthNodeVisitor.
using CustomizeASTSelectWithUnionQueryNormalizeVisitor
= InDepthNodeVisitor<OneTypeMatcher<CustomizeASTSelectWithUnionQueryNormalize>, false>;
InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
const ASTPtr & query_ptr_, const Context & context_, const SelectQueryOptions & options_, const Names & required_result_column_names) const ASTPtr & query_ptr_, const Context & context_, const SelectQueryOptions & options_, const Names & required_result_column_names)
: IInterpreterUnionOrSelectQuery(query_ptr_, context_, options_) : IInterpreterUnionOrSelectQuery(query_ptr_, context_, options_)
@ -138,21 +36,6 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
if (options.subquery_depth == 0 && (settings.limit > 0 || settings.offset > 0)) if (options.subquery_depth == 0 && (settings.limit > 0 || settings.offset > 0))
settings_limit_offset_needed = true; settings_limit_offset_needed = true;
/// Normalize AST Tree
if (!ast->is_normalized)
{
CustomizeASTSelectWithUnionQueryNormalizeVisitor::Data union_default_mode{settings.union_default_mode};
CustomizeASTSelectWithUnionQueryNormalizeVisitor(union_default_mode).visit(query_ptr);
/// After normalization, if it only has one ASTSelectWithUnionQuery child,
/// we can lift it up, this can reduce one unnecessary recursion later.
if (ast->list_of_selects->children.size() == 1 && ast->list_of_selects->children.at(0)->as<ASTSelectWithUnionQuery>())
{
query_ptr = std::move(ast->list_of_selects->children.at(0));
ast = query_ptr->as<ASTSelectWithUnionQuery>();
}
}
size_t num_children = ast->list_of_selects->children.size(); size_t num_children = ast->list_of_selects->children.size();
if (!num_children) if (!num_children)
throw Exception("Logical error: no children in ASTSelectWithUnionQuery", ErrorCodes::LOGICAL_ERROR); throw Exception("Logical error: no children in ASTSelectWithUnionQuery", ErrorCodes::LOGICAL_ERROR);

View File

@ -0,0 +1,116 @@
#include <Interpreters/NormalizeSelectWithUnionQueryVisitor.h>
#include <Parsers/ASTExpressionList.h>
#include <Common/typeid_cast.h>
namespace DB
{
/// Error codes referenced in this file; only declared here (extern).
namespace ErrorCodes
{
/// Thrown when a UNION mode is unspecified and union_default_mode is neither ALL nor DISTINCT.
extern const int EXPECTED_ALL_OR_DISTINCT;
}
/// Appends every non-union node reachable from `ast_select` to `selects`, left to right.
/// Nested ASTSelectWithUnionQuery nodes are expanded recursively; any other node is
/// appended as is (the pointer is copied, the source tree is left untouched).
void NormalizeSelectWithUnionQueryMatcher::getSelectsFromUnionListNode(ASTPtr & ast_select, ASTs & selects)
{
    auto * nested_union = ast_select->as<ASTSelectWithUnionQuery>();

    /// Not a union node: this is a leaf, collect it and stop.
    if (!nested_union)
    {
        selects.push_back(ast_select);
        return;
    }

    for (auto & nested_child : nested_union->list_of_selects->children)
        getSelectsFromUnionListNode(nested_child, selects);
}
/// Visitor entry point: forwards to the ASTSelectWithUnionQuery overload when `ast`
/// is such a node and does nothing for every other node type.
void NormalizeSelectWithUnionQueryMatcher::visit(ASTPtr & ast, Data & data)
{
    auto * union_node = ast->as<ASTSelectWithUnionQuery>();
    if (union_node == nullptr)
        return;

    visit(*union_node, data);
}
/// Normalizes one ASTSelectWithUnionQuery node in place.
///
/// The list of union modes is scanned from right to left:
///  - an Unspecified mode is replaced using data.union_default_mode;
///  - for ALL, the right operand is collected; if it is itself a union node whose
///    union_mode is ALL, its children are lifted into this node instead;
///  - the first DISTINCT met collapses the current operand and everything to its left
///    into a single UNION DISTINCT child, and the scan stops.
/// If the result is a single union node, that node replaces `ast` entirely.
///
/// Nodes already marked `is_normalized` and nodes without any select are left untouched.
///
/// Throws EXPECTED_ALL_OR_DISTINCT when a mode is Unspecified and
/// data.union_default_mode is neither ALL nor DISTINCT.
void NormalizeSelectWithUnionQueryMatcher::visit(ASTSelectWithUnionQuery & ast, Data & data)
{
    /// Normalization rewrites list_of_selects but keeps list_of_modes, so the two no longer
    /// line up afterwards. Running the rewrite again on such a node would silently drop
    /// selects, hence the pass must be idempotent.
    if (ast.is_normalized)
        return;

    auto & union_modes = ast.list_of_modes;
    auto & select_list = ast.list_of_selects->children;

    /// Nothing to normalize; also avoids reading select_list[0] of an empty list below.
    if (select_list.empty())
        return;

    /// Collected in reverse order (right-most select first); reversed before being stored.
    ASTs selects;

    /// Collects one operand of a UNION ALL: a nested UNION ALL list is lifted up
    /// (children appended back to front, because `selects` is built reversed),
    /// any other node is appended as is.
    auto collect_operand = [&selects](const ASTPtr & node)
    {
        auto * inner_union = node->as<ASTSelectWithUnionQuery>();
        if (inner_union && inner_union->union_mode == ASTSelectWithUnionQuery::Mode::ALL)
        {
            auto & inner_children = inner_union->list_of_selects->children;
            for (auto child = inner_children.rbegin(); child != inner_children.rend(); ++child)
                selects.push_back(*child);
        }
        else
            selects.push_back(node);
    };

    /// union_modes[i] is the mode between select_list[i] and select_list[i + 1].
    /// `i` outlives the loop because its final value tells whether the scan completed.
    int i = static_cast<int>(union_modes.size()) - 1;
    for (; i >= 0; --i)
    {
        /// Replace an unspecified mode with the configured default.
        if (union_modes[i] == ASTSelectWithUnionQuery::Mode::Unspecified)
        {
            if (data.union_default_mode == UnionMode::ALL)
                union_modes[i] = ASTSelectWithUnionQuery::Mode::ALL;
            else if (data.union_default_mode == UnionMode::DISTINCT)
                union_modes[i] = ASTSelectWithUnionQuery::Mode::DISTINCT;
            else
                throw Exception(
                    "Expected ALL or DISTINCT in SelectWithUnion query, because setting (union_default_mode) is empty",
                    DB::ErrorCodes::EXPECTED_ALL_OR_DISTINCT);
        }

        if (union_modes[i] == ASTSelectWithUnionQuery::Mode::ALL)
        {
            collect_operand(select_list[i + 1]);
        }
        /// Flatten the current node and every node to its left into one UNION DISTINCT list.
        else if (union_modes[i] == ASTSelectWithUnionQuery::Mode::DISTINCT)
        {
            auto distinct_list = std::make_shared<ASTSelectWithUnionQuery>();
            distinct_list->list_of_selects = std::make_shared<ASTExpressionList>();
            distinct_list->children.push_back(distinct_list->list_of_selects);

            for (int j = 0; j <= i + 1; ++j)
                getSelectsFromUnionListNode(select_list[j], distinct_list->list_of_selects->children);

            distinct_list->union_mode = ASTSelectWithUnionQuery::Mode::DISTINCT;
            distinct_list->is_normalized = true;
            selects.push_back(std::move(distinct_list));

            /// Everything to the left has been consumed, so the scan stops here.
            break;
        }
    }

    /// The scan ran to completion (no DISTINCT was met, or there were no modes at all),
    /// so select_list[0] has not been collected yet.
    if (i == -1)
        collect_operand(select_list[0]);

    /// Exactly one child and it is a union node: lift it up in place of `ast`.
    if (selects.size() == 1)
    {
        if (auto * only_union = selects[0]->as<ASTSelectWithUnionQuery>())
        {
            /// `selects` still holds a pointer to the node while it is copied over `ast`.
            ast = *only_union;
            return;
        }
    }

    /// Restore left-to-right order.
    std::reverse(selects.begin(), selects.end());

    ast.is_normalized = true;
    ast.union_mode = ASTSelectWithUnionQuery::Mode::ALL;
    ast.list_of_selects->children = std::move(selects);
}
}

View File

@ -0,0 +1,34 @@
#pragma once
#include <unordered_set>
#include <Parsers/IAST.h>
#include <Interpreters/InDepthNodeVisitor.h>
#include <Core/Settings.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
namespace DB
{
class ASTFunction;
/// Matcher that normalizes ASTSelectWithUnionQuery nodes: unspecified union modes are
/// resolved from a default, nested UNION ALL lists are flattened and UNION DISTINCT
/// groups are collapsed into a single child node.
class NormalizeSelectWithUnionQueryMatcher
{
public:
    /// State shared by all visits of one traversal.
    struct Data
    {
        /// Mode substituted for unspecified UNION modes.
        /// Stored by value rather than by reference: a reference member would dangle as soon
        /// as Data outlived (or was copied away from) the object it was initialized from.
        UnionMode union_default_mode;
    };

    /// Appends every non-union node reachable from `ast_select` to `selects`,
    /// expanding nested ASTSelectWithUnionQuery nodes recursively.
    static void getSelectsFromUnionListNode(ASTPtr & ast_select, ASTs & selects);

    /// Visitor entry point; only ASTSelectWithUnionQuery nodes are processed.
    static void visit(ASTPtr & ast, Data & data);

    /// Normalizes a single ASTSelectWithUnionQuery node in place.
    static void visit(ASTSelectWithUnionQuery & ast, Data & data);

    /// Every child is visited.
    static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return true; }
};
/// Children have to be normalized before their parent, so the AST must be visited bottom-up.
/// NOTE(review): presumably the trailing `false` template argument is what selects
/// bottom-up traversal — confirm against InDepthNodeVisitor.
using NormalizeSelectWithUnionQueryVisitor
= InDepthNodeVisitor<NormalizeSelectWithUnionQueryMatcher, false>;
}

View File

@ -39,16 +39,17 @@
#include <Storages/StorageInput.h> #include <Storages/StorageInput.h>
#include <Access/EnabledQuota.h> #include <Access/EnabledQuota.h>
#include <Interpreters/InterpreterFactory.h>
#include <Interpreters/ProcessList.h>
#include <Interpreters/OpenTelemetrySpanLog.h>
#include <Interpreters/QueryLog.h>
#include <Interpreters/InterpreterSetQuery.h>
#include <Interpreters/ApplyWithGlobalVisitor.h> #include <Interpreters/ApplyWithGlobalVisitor.h>
#include <Interpreters/Context.h>
#include <Interpreters/InterpreterFactory.h>
#include <Interpreters/InterpreterSetQuery.h>
#include <Interpreters/NormalizeSelectWithUnionQueryVisitor.h>
#include <Interpreters/OpenTelemetrySpanLog.h>
#include <Interpreters/ProcessList.h>
#include <Interpreters/QueryLog.h>
#include <Interpreters/ReplaceQueryParameterVisitor.h> #include <Interpreters/ReplaceQueryParameterVisitor.h>
#include <Interpreters/SelectQueryOptions.h> #include <Interpreters/SelectQueryOptions.h>
#include <Interpreters/executeQuery.h> #include <Interpreters/executeQuery.h>
#include <Interpreters/Context.h>
#include <Common/ProfileEvents.h> #include <Common/ProfileEvents.h>
#include <Common/SensitiveDataMasker.h> #include <Common/SensitiveDataMasker.h>
@ -472,9 +473,12 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
if (settings.enable_global_with_statement) if (settings.enable_global_with_statement)
{ {
ApplyWithGlobalVisitor().visit(ast); ApplyWithGlobalVisitor().visit(ast);
query = serializeAST(*ast);
} }
/// Normalize SelectWithUnionQuery
NormalizeSelectWithUnionQueryVisitor::Data data{context.getSettingsRef().union_default_mode};
NormalizeSelectWithUnionQueryVisitor{data}.visit(ast);
/// Check the limits. /// Check the limits.
checkASTSizeLimits(*ast, settings); checkASTSizeLimits(*ast, settings);

View File

@ -111,6 +111,7 @@ SRCS(
MetricLog.cpp MetricLog.cpp
MutationsInterpreter.cpp MutationsInterpreter.cpp
MySQL/InterpretersMySQLDDLQuery.cpp MySQL/InterpretersMySQLDDLQuery.cpp
NormalizeSelectWithUnionQueryVisitor.cpp
NullableUtils.cpp NullableUtils.cpp
OpenTelemetrySpanLog.cpp OpenTelemetrySpanLog.cpp
OptimizeIfChains.cpp OptimizeIfChains.cpp

View File

@ -0,0 +1,66 @@
SELECT 1
UNION ALL
SELECT 1
UNION ALL
SELECT 1
UNION ALL
SELECT 1
UNION ALL
SELECT 1
SELECT 1
UNION ALL
(
SELECT 1
UNION DISTINCT
SELECT 1
UNION DISTINCT
SELECT 1
)
UNION ALL
SELECT 1
SELECT x
FROM
(
SELECT 1 AS x
UNION ALL
(
SELECT 1
UNION DISTINCT
SELECT 1
UNION DISTINCT
SELECT 1
)
UNION ALL
SELECT 1
)
SELECT x
FROM
(
SELECT 1 AS x
UNION ALL
SELECT 1
UNION ALL
SELECT 1
)
SELECT 1
UNION DISTINCT
SELECT 1
UNION DISTINCT
SELECT 1
SELECT 1
(
SELECT 1
UNION DISTINCT
SELECT 1
UNION DISTINCT
SELECT 1
)
UNION ALL
SELECT 1

View File

@ -0,0 +1,86 @@
-- Checks how EXPLAIN SYNTAX prints UNION queries after normalization.
-- The `SELECT ' ';` statements only separate the outputs of consecutive cases.

-- Case 1: nested parenthesized UNION ALL lists; expected to flatten into one five-way UNION ALL.
EXPLAIN SYNTAX
SELECT 1
UNION ALL
(
SELECT 1
UNION ALL
(
SELECT 1
UNION ALL
SELECT 1
)
UNION ALL
SELECT 1
);
SELECT ' ';
-- Case 2: UNION DISTINCT inside a parenthesized branch; the selects to its left within that
-- branch (including the nested UNION ALL pair) are expected to become one three-way
-- UNION DISTINCT group that stays a single operand of the outer UNION ALL.
EXPLAIN SYNTAX
SELECT 1
UNION ALL
(
SELECT 1
UNION DISTINCT
(
SELECT 1
UNION ALL
SELECT 1
)
UNION ALL
SELECT 1
);
SELECT ' ';
-- Case 3: same shape as case 2, but inside a subquery in FROM.
EXPLAIN SYNTAX
SELECT x
FROM
(
SELECT 1 AS x
UNION ALL
(
SELECT 1
UNION DISTINCT
(
SELECT 1
UNION ALL
SELECT 1
)
UNION ALL
SELECT 1
)
);
SELECT ' ';
-- Case 4: nested UNION ALL inside a subquery in FROM; expected to flatten into a three-way UNION ALL.
EXPLAIN SYNTAX
SELECT x
FROM
(
SELECT 1 AS x
UNION ALL
(
SELECT 1
UNION ALL
SELECT 1
)
);
SELECT ' ';
-- Case 5: UNION ALL followed by UNION DISTINCT at the same level; all three selects are
-- expected to end up in a single UNION DISTINCT list.
EXPLAIN SYNTAX
SELECT 1
UNION ALL
SELECT 1
UNION DISTINCT
SELECT 1;
SELECT ' ';
-- Case 6: redundant parentheses around a single SELECT; expected to print as a plain SELECT 1.
EXPLAIN SYNTAX
(((((((((((((((SELECT 1)))))))))))))));
SELECT ' ';
-- Case 7: deeply parenthesized mix; expected to print a three-way UNION DISTINCT group
-- followed by UNION ALL SELECT 1.
EXPLAIN SYNTAX
(((((((((((((((SELECT 1 UNION DISTINCT SELECT 1))) UNION DISTINCT SELECT 1)))) UNION ALL SELECT 1))))))));