normalize ASTSelectWithUnionQuery

This commit is contained in:
feng lv 2020-11-07 11:38:20 +00:00
parent 4bf7b54dff
commit 125eb02726
10 changed files with 284 additions and 54 deletions

View File

@ -170,6 +170,7 @@ Block InterpreterSelectWithUnionQuery::getSampleBlock(const ASTPtr & query_ptr_,
return cache[key] = InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().analyze()).getSampleBlock(); return cache[key] = InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().analyze()).getSampleBlock();
} }
#if 0
size_t InterpreterSelectWithUnionQuery::optimizeUnionList() size_t InterpreterSelectWithUnionQuery::optimizeUnionList()
{ {
auto union_distinct_num = 0; auto union_distinct_num = 0;
@ -213,10 +214,11 @@ size_t InterpreterSelectWithUnionQuery::optimizeUnionList()
} }
return union_distinct_num; return union_distinct_num;
} }
#endif
void InterpreterSelectWithUnionQuery::buildQueryPlan(QueryPlan & query_plan) void InterpreterSelectWithUnionQuery::buildQueryPlan(QueryPlan & query_plan)
{ {
auto num_distinct_union = optimizeUnionList(); // auto num_distinct_union = optimizeUnionList();
size_t num_plans = nested_interpreters.size(); size_t num_plans = nested_interpreters.size();
/// Skip union for single interpreter. /// Skip union for single interpreter.
@ -227,8 +229,8 @@ void InterpreterSelectWithUnionQuery::buildQueryPlan(QueryPlan & query_plan)
} }
/// All UNION streams in the chain does not need to do DISTINCT transform /// All UNION streams in the chain does not need to do DISTINCT transform
if (num_distinct_union == 0) // if (num_distinct_union == 0)
{ // {
std::vector<std::unique_ptr<QueryPlan>> plans(num_plans); std::vector<std::unique_ptr<QueryPlan>> plans(num_plans);
DataStreams data_streams(num_plans); DataStreams data_streams(num_plans);
@ -243,9 +245,23 @@ void InterpreterSelectWithUnionQuery::buildQueryPlan(QueryPlan & query_plan)
auto union_step = std::make_unique<UnionStep>(std::move(data_streams), result_header, max_threads); auto union_step = std::make_unique<UnionStep>(std::move(data_streams), result_header, max_threads);
query_plan.unitePlans(std::move(union_step), std::move(plans)); query_plan.unitePlans(std::move(union_step), std::move(plans));
}
const auto & query = query_ptr->as<ASTSelectWithUnionQuery &>();
if (query.union_mode == ASTSelectWithUnionQuery::Mode::DISTINCT)
{
/// Add distinct transform
const Settings & settings = context->getSettingsRef();
SizeLimits limits(settings.max_rows_in_distinct, settings.max_bytes_in_distinct, settings.distinct_overflow_mode);
auto distinct_step
= std::make_unique<DistinctStep>(query_plan.getCurrentDataStream(), limits, 0, result_header.getNames(), false);
query_plan.addStep(std::move(distinct_step));
}
// }
/// The first union_distinct_num UNION streams need to do a DISTINCT transform after unite /// The first union_distinct_num UNION streams need to do a DISTINCT transform after unite
#if 0
else else
{ {
QueryPlan distinct_query_plan; QueryPlan distinct_query_plan;
@ -298,6 +314,7 @@ void InterpreterSelectWithUnionQuery::buildQueryPlan(QueryPlan & query_plan)
auto final_union_step = std::make_unique<UnionStep>(std::move(final_data_streams), result_header, max_threads); auto final_union_step = std::make_unique<UnionStep>(std::move(final_data_streams), result_header, max_threads);
query_plan.unitePlans(std::move(final_union_step), std::move(final_plans)); query_plan.unitePlans(std::move(final_union_step), std::move(final_plans));
} }
#endif
} }
BlockIO InterpreterSelectWithUnionQuery::execute() BlockIO InterpreterSelectWithUnionQuery::execute()

View File

@ -49,7 +49,7 @@ private:
std::unique_ptr<IInterpreterUnionOrSelectQuery> std::unique_ptr<IInterpreterUnionOrSelectQuery>
buildCurrentChildInterpreter(const ASTPtr & ast_ptr_, const Names & current_required_result_column_names); buildCurrentChildInterpreter(const ASTPtr & ast_ptr_, const Names & current_required_result_column_names);
size_t optimizeUnionList(); // size_t optimizeUnionList();
}; };
} }

View File

@ -14,7 +14,7 @@ ASTPtr ASTSelectWithUnionQuery::clone() const
res->list_of_selects = list_of_selects->clone(); res->list_of_selects = list_of_selects->clone();
res->children.push_back(res->list_of_selects); res->children.push_back(res->list_of_selects);
res->union_modes = union_modes; res->union_mode = union_mode;
cloneOutputOptions(*res); cloneOutputOptions(*res);
return res; return res;
@ -38,24 +38,15 @@ void ASTSelectWithUnionQuery::formatQueryImpl(const FormatSettings & settings, F
for (ASTs::const_iterator it = list_of_selects->children.begin(); it != list_of_selects->children.end(); ++it) for (ASTs::const_iterator it = list_of_selects->children.begin(); it != list_of_selects->children.end(); ++it)
{ {
if (it != list_of_selects->children.begin()) if (it != list_of_selects->children.begin())
settings.ostr << settings.nl_or_ws << indent_str << (settings.hilite ? hilite_keyword : "") << "UNION " settings.ostr
<< mode_to_str(union_modes[it - list_of_selects->children.begin() - 1]) << (settings.hilite ? hilite_none : ""); << settings.nl_or_ws << indent_str << (settings.hilite ? hilite_keyword : "")
<< "UNION "
<< mode_to_str(union_mode) << (settings.hilite ? hilite_none : "");
if (auto * node = (*it)->as<ASTSelectWithUnionQuery>()) if (auto * node = (*it)->as<ASTSelectWithUnionQuery>())
{ {
// just one child in subquery, () is not need auto sub_query = std::make_shared<ASTSubquery>();
if (node->list_of_selects->children.size() == 1) sub_query->children.push_back(*it);
{ sub_query->formatImpl(settings, state, frame);
if (it != list_of_selects->children.begin())
settings.ostr << settings.nl_or_ws;
node->list_of_selects->children.at(0)->formatImpl(settings, state, frame);
}
// more than one child in subquery
else
{
auto sub_query = std::make_shared<ASTSubquery>();
sub_query->children.push_back(*it);
sub_query->formatImpl(settings, state, frame);
}
} }
else else
{ {

View File

@ -23,9 +23,9 @@ public:
DISTINCT DISTINCT
}; };
using Modes = std::vector<Mode>; using UnionModes = std::vector<Mode>;
Modes union_modes; Mode union_mode;
ASTPtr list_of_selects; ASTPtr list_of_selects;
}; };

View File

@ -128,7 +128,7 @@ bool ParserUnionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
} }
// SELECT ... UNION SELECT ... // SELECT ... UNION SELECT ...
else else
union_modes.push_back(ASTSelectWithUnionQuery::Mode::Unspecified); union_modes.push_back(ASTSelectWithUnionQuery::Mode::DISTINCT);
return true; return true;
} }
return false; return false;

View File

@ -119,7 +119,7 @@ private:
ParserPtr s_union_parser; ParserPtr s_union_parser;
ParserPtr s_all_parser; ParserPtr s_all_parser;
ParserPtr s_distinct_parser; ParserPtr s_distinct_parser;
ASTSelectWithUnionQuery::Modes union_modes; ASTSelectWithUnionQuery::UnionModes union_modes;
}; };
/** An expression with an infix binary left-associative operator. /** An expression with an infix binary left-associative operator.

View File

@ -6,6 +6,84 @@
namespace DB namespace DB
{ {
/// Recursively collects the leaf (non-union) nodes reachable from `ast_select` into `selects`.
///
/// A node that is an ASTSelectWithUnionQuery is never appended itself: its children are
/// visited instead, from the last child to the first. Any other node is moved into `selects`,
/// which leaves the caller's `ast_select` pointer in a moved-from state.
///
/// Because children are visited back to front, leaves end up in `selects` in reverse
/// order; the caller is expected to reverse the collected list afterwards.
static void getSelectsFromUnionListNode(ASTPtr & ast_select, ASTs & selects)
{
    auto * nested_union = ast_select->as<ASTSelectWithUnionQuery>();

    /// Leaf node: hand it over to the output list and stop descending.
    if (!nested_union)
    {
        selects.push_back(std::move(ast_select));
        return;
    }

    /// Union node: descend into every child, last one first.
    auto & nested_children = nested_union->list_of_selects->children;
    for (auto it = nested_children.rbegin(); it != nested_children.rend(); ++it)
        getSelectsFromUnionListNode(*it, selects);
}
void normalizeSelectList(ASTs & select_list, const ASTSelectWithUnionQuery::UnionModes & union_modes, ASTs & selects)
{
int i;
for (i = union_modes.size() - 1; i >= 0; --i)
{
if (union_modes[i] == ASTSelectWithUnionQuery::Mode::ALL)
{
if (auto * inner_union = select_list[i + 1]->as<ASTSelectWithUnionQuery>())
{
/// If inner_union is an UNION ALL list, just lift up
if (inner_union->union_mode == ASTSelectWithUnionQuery::Mode::ALL)
{
for (auto child = inner_union->list_of_selects->children.rbegin();
child != inner_union->list_of_selects->children.rend();
++child)
selects.push_back(std::move(*child));
}
/// inner_union is an UNION DISTINCT list,
// we cann't lift up
else
selects.push_back(std::move(select_list[i + 1]));
}
else
selects.push_back(std::move(select_list[i + 1]));
}
/// flatten all left nodes and current node to a UNION DISTINCT list
else if (union_modes[i] == ASTSelectWithUnionQuery::Mode::DISTINCT)
{
auto distinct_list = std::make_shared<ASTSelectWithUnionQuery>();
distinct_list->list_of_selects = std::make_shared<ASTExpressionList>();
distinct_list->children.push_back(distinct_list->list_of_selects);
for (int j = i + 1; j >= 0; j--)
{
getSelectsFromUnionListNode(select_list[j], distinct_list->list_of_selects->children);
}
distinct_list->union_mode = ASTSelectWithUnionQuery::Mode::DISTINCT;
// Reverse children list
std::reverse(distinct_list->list_of_selects->children.begin(), distinct_list->list_of_selects->children.end());
selects.push_back(std::move(distinct_list));
return;
}
}
/// No UNION DISTINCT or only one SELECT in select_list
if (i == -1)
{
if (auto * inner_union = select_list[0]->as<ASTSelectWithUnionQuery>())
{
/// If inner_union is an UNION ALL list, just lift it up
if (inner_union->union_mode == ASTSelectWithUnionQuery::Mode::ALL)
{
for (auto child = inner_union->list_of_selects->children.rbegin();
child != inner_union->list_of_selects->children.rend();
++child)
selects.push_back(std::move(*child));
}
/// inner_union is an UNION DISTINCT list,
// we cann't lift it up
else
selects.push_back(std::move(select_list[i + 1]));
}
else
selects.push_back(std::move(select_list[0]));
}
}
bool ParserSelectWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) bool ParserSelectWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{ {
@ -20,16 +98,44 @@ bool ParserSelectWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected &
if (!parser.parse(pos, list_node, expected)) if (!parser.parse(pos, list_node, expected))
return false; return false;
/// NOTE: We can't simply flatten the inner union query now, since the query may mix different union modes,
/// so flattening may change its semantics. For example:
/// flatten `SELECT 1 UNION (SELECT 1 UNION ALL SELECT 1)` -> `SELECT 1 UNION SELECT 1 UNION ALL SELECT 1`
/// Before normalize, if we got only one child which is ASTSelectWithUnionQuery, just lift it up
auto & expr_list = list_node->as<ASTExpressionList &>();
if (expr_list.children.size() == 1)
{
if (expr_list.children.at(0)->as<ASTSelectWithUnionQuery>())
{
node = std::move(expr_list.children.at(0));
return true;
}
}
auto select_with_union_query = std::make_shared<ASTSelectWithUnionQuery>(); auto select_with_union_query = std::make_shared<ASTSelectWithUnionQuery>();
node = select_with_union_query; node = select_with_union_query;
select_with_union_query->list_of_selects = list_node; select_with_union_query->list_of_selects = std::make_shared<ASTExpressionList>();
select_with_union_query->children.push_back(select_with_union_query->list_of_selects); select_with_union_query->children.push_back(select_with_union_query->list_of_selects);
select_with_union_query->union_modes = parser.getUnionModes();
/// NOTE: We cann't flatten inner union query now, since we may have different union mode in query, auto union_modes = parser.getUnionModes();
/// so flatten may change it's semantics. For example:
/// flatten `SELECT 1 UNION (SELECT 1 UNION ALL SELETC 1)` -> `SELECT 1 UNION SELECT 1 UNION ALL SELECT 1` normalizeSelectList(expr_list.children, union_modes, select_with_union_query->list_of_selects->children);
/// We need reverse children list
std::reverse(select_with_union_query->list_of_selects->children.begin(), select_with_union_query->list_of_selects->children.end());
select_with_union_query->union_mode = ASTSelectWithUnionQuery::Mode::ALL;
/// After normalize, if we only have one ASTSelectWithUnionQuery child, lift it up
if (select_with_union_query->list_of_selects->children.size() == 1)
{
if (select_with_union_query->list_of_selects->children.at(0)->as<ASTSelectWithUnionQuery>())
{
node = std::move(select_with_union_query->list_of_selects->children.at(0));
}
}
return true; return true;
} }

View File

@ -44,16 +44,6 @@ all
1 1
1 1
1 1
1
1
1
1
1
1
1
1
1
all
all all
all all
1 1
@ -77,16 +67,3 @@ all
1 1
1 1
1 1
1
1
1
1
1
1
1
1
1
1
1
1
1

View File

@ -0,0 +1,126 @@
Union
Expression (Projection)
Expression (Before ORDER BY and SELECT)
ReadFromStorage (Read from SystemOne)
Expression (Projection)
Expression (Before ORDER BY and SELECT)
ReadFromStorage (Read from SystemOne)
Expression (Projection)
Expression (Before ORDER BY and SELECT)
ReadFromStorage (Read from SystemOne)
Union
Expression (Projection)
Expression (Before ORDER BY and SELECT)
ReadFromStorage (Read from SystemOne)
Expression (Projection)
Expression (Before ORDER BY and SELECT)
ReadFromStorage (Read from SystemOne)
Expression (Projection)
Expression (Before ORDER BY and SELECT)
ReadFromStorage (Read from SystemOne)
Union
Expression (Projection)
Expression (Before ORDER BY and SELECT)
ReadFromStorage (Read from SystemOne)
Expression (Projection)
Expression (Before ORDER BY and SELECT)
ReadFromStorage (Read from SystemOne)
Expression (Projection)
Expression (Before ORDER BY and SELECT)
ReadFromStorage (Read from SystemOne)
Distinct
Union
Expression (Projection)
Expression (Before ORDER BY and SELECT)
ReadFromStorage (Read from SystemOne)
Expression (Projection)
Expression (Before ORDER BY and SELECT)
ReadFromStorage (Read from SystemOne)
Expression (Projection)
Expression (Before ORDER BY and SELECT)
ReadFromStorage (Read from SystemOne)
Distinct
Union
Expression (Projection)
Expression (Before ORDER BY and SELECT)
ReadFromStorage (Read from SystemOne)
Expression (Projection)
Expression (Before ORDER BY and SELECT)
ReadFromStorage (Read from SystemOne)
Expression (Projection)
Expression (Before ORDER BY and SELECT)
ReadFromStorage (Read from SystemOne)
Distinct
Union
Expression (Projection)
Expression (Before ORDER BY and SELECT)
ReadFromStorage (Read from SystemOne)
Expression (Projection)
Expression (Before ORDER BY and SELECT)
ReadFromStorage (Read from SystemOne)
Expression (Projection)
Expression (Before ORDER BY and SELECT)
ReadFromStorage (Read from SystemOne)
Union
Expression (Projection)
Expression (Before ORDER BY and SELECT)
ReadFromStorage (Read from SystemOne)
Expression (Projection)
Expression (Before ORDER BY and SELECT)
ReadFromStorage (Read from SystemOne)
Distinct
Union
Expression (Projection)
Expression (Before ORDER BY and SELECT)
ReadFromStorage (Read from SystemOne)
Expression (Projection)
Expression (Before ORDER BY and SELECT)
ReadFromStorage (Read from SystemOne)
Expression (Projection)
Expression (Before ORDER BY and SELECT)
ReadFromStorage (Read from SystemOne)
Expression (Projection)
Expression (Before ORDER BY and SELECT)
ReadFromStorage (Read from SystemOne)
Expression (Projection)
Expression (Before ORDER BY and SELECT)
ReadFromStorage (Read from SystemOne)
Distinct
Union
Expression (Projection)
Expression (Before ORDER BY and SELECT)
ReadFromStorage (Read from SystemOne)
Expression (Projection)
Expression (Before ORDER BY and SELECT)
ReadFromStorage (Read from SystemOne)
Expression (Projection)
Expression (Before ORDER BY and SELECT)
ReadFromStorage (Read from SystemOne)
Expression (Projection)
Expression (Before ORDER BY and SELECT)
ReadFromStorage (Read from SystemOne)
Union
Expression (Projection)
Expression (Before ORDER BY and SELECT)
ReadFromStorage (Read from SystemOne)
Expression (Projection)
Expression (Before ORDER BY and SELECT)
ReadFromStorage (Read from SystemOne)
Expression (Projection)
Expression (Before ORDER BY and SELECT)
ReadFromStorage (Read from SystemOne)
Union
Expression (Projection)
Expression (Before ORDER BY and SELECT)
ReadFromStorage (Read from SystemOne)
Expression (Projection)
Expression (Before ORDER BY and SELECT)
ReadFromStorage (Read from SystemOne)
Distinct
Union
Expression (Projection)
Expression (Before ORDER BY and SELECT)
ReadFromStorage (Read from SystemOne)
Expression (Projection)
Expression (Before ORDER BY and SELECT)
ReadFromStorage (Read from SystemOne)

View File

@ -0,0 +1,13 @@
EXPLAIN SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1;
EXPLAIN (SELECT 1 UNION ALL SELECT 1) UNION ALL SELECT 1;
EXPLAIN SELECT 1 UNION ALL (SELECT 1 UNION ALL SELECT 1);
EXPLAIN SELECT 1 UNION DISTINCT SELECT 1 UNION DISTINCT SELECT 1;
EXPLAIN (SELECT 1 UNION DISTINCT SELECT 1) UNION DISTINCT SELECT 1;
EXPLAIN SELECT 1 UNION DISTINCT (SELECT 1 UNION DISTINCT SELECT 1);
EXPLAIN (SELECT 1 UNION ALL (SELECT 1 UNION ALL (SELECT 1 UNION ALL SELECT 1 UNION DISTINCT SELECT 1))) UNION ALL (((SELECT 1) UNION ALL (SELECT 1 UNION ALL (SELECT 1 UNION ALL (SELECT 1 UNION ALL SELECT 1 ) UNION DISTINCT SELECT 1))));
EXPLAIN (((((((((((((((SELECT 1 UNION ALL SELECT 1) UNION ALL SELECT 1))))))))))))));
EXPLAIN (((((((((((((((((((((((((((((SELECT 1 UNION ALL SELECT 1)))))))))))))))))))))))))))));
EXPLAIN (((((((((((((((((((((((((((((SELECT 1 UNION DISTINCT SELECT 1)))))))))))))))))))))))))))));