This commit is contained in:
feng lv 2020-10-26 09:33:34 +00:00
parent 5c192df20f
commit a9e4c7144c
10 changed files with 215 additions and 91 deletions

View File

@ -46,7 +46,7 @@ Specifics of each optional clause are covered in separate sections, which are li
- [SELECT clause](#select-clause)
- [DISTINCT clause](../../../sql-reference/statements/select/distinct.md)
- [LIMIT clause](../../../sql-reference/statements/select/limit.md)
- [UNION clause](../../../sql-reference/statements/select/union.md)
- [UNION clause](../../../sql-reference/statements/select/union-all.md)
- [INTO OUTFILE clause](../../../sql-reference/statements/select/into-outfile.md)
- [FORMAT clause](../../../sql-reference/statements/select/format.md)

View File

@ -29,7 +29,7 @@ Queries that are parts of `UNION ALL` cant be enclosed in round brackets. [OR
The difference between `UNION ALL` and `UNION DISTINCT` is that `UNION DISTINCT` applies a distinct transform to the union result; it is equivalent to `SELECT DISTINCT` from a subquery containing `UNION ALL`.
# UNION Clause {#union-clause}
By default, `UNION` behaves the same as `UNION ALL`, but you can specify the union mode with the `union_default_mode` setting; its value can be 'ALL', 'DISTINCT' or an empty string. However, if you use `UNION` with `union_default_mode` set to an empty string, an exception is thrown.
By default, `UNION` behaves the same as `UNION DISTINCT`, but you can specify the union mode with the `union_default_mode` setting; its value can be 'ALL', 'DISTINCT' or an empty string. However, if you use `UNION` with `union_default_mode` set to an empty string, an exception is thrown.
## Implementation Details {#implementation-details}

View File

@ -401,7 +401,7 @@ class IColumn;
M(Bool, allow_experimental_database_atomic, true, "Obsolete setting, does nothing. Will be removed after 2021-02-12", 0) \
M(Bool, allow_non_metadata_alters, true, "Allow to execute alters which affects not only tables metadata, but also data on disk", 0) \
M(Bool, enable_global_with_statement, false, "Propagate WITH statements to UNION queries and all subqueries", 0) \
M(UnionMode, union_default_mode, UnionMode::ALL, "Set default Union Mode in SelectWithUnion query. Possible values: empty string, 'ALL', 'DISTINCT'. If empty, query without Union Mode will throw exception.", 0)
M(UnionMode, union_default_mode, UnionMode::DISTINCT, "Set default Union Mode in SelectWithUnion query. Possible values: empty string, 'ALL', 'DISTINCT'. If empty, query without Union Mode will throw exception.", 0)
// End of COMMON_SETTINGS
// Please add settings related to formats into the FORMAT_FACTORY_SETTINGS below.

View File

@ -1,3 +1,5 @@
#include <ctime>
#include <memory>
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
#include <Interpreters/InterpreterSelectQuery.h>
#include <Interpreters/Context.h>
@ -40,26 +42,38 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
if (!num_selects)
throw Exception("Logical error: no children in ASTSelectWithUnionQuery", ErrorCodes::LOGICAL_ERROR);
/// For SELECT ... UNION/UNION ALL/UNION DISTINCT SELECT ... query,
/// rewrite ast with settings.union_default_mode
/// Rewrite ast with settings.union_default_mode
if (num_selects > 1)
{
if (ast.mode == ASTSelectWithUnionQuery::Mode::Unspecified)
const Settings & settings = context->getSettingsRef();
for (auto & mode : ast.union_modes)
{
const Settings & settings = context->getSettingsRef();
if (mode == ASTSelectWithUnionQuery::Mode::Unspecified)
{
if (settings.union_default_mode == UnionMode::ALL)
ast.mode = ASTSelectWithUnionQuery::Mode::ALL;
else if (settings.union_default_mode == UnionMode::DISTINCT)
ast.mode = ASTSelectWithUnionQuery::Mode::DISTINCT;
else
throw Exception(
"Expected ALL or DISTINCT in SelectWithUnion query, because setting (union_default_mode) is empty",
DB::ErrorCodes::EXPECTED_ALL_OR_DISTINCT);
if (settings.union_default_mode == UnionMode::ALL)
mode = ASTSelectWithUnionQuery::Mode::ALL;
else if (settings.union_default_mode == UnionMode::DISTINCT)
mode = ASTSelectWithUnionQuery::Mode::DISTINCT;
else
throw Exception(
"Expected ALL or DISTINCT in SelectWithUnion query, because setting (union_default_mode) is empty",
DB::ErrorCodes::EXPECTED_ALL_OR_DISTINCT);
}
}
/// Optimize: if there is UNION DISTINCT, all previous UNION DISTINCT can be rewritten to UNION ALL.
/// Therefore we have at most one UNION DISTINCT in a sequence.
for (auto rit = ast.union_modes.rbegin(); rit != ast.union_modes.rend(); ++rit)
{
if (*rit == ASTSelectWithUnionQuery::Mode::DISTINCT)
{
/// Number of streams need to do a DISTINCT transform after unite
union_distinct_num = ast.union_modes.rend() - rit + 1;
for (auto mode_to_modify = ++rit; mode_to_modify != ast.union_modes.rend(); ++mode_to_modify)
*mode_to_modify = ASTSelectWithUnionQuery::Mode::ALL;
break;
}
}
if (ast.mode == ASTSelectWithUnionQuery::Mode::DISTINCT)
distinct_union = true;
}
/// Initialize interpreters for each SELECT query.
@ -207,31 +221,79 @@ void InterpreterSelectWithUnionQuery::buildQueryPlan(QueryPlan & query_plan)
return;
}
std::vector<std::unique_ptr<QueryPlan>> plans(num_plans);
DataStreams data_streams(num_plans);
for (size_t i = 0; i < num_plans; ++i)
/// All UNION streams in the chain does not need to do DISTINCT transform
if (union_distinct_num == 0)
{
plans[i] = std::make_unique<QueryPlan>();
nested_interpreters[i]->buildQueryPlan(*plans[i]);
data_streams[i] = plans[i]->getCurrentDataStream();
std::vector<std::unique_ptr<QueryPlan>> plans(num_plans);
DataStreams data_streams(num_plans);
for (size_t i = 0; i < num_plans; ++i)
{
plans[i] = std::make_unique<QueryPlan>();
nested_interpreters[i]->buildQueryPlan(*plans[i]);
data_streams[i] = plans[i]->getCurrentDataStream();
}
auto max_threads = context->getSettingsRef().max_threads;
auto union_step = std::make_unique<UnionStep>(std::move(data_streams), result_header, max_threads);
query_plan.unitePlans(std::move(union_step), std::move(plans));
}
auto max_threads = context->getSettingsRef().max_threads;
auto union_step = std::make_unique<UnionStep>(std::move(data_streams), result_header, max_threads);
query_plan.unitePlans(std::move(union_step), std::move(plans));
/// Add distinct transform for UNION DISTINCT query
if (distinct_union)
/// The first union_distinct_num UNION streams need to do a DISTINCT transform after unite
else
{
QueryPlan distinct_query_plan;
std::vector<std::unique_ptr<QueryPlan>> plans(union_distinct_num);
DataStreams data_streams(union_distinct_num);
for (size_t i = 0; i < union_distinct_num; ++i)
{
plans[i] = std::make_unique<QueryPlan>();
nested_interpreters[i]->buildQueryPlan(*plans[i]);
data_streams[i] = plans[i]->getCurrentDataStream();
}
auto max_threads = context->getSettingsRef().max_threads;
auto union_step = std::make_unique<UnionStep>(std::move(data_streams), result_header, max_threads);
distinct_query_plan.unitePlans(std::move(union_step), std::move(plans));
/// Add distinct transform
const Settings & settings = context->getSettingsRef();
SizeLimits limits(settings.max_rows_in_distinct, settings.max_bytes_in_distinct, settings.distinct_overflow_mode);
auto distinct_step = std::make_unique<DistinctStep>(query_plan.getCurrentDataStream(), limits, 0, result_header.getNames(), false);
auto distinct_step
= std::make_unique<DistinctStep>(distinct_query_plan.getCurrentDataStream(), limits, 0, result_header.getNames(), false);
query_plan.addStep(std::move(distinct_step));
distinct_query_plan.addStep(std::move(distinct_step));
/// No other UNION streams after DISTINCT stream
if (num_plans == union_distinct_num)
{
query_plan = std::move(distinct_query_plan);
return;
}
/// Build final UNION step
std::vector<std::unique_ptr<QueryPlan>> final_plans(num_plans - union_distinct_num + 1);
DataStreams final_data_streams(num_plans - union_distinct_num + 1);
final_plans[0] = std::make_unique<QueryPlan>(std::move(distinct_query_plan));
final_data_streams[0] = final_plans[0]->getCurrentDataStream();
for (size_t i = 1; i < num_plans - union_distinct_num + 1; ++i)
{
final_plans[i] = std::make_unique<QueryPlan>();
nested_interpreters[union_distinct_num + i - 1]->buildQueryPlan(*final_plans[i]);
final_data_streams[i] = final_plans[i]->getCurrentDataStream();
}
auto final_union_step = std::make_unique<UnionStep>(std::move(final_data_streams), result_header, max_threads);
query_plan.unitePlans(std::move(final_union_step), std::move(final_plans));
}
}
BlockIO InterpreterSelectWithUnionQuery::execute()

View File

@ -1,5 +1,6 @@
#pragma once
#include <cstddef>
#include <Core/QueryProcessingStage.h>
#include <Interpreters/IInterpreter.h>
#include <Interpreters/SelectQueryOptions.h>
@ -54,7 +55,8 @@ private:
size_t max_streams = 1;
bool distinct_union = false;
/// First union_distinct_num streams need to do a DISTINCT transform after unite
size_t union_distinct_num = 0;
static Block getCommonHeaderForUnion(const Blocks & headers);
};

View File

@ -23,27 +23,21 @@ void ASTSelectWithUnionQuery::formatQueryImpl(const FormatSettings & settings, F
{
std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' ');
std::string mode_str;
switch (mode)
auto mode_to_str = [&](auto mode)
{
case Mode::Unspecified:
mode_str = "";
break;
case Mode::ALL:
mode_str = "ALL";
break;
case Mode::DISTINCT:
mode_str = "DISTINCT";
break;
}
if (mode == Mode::Unspecified)
return "";
else if (mode == Mode::ALL)
return "ALL";
else
return "DISTINCT";
};
for (ASTs::const_iterator it = list_of_selects->children.begin(); it != list_of_selects->children.end(); ++it)
{
if (it != list_of_selects->children.begin())
settings.ostr << settings.nl_or_ws << indent_str
<< (settings.hilite ? hilite_keyword : "")
<< "UNION " << mode_str
<< (settings.hilite ? hilite_none : "")
settings.ostr << settings.nl_or_ws << indent_str << (settings.hilite ? hilite_keyword : "") << "UNION "
<< mode_to_str(union_modes[it - list_of_selects->children.begin() - 1]) << (settings.hilite ? hilite_none : "")
<< settings.nl_or_ws;
(*it)->formatImpl(settings, state, frame);

View File

@ -24,7 +24,9 @@ public:
DISTINCT
};
Mode mode = Mode::Unspecified;
using Modes = std::vector<Mode>;
Modes union_modes;
ASTPtr list_of_selects;
};

View File

@ -1,3 +1,4 @@
#include <tuple>
#include <Parsers/IAST.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
@ -99,6 +100,51 @@ bool ParserList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
return true;
}
/// Parses `element (UNION [ALL | DISTINCT] element)*`.
///
/// On success `node` is an ASTExpressionList holding the parsed elements in order, and
/// `union_modes` holds the mode of every UNION between them, so that
/// union_modes.size() == number of elements - 1.
/// Returns false if not even the first element could be parsed.
bool ParserUnionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
    ASTs elements;

    /// The modes are collected in a member: forget whatever an earlier parse() call left there,
    /// otherwise the modes of two unrelated queries would be concatenated.
    union_modes.clear();

    auto parse_element = [&]
    {
        ASTPtr element;
        if (!elem_parser->parse(pos, element, expected))
            return false;

        elements.push_back(element);
        return true;
    };

    /// Parse UNION type
    auto parse_separator = [&]
    {
        if (!s_union_parser->ignore(pos, expected))
            return false;

        // SELECT ... UNION ALL SELECT ...
        if (s_all_parser->check(pos, expected))
            union_modes.push_back(ASTSelectWithUnionQuery::Mode::ALL);
        // SELECT ... UNION DISTINCT SELECT ...
        else if (s_distinct_parser->check(pos, expected))
            union_modes.push_back(ASTSelectWithUnionQuery::Mode::DISTINCT);
        // SELECT ... UNION SELECT ...
        else
            union_modes.push_back(ASTSelectWithUnionQuery::Mode::Unspecified);

        return true;
    };

    if (!parseUtil(pos, parse_element, parse_separator))
        return false;

    /// If a separator was consumed but the element after it failed to parse, parseUtil rewinds
    /// `pos` to before that separator, yet the mode pushed by parse_separator is still in the
    /// vector. Drop it so that the modes always match the gaps between the accepted elements.
    /// (parseUtil returned true, hence there is at least one element.)
    union_modes.resize(elements.size() - 1);

    auto list = std::make_shared<ASTExpressionList>(result_separator);
    list->children = std::move(elements);
    node = list;
    return true;
}
static bool parseOperator(IParser::Pos & pos, const char * op, Expected & expected)
{

View File

@ -5,6 +5,7 @@
#include <Parsers/IParserBase.h>
#include <Parsers/CommonParsers.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
namespace DB
{
@ -73,6 +74,53 @@ private:
char result_separator;
};
class ParserUnionList : public IParserBase
{
public:
ParserUnionList(ParserPtr && elem_parser_, ParserPtr && s_union_parser_, ParserPtr && s_all_parser_, ParserPtr && s_distinct_parser_)
: elem_parser(std::move(elem_parser_))
, s_union_parser(std::move(s_union_parser_))
, s_all_parser(std::move(s_all_parser_))
, s_distinct_parser(std::move(s_distinct_parser_))
{
}
template <typename ElemFunc, typename SepFunc>
static bool parseUtil(Pos & pos, const ElemFunc & parse_element, const SepFunc & parse_separator)
{
Pos begin = pos;
if (!parse_element())
{
pos = begin;
return false;
}
while (true)
{
begin = pos;
if (!parse_separator() || !parse_element())
{
pos = begin;
return true;
}
}
return false;
}
auto getUnionModes() const { return union_modes; }
protected:
const char * getName() const override { return "list of union elements"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
private:
ParserPtr elem_parser;
ParserPtr s_union_parser;
ParserPtr s_all_parser;
ParserPtr s_distinct_parser;
char result_separator = ',';
ASTSelectWithUnionQuery::Modes union_modes;
};
/** An expression with an infix binary left-associative operator.
* For example, a + b - c + d.

View File

@ -1,3 +1,4 @@
#include <memory>
#include <Parsers/ExpressionListParsers.h>
#include <Parsers/ParserSelectWithUnionQuery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
@ -27,52 +28,21 @@ bool ParserSelectWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected &
{
ASTPtr list_node;
ParserList parser_union(std::make_unique<ParserUnionQueryElement>(), std::make_unique<ParserKeyword>("UNION"), false);
ParserList parser_union_all(std::make_unique<ParserUnionQueryElement>(), std::make_unique<ParserKeyword>("UNION ALL"), false);
ParserList parser_union_distinct(std::make_unique<ParserUnionQueryElement>(), std::make_unique<ParserKeyword>("UNION DISTINCT"), false);
ParserUnionList parser(
std::make_unique<ParserUnionQueryElement>(),
std::make_unique<ParserKeyword>("UNION"),
std::make_unique<ParserKeyword>("ALL"),
std::make_unique<ParserKeyword>("DISTINCT"));
auto begin = pos;
auto current_expected = expected;
ASTSelectWithUnionQuery::Mode union_mode = ASTSelectWithUnionQuery::Mode::ALL;
/// Parser SELECT lists and UNION type, must have UNION
auto union_parser = [&](auto & parser, auto mode)
{
if (!parser.parse(pos, list_node, expected))
{
pos = begin;
expected = current_expected;
return false;
}
/// number of SELECT lists should not less than 2
if (list_node->children.size() < 2)
{
pos = begin;
expected = current_expected;
return false;
}
union_mode = mode;
return true;
};
/// We first parse: SELECT ... UNION SELECT ...
/// SELECT ... UNION ALL SELECT ...
/// SELECT ... UNION DISTINCT SELECT ...
if (!union_parser(parser_union, ASTSelectWithUnionQuery::Mode::Unspecified)
&& !union_parser(parser_union_all, ASTSelectWithUnionQuery::Mode::ALL)
&& !union_parser(parser_union_distinct, ASTSelectWithUnionQuery::Mode::DISTINCT))
{
/// If above parse failed, we back to parse SELECT without UNION
if (!parser_union.parse(pos, list_node, expected))
return false;
}
if (!parser.parse(pos, list_node, expected))
return false;
auto select_with_union_query = std::make_shared<ASTSelectWithUnionQuery>();
node = select_with_union_query;
select_with_union_query->list_of_selects = std::make_shared<ASTExpressionList>();
select_with_union_query->children.push_back(select_with_union_query->list_of_selects);
select_with_union_query->mode = union_mode;
select_with_union_query->union_modes = parser.getUnionModes();
// flatten inner union query
for (auto & child : list_node->children)