Add UNION DISTINCT support and the union_default_mode setting

This commit is contained in:
feng lv 2020-10-24 13:18:04 +00:00
parent b4f0e08369
commit 15e4e03469
9 changed files with 122 additions and 11 deletions

View File

@ -519,6 +519,8 @@ namespace ErrorCodes
extern const int CONDITIONAL_TREE_PARENT_NOT_FOUND = 2001;
extern const int ILLEGAL_PROJECTION_MANIPULATOR = 2002;
extern const int UNRECOGNIZED_ARGUMENTS = 2003;
extern const int UNKNOWN_UNION = 2004;
extern const int EXPECTED_ALL_OR_DISTINCT = 2005;
}
}

View File

@ -400,7 +400,8 @@ class IColumn;
M(Bool, optimize_trivial_insert_select, true, "Optimize trivial 'INSERT INTO table SELECT ... FROM TABLES' query", 0) \
M(Bool, allow_experimental_database_atomic, true, "Obsolete setting, does nothing. Will be removed after 2021-02-12", 0) \
M(Bool, allow_non_metadata_alters, true, "Allow to execute alters which affects not only tables metadata, but also data on disk", 0) \
M(Bool, enable_global_with_statement, false, "Propagate WITH statements to UNION queries and all subqueries", 0)
M(Bool, enable_global_with_statement, false, "Propagate WITH statements to UNION queries and all subqueries", 0) \
M(UnionMode, union_default_mode, UnionMode::ALL, "Set default Union Mode in SelectWithUnion query. Possible values: empty string, 'ALL', 'DISTINCT'. If empty, query without Union Mode will throw exception.", 0)
// End of COMMON_SETTINGS
// Please add settings related to formats into the FORMAT_FACTORY_SETTINGS below.

View File

@ -12,6 +12,7 @@ namespace ErrorCodes
extern const int UNKNOWN_JOIN;
extern const int BAD_ARGUMENTS;
extern const int UNKNOWN_MYSQL_DATATYPES_SUPPORT_LEVEL;
extern const int UNKNOWN_UNION;
}
@ -96,4 +97,9 @@ IMPLEMENT_SETTING_MULTI_ENUM(MySQLDataTypesSupport, ErrorCodes::UNKNOWN_MYSQL_DA
{{"decimal", MySQLDataTypesSupport::DECIMAL},
{"datetime64", MySQLDataTypesSupport::DATETIME64}})
/// Maps the textual values accepted for a UnionMode setting (union_default_mode) to the enum:
/// the empty string corresponds to UnionMode::Unspecified, "ALL" and "DISTINCT" to the same-named values.
/// NOTE(review): ErrorCodes::UNKNOWN_UNION is presumably reported for any other value — confirm in IMPLEMENT_SETTING_ENUM.
IMPLEMENT_SETTING_ENUM(UnionMode, ErrorCodes::UNKNOWN_UNION,
{{"", UnionMode::Unspecified},
{"ALL", UnionMode::ALL},
{"DISTINCT", UnionMode::DISTINCT}})
}

View File

@ -119,7 +119,6 @@ enum class DefaultDatabaseEngine
DECLARE_SETTING_ENUM(DefaultDatabaseEngine)
enum class MySQLDataTypesSupport
{
DECIMAL, // convert MySQL's decimal and number to ClickHouse Decimal when applicable
@ -129,4 +128,13 @@ enum class MySQLDataTypesSupport
DECLARE_SETTING_MULTI_ENUM(MySQLDataTypesSupport)
/// Value of the union_default_mode setting: how a UNION written without ALL/DISTINCT is interpreted.
enum class UnionMode
{
    /// No default: a UNION without an explicit mode throws an exception.
    Unspecified = 0,
    /// SELECT ... UNION SELECT ... is treated as SELECT ... UNION ALL SELECT ...
    ALL = 1,
    /// SELECT ... UNION SELECT ... is treated as SELECT ... UNION DISTINCT SELECT ...
    DISTINCT = 2
};
DECLARE_SETTING_ENUM(UnionMode)
}

View File

@ -9,6 +9,7 @@
#include <Processors/QueryPlan/IQueryPlanStep.h>
#include <Processors/QueryPlan/QueryPlan.h>
#include <Processors/QueryPlan/UnionStep.h>
#include <Processors/QueryPlan/DistinctStep.h>
namespace DB
@ -18,6 +19,7 @@ namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int UNION_ALL_RESULT_STRUCTURES_MISMATCH;
extern const int EXPECTED_ALL_OR_DISTINCT;
}
@ -31,13 +33,35 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
context(std::make_shared<Context>(context_)),
max_streams(context->getSettingsRef().max_threads)
{
const auto & ast = query_ptr->as<ASTSelectWithUnionQuery &>();
auto & ast = query_ptr->as<ASTSelectWithUnionQuery &>();
size_t num_selects = ast.list_of_selects->children.size();
if (!num_selects)
throw Exception("Logical error: no children in ASTSelectWithUnionQuery", ErrorCodes::LOGICAL_ERROR);
/// A query made of several SELECTs (SELECT ... UNION / UNION ALL / UNION DISTINCT SELECT ...)
/// needs an explicit mode: if the query did not name one, it is taken from
/// settings.union_default_mode and written back into the AST.
if (num_selects > 1)
{
    using UnionKind = ASTSelectWithUnionQuery::Mode;

    if (ast.mode == UnionKind::Unspecified)
    {
        const auto & default_mode = context->getSettingsRef().union_default_mode;

        if (default_mode == UnionMode::ALL)
            ast.mode = UnionKind::ALL;
        else if (default_mode == UnionMode::DISTINCT)
            ast.mode = UnionKind::DISTINCT;
        else
            throw Exception(
                "Expected ALL or DISTINCT in SelectWithUnion query, because setting (union_default_mode) is empty",
                DB::ErrorCodes::EXPECTED_ALL_OR_DISTINCT);
    }

    /// Remember that the result of the union has to be deduplicated.
    if (ast.mode == UnionKind::DISTINCT)
        distinct_union = true;
}
/// Initialize interpreters for each SELECT query.
/// Note that we pass 'required_result_column_names' to first SELECT.
/// And for the rest, we pass names at the corresponding positions of 'required_result_column_names' in the result of first SELECT,
@ -197,6 +221,17 @@ void InterpreterSelectWithUnionQuery::buildQueryPlan(QueryPlan & query_plan)
auto union_step = std::make_unique<UnionStep>(std::move(data_streams), result_header, max_threads);
query_plan.unitePlans(std::move(union_step), std::move(plans));
/// Add distinct transform for UNION DISTINCT query.
/// distinct_union is a member flag that is false by default.
if (distinct_union)
{
const Settings & settings = context->getSettingsRef();
/// Limits for the DISTINCT state come from the max_rows_in_distinct, max_bytes_in_distinct
/// and distinct_overflow_mode settings.
SizeLimits limits(settings.max_rows_in_distinct, settings.max_bytes_in_distinct, settings.distinct_overflow_mode);
/// The step is given the names of all columns of result_header.
/// NOTE(review): the literals 0 and false are presumably DistinctStep's limit hint and pre-distinct flag — confirm against its constructor.
auto distinct_step = std::make_unique<DistinctStep>(query_plan.getCurrentDataStream(), limits, 0, result_header.getNames(), false);
query_plan.addStep(std::move(distinct_step));
}
}
BlockIO InterpreterSelectWithUnionQuery::execute()

View File

@ -12,7 +12,7 @@ class Context;
class InterpreterSelectQuery;
class QueryPlan;
/** Interprets one or multiple SELECT queries inside UNION ALL chain.
/** Interprets one or multiple SELECT queries inside UNION/UNION ALL/UNION DISTINCT chain.
*/
class InterpreterSelectWithUnionQuery : public IInterpreter
{
@ -54,6 +54,8 @@ private:
size_t max_streams = 1;
bool distinct_union = false;
static Block getCommonHeaderForUnion(const Blocks & headers);
};

View File

@ -23,13 +23,25 @@ void ASTSelectWithUnionQuery::formatQueryImpl(const FormatSettings & settings, F
{
std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' ');
/// Keyword printed after "UNION"; it stays empty when the mode is Mode::Unspecified.
std::string mode_str;
switch (mode)
{
    case Mode::Unspecified:
        /// Nothing to print after UNION.
        break;
    case Mode::ALL:
        mode_str = "ALL";
        break;
    case Mode::DISTINCT:
        mode_str = "DISTINCT";
        break;
}
for (ASTs::const_iterator it = list_of_selects->children.begin(); it != list_of_selects->children.end(); ++it)
{
if (it != list_of_selects->children.begin())
settings.ostr
<< settings.nl_or_ws << indent_str << (settings.hilite ? hilite_keyword : "")
<< "UNION ALL" << (settings.hilite ? hilite_none : "")
<< settings.nl_or_ws;
settings.ostr << settings.nl_or_ws << indent_str << (settings.hilite ? hilite_keyword : "") << "UNION " << mode_str
<< (settings.hilite ? hilite_none : "") << settings.nl_or_ws;
(*it)->formatImpl(settings, state, frame);
}

View File

@ -17,6 +17,15 @@ public:
ASTPtr clone() const override;
void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
/// How the SELECTs listed in list_of_selects are combined.
enum class Mode
{
    /// No ALL or DISTINCT keyword is recorded for the UNION.
    Unspecified = 0,
    /// UNION ALL
    ALL = 1,
    /// UNION DISTINCT
    DISTINCT = 2
};

/// Union mode of this query; Unspecified until something assigns another value.
Mode mode{Mode::Unspecified};
ASTPtr list_of_selects;
};

View File

@ -27,15 +27,51 @@ bool ParserSelectWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected &
{
ASTPtr list_node;
ParserList parser(std::make_unique<ParserUnionQueryElement>(), std::make_unique<ParserKeyword>("UNION ALL"), false);
if (!parser.parse(pos, list_node, expected))
return false;
/// Three list parsers over the same element parser; they differ only in the separator keyword.
ParserList parser_union(std::make_unique<ParserUnionQueryElement>(), std::make_unique<ParserKeyword>("UNION"), false);
ParserList parser_union_all(std::make_unique<ParserUnionQueryElement>(), std::make_unique<ParserKeyword>("UNION ALL"), false);
ParserList parser_union_distinct(std::make_unique<ParserUnionQueryElement>(), std::make_unique<ParserKeyword>("UNION DISTINCT"), false);
/// Copies of the starting position and of `expected`, used to rewind after a failed attempt.
auto begin = pos;
auto current_expected = expected;
/// Mode stored in the resulting AST node; it starts as ALL and is overwritten when a UNION attempt succeeds.
ASTSelectWithUnionQuery::Mode union_mode = ASTSelectWithUnionQuery::Mode::ALL;
/// Tries to parse a list of SELECTs with the given list parser and requires at least two of them,
/// i.e. the separator keyword must actually occur.
/// On success stores `mode` in union_mode and returns true;
/// otherwise rewinds pos and expected to their saved values and returns false.
auto union_parser = [&](auto & parser, auto mode) {
    const bool parsed_union = parser.parse(pos, list_node, expected) && list_node->children.size() >= 2;
    if (!parsed_union)
    {
        /// Either nothing was parsed or fewer than two SELECTs were found: undo this attempt.
        pos = begin;
        expected = current_expected;
        return false;
    }

    union_mode = mode;
    return true;
};
/// Try the UNION forms in this order, stopping at the first one that succeeds:
///     SELECT ... UNION SELECT ...
///     SELECT ... UNION ALL SELECT ...
///     SELECT ... UNION DISTINCT SELECT ...
if (!union_parser(parser_union, ASTSelectWithUnionQuery::Mode::Unspecified)
&& !union_parser(parser_union_all, ASTSelectWithUnionQuery::Mode::ALL)
&& !union_parser(parser_union_distinct, ASTSelectWithUnionQuery::Mode::DISTINCT))
{
/// None of the UNION forms parsed, so fall back to parsing a SELECT without UNION.
/// union_mode is not modified on this path.
if (!parser_union.parse(pos, list_node, expected))
return false;
}
auto select_with_union_query = std::make_shared<ASTSelectWithUnionQuery>();
node = select_with_union_query;
select_with_union_query->list_of_selects = std::make_shared<ASTExpressionList>();
select_with_union_query->children.push_back(select_with_union_query->list_of_selects);
select_with_union_query->mode = union_mode;
// flatten inner union query
for (auto & child : list_node->children)