mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 08:32:02 +00:00
add union distinct and setting union_default_mode
This commit is contained in:
parent
b4f0e08369
commit
15e4e03469
@ -519,6 +519,8 @@ namespace ErrorCodes
|
||||
extern const int CONDITIONAL_TREE_PARENT_NOT_FOUND = 2001;
|
||||
extern const int ILLEGAL_PROJECTION_MANIPULATOR = 2002;
|
||||
extern const int UNRECOGNIZED_ARGUMENTS = 2003;
|
||||
extern const int UNKNOWN_UNION = 2004;
|
||||
extern const int EXPECTED_ALL_OR_DISTINCT = 2005;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -400,7 +400,8 @@ class IColumn;
|
||||
M(Bool, optimize_trivial_insert_select, true, "Optimize trivial 'INSERT INTO table SELECT ... FROM TABLES' query", 0) \
|
||||
M(Bool, allow_experimental_database_atomic, true, "Obsolete setting, does nothing. Will be removed after 2021-02-12", 0) \
|
||||
M(Bool, allow_non_metadata_alters, true, "Allow to execute alters which affects not only tables metadata, but also data on disk", 0) \
|
||||
M(Bool, enable_global_with_statement, false, "Propagate WITH statements to UNION queries and all subqueries", 0)
|
||||
M(Bool, enable_global_with_statement, false, "Propagate WITH statements to UNION queries and all subqueries", 0) \
|
||||
M(UnionMode, union_default_mode, UnionMode::ALL, "Set default Union Mode in SelectWithUnion query. Possible values: empty string, 'ALL', 'DISTINCT'. If empty, query without Union Mode will throw exception.", 0)
|
||||
|
||||
// End of COMMON_SETTINGS
|
||||
// Please add settings related to formats into the FORMAT_FACTORY_SETTINGS below.
|
||||
|
@ -12,6 +12,7 @@ namespace ErrorCodes
|
||||
extern const int UNKNOWN_JOIN;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int UNKNOWN_MYSQL_DATATYPES_SUPPORT_LEVEL;
|
||||
extern const int UNKNOWN_UNION;
|
||||
}
|
||||
|
||||
|
||||
@ -96,4 +97,9 @@ IMPLEMENT_SETTING_MULTI_ENUM(MySQLDataTypesSupport, ErrorCodes::UNKNOWN_MYSQL_DA
|
||||
{{"decimal", MySQLDataTypesSupport::DECIMAL},
|
||||
{"datetime64", MySQLDataTypesSupport::DATETIME64}})
|
||||
|
||||
IMPLEMENT_SETTING_ENUM(UnionMode, ErrorCodes::UNKNOWN_UNION,
|
||||
{{"", UnionMode::Unspecified},
|
||||
{"ALL", UnionMode::ALL},
|
||||
{"DISTINCT", UnionMode::DISTINCT}})
|
||||
|
||||
}
|
||||
|
@ -119,7 +119,6 @@ enum class DefaultDatabaseEngine
|
||||
|
||||
DECLARE_SETTING_ENUM(DefaultDatabaseEngine)
|
||||
|
||||
|
||||
enum class MySQLDataTypesSupport
|
||||
{
|
||||
DECIMAL, // convert MySQL's decimal and number to ClickHouse Decimal when applicable
|
||||
@ -129,4 +128,13 @@ enum class MySQLDataTypesSupport
|
||||
|
||||
DECLARE_SETTING_MULTI_ENUM(MySQLDataTypesSupport)
|
||||
|
||||
/// Values of the `union_default_mode` setting: how a UNION written without ALL or DISTINCT is interpreted.
enum class UnionMode
|
||||
{
|
||||
Unspecified = 0, // Empty setting value: a UNION without an explicit mode throws an exception
|
||||
ALL, // A UNION without an explicit mode is treated as SELECT ... UNION ALL SELECT ...
|
||||
DISTINCT // A UNION without an explicit mode is treated as SELECT ... UNION DISTINCT SELECT ...
|
||||
};
|
||||
|
||||
DECLARE_SETTING_ENUM(UnionMode)
|
||||
|
||||
}
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <Processors/QueryPlan/IQueryPlanStep.h>
|
||||
#include <Processors/QueryPlan/QueryPlan.h>
|
||||
#include <Processors/QueryPlan/UnionStep.h>
|
||||
#include <Processors/QueryPlan/DistinctStep.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -18,6 +19,7 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int UNION_ALL_RESULT_STRUCTURES_MISMATCH;
|
||||
extern const int EXPECTED_ALL_OR_DISTINCT;
|
||||
}
|
||||
|
||||
|
||||
@ -31,13 +33,35 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
|
||||
context(std::make_shared<Context>(context_)),
|
||||
max_streams(context->getSettingsRef().max_threads)
|
||||
{
|
||||
const auto & ast = query_ptr->as<ASTSelectWithUnionQuery &>();
|
||||
auto & ast = query_ptr->as<ASTSelectWithUnionQuery &>();
|
||||
|
||||
size_t num_selects = ast.list_of_selects->children.size();
|
||||
|
||||
if (!num_selects)
|
||||
throw Exception("Logical error: no children in ASTSelectWithUnionQuery", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
/// For SELECT ... UNION/UNION ALL/UNION DISTINCT SELECT ... query,
|
||||
/// rewrite ast with settings.union_default_mode
|
||||
if (num_selects > 1)
|
||||
{
|
||||
if (ast.mode == ASTSelectWithUnionQuery::Mode::Unspecified)
|
||||
{
|
||||
const Settings & settings = context->getSettingsRef();
|
||||
if (settings.union_default_mode == UnionMode::ALL)
|
||||
ast.mode = ASTSelectWithUnionQuery::Mode::ALL;
|
||||
else if (settings.union_default_mode == UnionMode::DISTINCT)
|
||||
{
|
||||
ast.mode = ASTSelectWithUnionQuery::Mode::DISTINCT;
|
||||
}
|
||||
else
|
||||
throw Exception(
|
||||
"Expected ALL or DISTINCT in SelectWithUnion query, because setting (union_default_mode) is empty",
|
||||
DB::ErrorCodes::EXPECTED_ALL_OR_DISTINCT);
|
||||
}
|
||||
if (ast.mode == ASTSelectWithUnionQuery::Mode::DISTINCT)
|
||||
distinct_union = true;
|
||||
}
|
||||
|
||||
/// Initialize interpreters for each SELECT query.
|
||||
/// Note that we pass 'required_result_column_names' to first SELECT.
|
||||
/// And for the rest, we pass names at the corresponding positions of 'required_result_column_names' in the result of first SELECT,
|
||||
@ -197,6 +221,17 @@ void InterpreterSelectWithUnionQuery::buildQueryPlan(QueryPlan & query_plan)
|
||||
auto union_step = std::make_unique<UnionStep>(std::move(data_streams), result_header, max_threads);
|
||||
|
||||
query_plan.unitePlans(std::move(union_step), std::move(plans));
|
||||
|
||||
/// Add distinct transform for UNION DISTINCT query
|
||||
if (distinct_union)
|
||||
{
|
||||
const Settings & settings = context->getSettingsRef();
|
||||
SizeLimits limits(settings.max_rows_in_distinct, settings.max_bytes_in_distinct, settings.distinct_overflow_mode);
|
||||
|
||||
auto distinct_step = std::make_unique<DistinctStep>(query_plan.getCurrentDataStream(), limits, 0, result_header.getNames(), false);
|
||||
|
||||
query_plan.addStep(std::move(distinct_step));
|
||||
}
|
||||
}
|
||||
|
||||
BlockIO InterpreterSelectWithUnionQuery::execute()
|
||||
|
@ -12,7 +12,7 @@ class Context;
|
||||
class InterpreterSelectQuery;
|
||||
class QueryPlan;
|
||||
|
||||
/** Interprets one or multiple SELECT queries inside UNION ALL chain.
|
||||
/** Interprets one or multiple SELECT queries inside UNION/UNION ALL/UNION DISTINCT chain.
|
||||
*/
|
||||
class InterpreterSelectWithUnionQuery : public IInterpreter
|
||||
{
|
||||
@ -54,6 +54,8 @@ private:
|
||||
|
||||
size_t max_streams = 1;
|
||||
|
||||
bool distinct_union = false;
|
||||
|
||||
static Block getCommonHeaderForUnion(const Blocks & headers);
|
||||
};
|
||||
|
||||
|
@ -23,13 +23,25 @@ void ASTSelectWithUnionQuery::formatQueryImpl(const FormatSettings & settings, F
|
||||
{
|
||||
std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' ');
|
||||
|
||||
std::string mode_str;
|
||||
switch (mode)
|
||||
{
|
||||
case Mode::Unspecified:
|
||||
mode_str = "";
|
||||
break;
|
||||
case Mode::ALL:
|
||||
mode_str = "ALL";
|
||||
break;
|
||||
case Mode::DISTINCT:
|
||||
mode_str = "DISTINCT";
|
||||
break;
|
||||
}
|
||||
|
||||
for (ASTs::const_iterator it = list_of_selects->children.begin(); it != list_of_selects->children.end(); ++it)
|
||||
{
|
||||
if (it != list_of_selects->children.begin())
|
||||
settings.ostr
|
||||
<< settings.nl_or_ws << indent_str << (settings.hilite ? hilite_keyword : "")
|
||||
<< "UNION ALL" << (settings.hilite ? hilite_none : "")
|
||||
<< settings.nl_or_ws;
|
||||
settings.ostr << settings.nl_or_ws << indent_str << (settings.hilite ? hilite_keyword : "") << "UNION " << mode_str
|
||||
<< (settings.hilite ? hilite_none : "") << settings.nl_or_ws;
|
||||
|
||||
(*it)->formatImpl(settings, state, frame);
|
||||
}
|
||||
|
@ -17,6 +17,15 @@ public:
|
||||
ASTPtr clone() const override;
|
||||
void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
|
||||
|
||||
/// UNION mode attached to this query node.
enum class Mode
|
||||
{
|
||||
Unspecified, /// UNION written without ALL or DISTINCT
|
||||
ALL, /// UNION ALL
|
||||
DISTINCT /// UNION DISTINCT
|
||||
};
|
||||
|
||||
Mode mode = Mode::Unspecified;
|
||||
|
||||
ASTPtr list_of_selects;
|
||||
};
|
||||
|
||||
|
@ -27,15 +27,51 @@ bool ParserSelectWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected &
|
||||
{
|
||||
ASTPtr list_node;
|
||||
|
||||
ParserList parser(std::make_unique<ParserUnionQueryElement>(), std::make_unique<ParserKeyword>("UNION ALL"), false);
|
||||
ParserList parser_union(std::make_unique<ParserUnionQueryElement>(), std::make_unique<ParserKeyword>("UNION"), false);
|
||||
ParserList parser_union_all(std::make_unique<ParserUnionQueryElement>(), std::make_unique<ParserKeyword>("UNION ALL"), false);
|
||||
ParserList parser_union_distinct(std::make_unique<ParserUnionQueryElement>(), std::make_unique<ParserKeyword>("UNION DISTINCT"), false);
|
||||
|
||||
auto begin = pos;
|
||||
auto current_expected = expected;
|
||||
ASTSelectWithUnionQuery::Mode union_mode = ASTSelectWithUnionQuery::Mode::ALL;
|
||||
|
||||
/// Parse the list of SELECTs together with the UNION type; the query must actually contain a UNION.
/// On success, stores `mode` into `union_mode` and returns true.
/// On failure, restores `pos` and `expected` to their values before the attempt and returns false.
|
||||
auto union_parser = [&](auto & parser, auto mode) {
|
||||
if (!parser.parse(pos, list_node, expected))
|
||||
{
|
||||
/// Roll back so that the next alternative starts from the same position.
|
||||
pos = begin;
|
||||
|
||||
expected = current_expected;
|
||||
|
||||
return false;
|
||||
|
||||
}
|
||||
/// The number of SELECTs in the list must not be less than 2.
|
||||
if (list_node->children.size() < 2)
|
||||
{
|
||||
/// Roll back here as well: this attempt is treated as a failure.
|
||||
pos = begin;
|
||||
|
||||
expected = current_expected;
|
||||
|
||||
return false;
|
||||
|
||||
}
|
||||
union_mode = mode;
|
||||
return true;
|
||||
};
|
||||
|
||||
/// We first parse: SELECT ... UNION SELECT ...
|
||||
/// SELECT ... UNION ALL SELECT ...
|
||||
/// SELECT ... UNION DISTINCT SELECT ...
|
||||
if (!union_parser(parser_union, ASTSelectWithUnionQuery::Mode::Unspecified)
|
||||
&& !union_parser(parser_union_all, ASTSelectWithUnionQuery::Mode::ALL)
|
||||
&& !union_parser(parser_union_distinct, ASTSelectWithUnionQuery::Mode::DISTINCT))
|
||||
{
|
||||
/// If all of the above attempts failed, fall back to parsing a SELECT without UNION
|
||||
if (!parser_union.parse(pos, list_node, expected))
|
||||
return false;
|
||||
}
|
||||
|
||||
auto select_with_union_query = std::make_shared<ASTSelectWithUnionQuery>();
|
||||
|
||||
node = select_with_union_query;
|
||||
select_with_union_query->list_of_selects = std::make_shared<ASTExpressionList>();
|
||||
select_with_union_query->children.push_back(select_with_union_query->list_of_selects);
|
||||
select_with_union_query->mode = union_mode;
|
||||
|
||||
// flatten inner union query
|
||||
for (auto & child : list_node->children)
|
||||
|
Loading…
Reference in New Issue
Block a user