mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-25 00:52:02 +00:00
Added syntactic sugar for count(DISTINCT ...) and 'count_distinct_implementation' setting [#METR-22035].
This commit is contained in:
parent
7f7838c989
commit
a01534e6ab
@ -214,6 +214,9 @@ struct Settings
|
|||||||
\
|
\
|
||||||
/** Таймаут в секундах */ \
|
/** Таймаут в секундах */ \
|
||||||
M(SettingUInt64, resharding_barrier_timeout, 300) \
|
M(SettingUInt64, resharding_barrier_timeout, 300) \
|
||||||
|
\
|
||||||
|
/** What aggregate function to use for implementation of count(DISTINCT ...) */ \
|
||||||
|
M(SettingString, count_distinct_implementation, "uniq") \
|
||||||
|
|
||||||
/// Всевозможные ограничения на выполнение запроса.
|
/// Всевозможные ограничения на выполнение запроса.
|
||||||
Limits limits;
|
Limits limits;
|
||||||
|
@ -747,4 +747,44 @@ struct SettingGlobalSubqueriesMethod
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
struct SettingString
|
||||||
|
{
|
||||||
|
String value;
|
||||||
|
bool changed = false;
|
||||||
|
|
||||||
|
SettingString(const String & x = String{}) : value(x) {}
|
||||||
|
|
||||||
|
operator String() const { return value; }
|
||||||
|
SettingString & operator= (const String & x) { set(x); return *this; }
|
||||||
|
|
||||||
|
String toString() const
|
||||||
|
{
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
void set(const String & x)
|
||||||
|
{
|
||||||
|
value = x;
|
||||||
|
changed = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void set(const Field & x)
|
||||||
|
{
|
||||||
|
set(safeGet<const String &>(x));
|
||||||
|
}
|
||||||
|
|
||||||
|
void set(ReadBuffer & buf)
|
||||||
|
{
|
||||||
|
String x;
|
||||||
|
readBinary(x, buf);
|
||||||
|
set(x);
|
||||||
|
}
|
||||||
|
|
||||||
|
void write(WriteBuffer & buf) const
|
||||||
|
{
|
||||||
|
writeBinary(value, buf);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -61,6 +61,7 @@ protected:
|
|||||||
* Или агрегатная функция: sum(x + f(y)), corr(x, y). По синтаксису - такая же, как обычная функция.
|
* Или агрегатная функция: sum(x + f(y)), corr(x, y). По синтаксису - такая же, как обычная функция.
|
||||||
* Или параметрическая агрегатная функция: quantile(0.9)(x + y).
|
* Или параметрическая агрегатная функция: quantile(0.9)(x + y).
|
||||||
* Синтаксис - две пары круглых скобок вместо одной. Первая - для параметров, вторая - для аргументов.
|
* Синтаксис - две пары круглых скобок вместо одной. Первая - для параметров, вторая - для аргументов.
|
||||||
|
* Для функций может быть указан модификатор DISTINCT, например count(DISTINCT x, y).
|
||||||
*/
|
*/
|
||||||
class ParserFunction : public IParserBase
|
class ParserFunction : public IParserBase
|
||||||
{
|
{
|
||||||
|
@ -577,6 +577,13 @@ void ExpressionAnalyzer::normalizeTreeImpl(
|
|||||||
replaced = true;
|
replaced = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Select the implementation of countDistinct based on settings.
|
||||||
|
/// It is important that this is done as a query rewrite: the rewritten query
|
||||||
|
/// is what gets sent to remote servers during distributed query execution,
|
||||||
|
/// so the same function implementation is used on all remote servers.
|
||||||
|
if (func_node->name == "countDistinct")
|
||||||
|
func_node->name = settings.count_distinct_implementation;
|
||||||
|
|
||||||
/// Может быть указано IN t, где t - таблица, что равносильно IN (SELECT * FROM t).
|
/// Может быть указано IN t, где t - таблица, что равносильно IN (SELECT * FROM t).
|
||||||
if (functionIsInOrGlobalInOperator(func_node->name))
|
if (functionIsInOrGlobalInOperator(func_node->name))
|
||||||
if (ASTIdentifier * right = typeid_cast<ASTIdentifier *>(func_node->arguments->children.at(1).get()))
|
if (ASTIdentifier * right = typeid_cast<ASTIdentifier *>(func_node->arguments->children.at(1).get()))
|
||||||
|
@ -203,9 +203,12 @@ bool ParserFunction::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_pars
|
|||||||
|
|
||||||
ParserIdentifier id_parser;
|
ParserIdentifier id_parser;
|
||||||
ParserString open("("), close(")");
|
ParserString open("("), close(")");
|
||||||
|
ParserString distinct("DISTINCT", true, true);
|
||||||
ParserExpressionList contents(false);
|
ParserExpressionList contents(false);
|
||||||
ParserWhiteSpaceOrComments ws;
|
ParserWhiteSpaceOrComments ws;
|
||||||
|
|
||||||
|
bool has_distinct_modifier = false;
|
||||||
|
|
||||||
ASTPtr identifier;
|
ASTPtr identifier;
|
||||||
ASTPtr expr_list_args;
|
ASTPtr expr_list_args;
|
||||||
ASTPtr expr_list_params;
|
ASTPtr expr_list_params;
|
||||||
@ -219,6 +222,13 @@ bool ParserFunction::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_pars
|
|||||||
return false;
|
return false;
|
||||||
|
|
||||||
ws.ignore(pos, end);
|
ws.ignore(pos, end);
|
||||||
|
|
||||||
|
if (distinct.ignore(pos, end, max_parsed_pos, expected))
|
||||||
|
{
|
||||||
|
has_distinct_modifier = true;
|
||||||
|
ws.ignore(pos, end);
|
||||||
|
}
|
||||||
|
|
||||||
Pos contents_begin = pos;
|
Pos contents_begin = pos;
|
||||||
if (!contents.parse(pos, end, expr_list_args, max_parsed_pos, expected))
|
if (!contents.parse(pos, end, expr_list_args, max_parsed_pos, expected))
|
||||||
return false;
|
return false;
|
||||||
@ -254,10 +264,21 @@ bool ParserFunction::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_pars
|
|||||||
/// У параметрической агрегатной функции - два списка (параметры и аргументы) в круглых скобках. Пример: quantile(0.9)(x).
|
/// У параметрической агрегатной функции - два списка (параметры и аргументы) в круглых скобках. Пример: quantile(0.9)(x).
|
||||||
if (open.ignore(pos, end, max_parsed_pos, expected))
|
if (open.ignore(pos, end, max_parsed_pos, expected))
|
||||||
{
|
{
|
||||||
|
/// Parametric aggregate functions cannot have DISTINCT in the parameters list.
|
||||||
|
if (has_distinct_modifier)
|
||||||
|
return false;
|
||||||
|
|
||||||
expr_list_params = expr_list_args;
|
expr_list_params = expr_list_args;
|
||||||
expr_list_args = nullptr;
|
expr_list_args = nullptr;
|
||||||
|
|
||||||
ws.ignore(pos, end);
|
ws.ignore(pos, end);
|
||||||
|
|
||||||
|
if (distinct.ignore(pos, end, max_parsed_pos, expected))
|
||||||
|
{
|
||||||
|
has_distinct_modifier = true;
|
||||||
|
ws.ignore(pos, end);
|
||||||
|
}
|
||||||
|
|
||||||
if (!contents.parse(pos, end, expr_list_args, max_parsed_pos, expected))
|
if (!contents.parse(pos, end, expr_list_args, max_parsed_pos, expected))
|
||||||
return false;
|
return false;
|
||||||
ws.ignore(pos, end);
|
ws.ignore(pos, end);
|
||||||
@ -267,9 +288,12 @@ bool ParserFunction::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_pars
|
|||||||
}
|
}
|
||||||
|
|
||||||
auto function_node = std::make_shared<ASTFunction>(StringRange(begin, pos));
|
auto function_node = std::make_shared<ASTFunction>(StringRange(begin, pos));
|
||||||
ASTPtr node_holder{function_node};
|
|
||||||
function_node->name = typeid_cast<ASTIdentifier &>(*identifier).name;
|
function_node->name = typeid_cast<ASTIdentifier &>(*identifier).name;
|
||||||
|
|
||||||
|
/// func(DISTINCT ...) is equivalent to funcDistinct(...)
|
||||||
|
if (has_distinct_modifier)
|
||||||
|
function_node->name += "Distinct";
|
||||||
|
|
||||||
function_node->arguments = expr_list_args;
|
function_node->arguments = expr_list_args;
|
||||||
function_node->children.push_back(function_node->arguments);
|
function_node->children.push_back(function_node->arguments);
|
||||||
|
|
||||||
@ -279,7 +303,7 @@ bool ParserFunction::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_pars
|
|||||||
function_node->children.push_back(function_node->parameters);
|
function_node->children.push_back(function_node->parameters);
|
||||||
}
|
}
|
||||||
|
|
||||||
node = node_holder;
|
node = function_node;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -0,0 +1,6 @@
|
|||||||
|
123
|
||||||
|
143
|
||||||
|
123
|
||||||
|
143
|
||||||
|
123
|
||||||
|
143
|
9
dbms/tests/queries/0_stateless/00350_count_distinct.sql
Normal file
9
dbms/tests/queries/0_stateless/00350_count_distinct.sql
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
SET count_distinct_implementation = 'uniq';
|
||||||
|
SELECT count(DISTINCT x) FROM (SELECT number % 123 AS x FROM system.numbers LIMIT 1000);
|
||||||
|
SELECT count(DISTINCT x, y) FROM (SELECT number % 11 AS x, number % 13 AS y FROM system.numbers LIMIT 1000);
|
||||||
|
SET count_distinct_implementation = 'uniqCombined';
|
||||||
|
SELECT count(DISTINCT x) FROM (SELECT number % 123 AS x FROM system.numbers LIMIT 1000);
|
||||||
|
SELECT count(DISTINCT x, y) FROM (SELECT number % 11 AS x, number % 13 AS y FROM system.numbers LIMIT 1000);
|
||||||
|
SET count_distinct_implementation = 'uniqExact';
|
||||||
|
SELECT count(DISTINCT x) FROM (SELECT number % 123 AS x FROM system.numbers LIMIT 1000);
|
||||||
|
SELECT count(DISTINCT x, y) FROM (SELECT number % 11 AS x, number % 13 AS y FROM system.numbers LIMIT 1000);
|
Loading…
Reference in New Issue
Block a user