mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 23:21:59 +00:00
Added syntactic sugar for count(DISTINCT ...) and 'count_distinct_implementation' setting [#METR-22035].
This commit is contained in:
parent
7f7838c989
commit
a01534e6ab
@ -214,6 +214,9 @@ struct Settings
|
||||
\
|
||||
/** Таймаут в секундах */ \
|
||||
M(SettingUInt64, resharding_barrier_timeout, 300) \
|
||||
\
|
||||
/** What aggregate function to use for implementation of count(DISTINCT ...) */ \
|
||||
M(SettingString, count_distinct_implementation, "uniq") \
|
||||
|
||||
/// Всевозможные ограничения на выполнение запроса.
|
||||
Limits limits;
|
||||
|
@ -747,4 +747,44 @@ struct SettingGlobalSubqueriesMethod
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
struct SettingString
|
||||
{
|
||||
String value;
|
||||
bool changed = false;
|
||||
|
||||
SettingString(const String & x = String{}) : value(x) {}
|
||||
|
||||
operator String() const { return value; }
|
||||
SettingString & operator= (const String & x) { set(x); return *this; }
|
||||
|
||||
String toString() const
|
||||
{
|
||||
return value;
|
||||
}
|
||||
|
||||
void set(const String & x)
|
||||
{
|
||||
value = x;
|
||||
changed = true;
|
||||
}
|
||||
|
||||
void set(const Field & x)
|
||||
{
|
||||
set(safeGet<const String &>(x));
|
||||
}
|
||||
|
||||
void set(ReadBuffer & buf)
|
||||
{
|
||||
String x;
|
||||
readBinary(x, buf);
|
||||
set(x);
|
||||
}
|
||||
|
||||
void write(WriteBuffer & buf) const
|
||||
{
|
||||
writeBinary(value, buf);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -61,6 +61,7 @@ protected:
|
||||
* Или агрегатная функция: sum(x + f(y)), corr(x, y). По синтаксису - такая же, как обычная функция.
|
||||
* Или параметрическая агрегатная функция: quantile(0.9)(x + y).
|
||||
* Синтаксис - две пары круглых скобок вместо одной. Первая - для параметров, вторая - для аргументов.
|
||||
* Для функций может быть указан модификатор DISTINCT, например count(DISTINCT x, y).
|
||||
*/
|
||||
class ParserFunction : public IParserBase
|
||||
{
|
||||
|
@ -577,6 +577,13 @@ void ExpressionAnalyzer::normalizeTreeImpl(
|
||||
replaced = true;
|
||||
}
|
||||
|
||||
/// Select implementation of countDistinct based on settings.
|
||||
/// Important that it is done as query rewrite. It means rewritten query
|
||||
/// will be sent to remote servers during distributed query execution,
|
||||
/// and on all remote servers, function implementation will be same.
|
||||
if (func_node->name == "countDistinct")
|
||||
func_node->name = settings.count_distinct_implementation;
|
||||
|
||||
/// Может быть указано IN t, где t - таблица, что равносильно IN (SELECT * FROM t).
|
||||
if (functionIsInOrGlobalInOperator(func_node->name))
|
||||
if (ASTIdentifier * right = typeid_cast<ASTIdentifier *>(func_node->arguments->children.at(1).get()))
|
||||
|
@ -203,9 +203,12 @@ bool ParserFunction::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_pars
|
||||
|
||||
ParserIdentifier id_parser;
|
||||
ParserString open("("), close(")");
|
||||
ParserString distinct("DISTINCT", true, true);
|
||||
ParserExpressionList contents(false);
|
||||
ParserWhiteSpaceOrComments ws;
|
||||
|
||||
bool has_distinct_modifier = false;
|
||||
|
||||
ASTPtr identifier;
|
||||
ASTPtr expr_list_args;
|
||||
ASTPtr expr_list_params;
|
||||
@ -219,6 +222,13 @@ bool ParserFunction::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_pars
|
||||
return false;
|
||||
|
||||
ws.ignore(pos, end);
|
||||
|
||||
if (distinct.ignore(pos, end, max_parsed_pos, expected))
|
||||
{
|
||||
has_distinct_modifier = true;
|
||||
ws.ignore(pos, end);
|
||||
}
|
||||
|
||||
Pos contents_begin = pos;
|
||||
if (!contents.parse(pos, end, expr_list_args, max_parsed_pos, expected))
|
||||
return false;
|
||||
@ -254,10 +264,21 @@ bool ParserFunction::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_pars
|
||||
/// У параметрической агрегатной функции - два списка (параметры и аргументы) в круглых скобках. Пример: quantile(0.9)(x).
|
||||
if (open.ignore(pos, end, max_parsed_pos, expected))
|
||||
{
|
||||
/// Parametric aggregate functions cannot have DISTINCT in parameters list.
|
||||
if (has_distinct_modifier)
|
||||
return false;
|
||||
|
||||
expr_list_params = expr_list_args;
|
||||
expr_list_args = nullptr;
|
||||
|
||||
ws.ignore(pos, end);
|
||||
|
||||
if (distinct.ignore(pos, end, max_parsed_pos, expected))
|
||||
{
|
||||
has_distinct_modifier = true;
|
||||
ws.ignore(pos, end);
|
||||
}
|
||||
|
||||
if (!contents.parse(pos, end, expr_list_args, max_parsed_pos, expected))
|
||||
return false;
|
||||
ws.ignore(pos, end);
|
||||
@ -267,9 +288,12 @@ bool ParserFunction::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_pars
|
||||
}
|
||||
|
||||
auto function_node = std::make_shared<ASTFunction>(StringRange(begin, pos));
|
||||
ASTPtr node_holder{function_node};
|
||||
function_node->name = typeid_cast<ASTIdentifier &>(*identifier).name;
|
||||
|
||||
/// func(DISTINCT ...) is equivalent to funcDistinct(...)
|
||||
if (has_distinct_modifier)
|
||||
function_node->name += "Distinct";
|
||||
|
||||
function_node->arguments = expr_list_args;
|
||||
function_node->children.push_back(function_node->arguments);
|
||||
|
||||
@ -279,7 +303,7 @@ bool ParserFunction::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_pars
|
||||
function_node->children.push_back(function_node->parameters);
|
||||
}
|
||||
|
||||
node = node_holder;
|
||||
node = function_node;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -0,0 +1,6 @@
|
||||
123
|
||||
143
|
||||
123
|
||||
143
|
||||
123
|
||||
143
|
9
dbms/tests/queries/0_stateless/00350_count_distinct.sql
Normal file
9
dbms/tests/queries/0_stateless/00350_count_distinct.sql
Normal file
@ -0,0 +1,9 @@
|
||||
-- Checks that count(DISTINCT ...) works with each value of the
-- 'count_distinct_implementation' setting used below. The reference output
-- expects the same pair of results (123, then 143) for every implementation.

-- Implementation: uniq.
SET count_distinct_implementation = 'uniq';
|
||||
-- Single argument: number % 123 over 1000 consecutive numbers gives 123 distinct values.
SELECT count(DISTINCT x) FROM (SELECT number % 123 AS x FROM system.numbers LIMIT 1000);
|
||||
-- Two arguments: pairs (number % 11, number % 13) give 11 * 13 = 143 distinct combinations.
SELECT count(DISTINCT x, y) FROM (SELECT number % 11 AS x, number % 13 AS y FROM system.numbers LIMIT 1000);
|
||||
-- Implementation: uniqCombined. Same two queries as above.
SET count_distinct_implementation = 'uniqCombined';
|
||||
SELECT count(DISTINCT x) FROM (SELECT number % 123 AS x FROM system.numbers LIMIT 1000);
|
||||
SELECT count(DISTINCT x, y) FROM (SELECT number % 11 AS x, number % 13 AS y FROM system.numbers LIMIT 1000);
|
||||
-- Implementation: uniqExact. Same two queries as above.
SET count_distinct_implementation = 'uniqExact';
|
||||
SELECT count(DISTINCT x) FROM (SELECT number % 123 AS x FROM system.numbers LIMIT 1000);
|
||||
SELECT count(DISTINCT x, y) FROM (SELECT number % 11 AS x, number % 13 AS y FROM system.numbers LIMIT 1000);
|
Loading…
Reference in New Issue
Block a user