Added syntactic sugar for count(DISTINCT ...) and 'count_distinct_implementation' setting [#METR-22035].

This commit is contained in:
Alexey Milovidov 2016-07-10 18:52:35 +03:00
parent 7f7838c989
commit a01534e6ab
7 changed files with 92 additions and 2 deletions

View File

@ -214,6 +214,9 @@ struct Settings
\
/** Таймаут в секундах */ \
M(SettingUInt64, resharding_barrier_timeout, 300) \
\
/** What aggregate function to use for implementation of count(DISTINCT ...) */ \
M(SettingString, count_distinct_implementation, "uniq") \
/// Всевозможные ограничения на выполнение запроса.
Limits limits;

View File

@ -747,4 +747,44 @@ struct SettingGlobalSubqueriesMethod
}
};
struct SettingString
{
String value;
bool changed = false;
SettingString(const String & x = String{}) : value(x) {}
operator String() const { return value; }
SettingString & operator= (const String & x) { set(x); return *this; }
String toString() const
{
return value;
}
void set(const String & x)
{
value = x;
changed = true;
}
void set(const Field & x)
{
set(safeGet<const String &>(x));
}
void set(ReadBuffer & buf)
{
String x;
readBinary(x, buf);
set(x);
}
void write(WriteBuffer & buf) const
{
writeBinary(value, buf);
}
};
}

View File

@ -61,6 +61,7 @@ protected:
* Или агрегатная функция: sum(x + f(y)), corr(x, y). По синтаксису - такая же, как обычная функция.
* Или параметрическая агрегатная функция: quantile(0.9)(x + y).
* Синтаксис - две пары круглых скобок вместо одной. Первая - для параметров, вторая - для аргументов.
* Для функций может быть указан модификатор DISTINCT, например count(DISTINCT x, y).
*/
class ParserFunction : public IParserBase
{

View File

@ -577,6 +577,13 @@ void ExpressionAnalyzer::normalizeTreeImpl(
replaced = true;
}
/// Select implementation of countDistinct based on settings.
/// Important that it is done as query rewrite. It means rewritten query
/// will be sent to remote servers during distributed query execution,
/// and on all remote servers, function implementation will be same.
if (func_node->name == "countDistinct")
func_node->name = settings.count_distinct_implementation;
/// Может быть указано IN t, где t - таблица, что равносильно IN (SELECT * FROM t).
if (functionIsInOrGlobalInOperator(func_node->name))
if (ASTIdentifier * right = typeid_cast<ASTIdentifier *>(func_node->arguments->children.at(1).get()))

View File

@ -203,9 +203,12 @@ bool ParserFunction::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_pars
ParserIdentifier id_parser;
ParserString open("("), close(")");
ParserString distinct("DISTINCT", true, true);
ParserExpressionList contents(false);
ParserWhiteSpaceOrComments ws;
bool has_distinct_modifier = false;
ASTPtr identifier;
ASTPtr expr_list_args;
ASTPtr expr_list_params;
@ -219,6 +222,13 @@ bool ParserFunction::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_pars
return false;
ws.ignore(pos, end);
if (distinct.ignore(pos, end, max_parsed_pos, expected))
{
has_distinct_modifier = true;
ws.ignore(pos, end);
}
Pos contents_begin = pos;
if (!contents.parse(pos, end, expr_list_args, max_parsed_pos, expected))
return false;
@ -254,10 +264,21 @@ bool ParserFunction::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_pars
/// У параметрической агрегатной функции - два списка (параметры и аргументы) в круглых скобках. Пример: quantile(0.9)(x).
if (open.ignore(pos, end, max_parsed_pos, expected))
{
/// Parametric aggregate functions cannot have DISTINCT in parameters list.
if (has_distinct_modifier)
return false;
expr_list_params = expr_list_args;
expr_list_args = nullptr;
ws.ignore(pos, end);
if (distinct.ignore(pos, end, max_parsed_pos, expected))
{
has_distinct_modifier = true;
ws.ignore(pos, end);
}
if (!contents.parse(pos, end, expr_list_args, max_parsed_pos, expected))
return false;
ws.ignore(pos, end);
@ -267,9 +288,12 @@ bool ParserFunction::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_pars
}
auto function_node = std::make_shared<ASTFunction>(StringRange(begin, pos));
ASTPtr node_holder{function_node};
function_node->name = typeid_cast<ASTIdentifier &>(*identifier).name;
/// func(DISTINCT ...) is equivalent to funcDistinct(...)
if (has_distinct_modifier)
function_node->name += "Distinct";
function_node->arguments = expr_list_args;
function_node->children.push_back(function_node->arguments);
@ -279,7 +303,7 @@ bool ParserFunction::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_pars
function_node->children.push_back(function_node->parameters);
}
node = node_holder;
node = function_node;
return true;
}

View File

@ -0,0 +1,6 @@
123
143
123
143
123
143

View File

@ -0,0 +1,9 @@
-- Runs the same two count(DISTINCT ...) queries under three values of the
-- 'count_distinct_implementation' setting: 'uniq', 'uniqCombined', 'uniqExact'.
--
-- Data: the first 1000 values of system.numbers.
--   number % 123                 -> 123 distinct values (expected answer: 123)
--   (number % 11, number % 13)   -> 11 * 13 = 143 distinct pairs (expected answer: 143)

-- Implementation: uniq
SET count_distinct_implementation = 'uniq';
SELECT count(DISTINCT x) FROM (SELECT number % 123 AS x FROM system.numbers LIMIT 1000);
SELECT count(DISTINCT x, y) FROM (SELECT number % 11 AS x, number % 13 AS y FROM system.numbers LIMIT 1000);
-- Implementation: uniqCombined
SET count_distinct_implementation = 'uniqCombined';
SELECT count(DISTINCT x) FROM (SELECT number % 123 AS x FROM system.numbers LIMIT 1000);
SELECT count(DISTINCT x, y) FROM (SELECT number % 11 AS x, number % 13 AS y FROM system.numbers LIMIT 1000);
-- Implementation: uniqExact
SET count_distinct_implementation = 'uniqExact';
SELECT count(DISTINCT x) FROM (SELECT number % 123 AS x FROM system.numbers LIMIT 1000);
SELECT count(DISTINCT x, y) FROM (SELECT number % 11 AS x, number % 13 AS y FROM system.numbers LIMIT 1000);