more optimal aggregate functions with both 'if' and 'distinct' combinators

This commit is contained in:
Anton Popov 2020-06-22 17:55:49 +03:00
parent 3f0f0ff5ad
commit 2f95daf4ed
4 changed files with 45 additions and 4 deletions

View File

@ -88,7 +88,10 @@ struct CustomizeFunctionsData
};
char countdistinct[] = "countdistinct";
using CustomizeFunctionsVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeFunctionsData<countdistinct>>, true>;
using CustomizeCountDistinctVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeFunctionsData<countdistinct>>, true>;
char countifdistinct[] = "countifdistinct";
using CustomizeCountIfDistinctVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeFunctionsData<countifdistinct>>, true>;
char in[] = "in";
using CustomizeInVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeFunctionsData<in>>, true>;
@ -102,6 +105,26 @@ using CustomizeGlobalInVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeFunc
char globalNotIn[] = "globalnotin";
using CustomizeGlobalNotInVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeFunctionsData<globalNotIn>>, true>;
template <char const * func_suffix>
struct CustomizeFunctionsSuffixData
{
using TypeToVisit = ASTFunction;
const String & customized_func_suffix;
void visit(ASTFunction & func, ASTPtr &)
{
if (endsWith(Poco::toLower(func.name), func_suffix))
{
size_t prefix_len = func.name.length() - strlen(func_suffix);
func.name = func.name.substr(0, prefix_len) + customized_func_suffix;
}
}
};
/// Swap 'if' and 'distinct' suffixes to make execution more optimal.
char ifDistinct[] = "ifdistinct";
using CustomizeIfDistinctVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeFunctionsSuffixData<ifDistinct>>, true>;
/// Translate qualified names such as db.table.column, table.column, table_alias.column to names' normal form.
/// Expand asterisks and qualified asterisks with column names.
@ -1035,8 +1058,14 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(ASTPtr & query, const NamesAndTy
void SyntaxAnalyzer::normalize(ASTPtr & query, Aliases & aliases, const Settings & settings)
{
CustomizeFunctionsVisitor::Data data{settings.count_distinct_implementation};
CustomizeFunctionsVisitor(data).visit(query);
CustomizeCountDistinctVisitor::Data data_count_distinct{settings.count_distinct_implementation};
CustomizeCountDistinctVisitor(data_count_distinct).visit(query);
CustomizeCountIfDistinctVisitor::Data data_count_if_distinct{settings.count_distinct_implementation.toString() + "If"};
CustomizeCountIfDistinctVisitor(data_count_if_distinct).visit(query);
CustomizeIfDistinctVisitor::Data data_distinct_if{"DistinctIf"};
CustomizeIfDistinctVisitor(data_distinct_if).visit(query);
if (settings.transform_null_in)
{

View File

@ -1,4 +1,6 @@
<test>
<query>SELECT x, sum(DISTINCT y) from (SELECT number % 12 AS x, number % 12321 AS y FROM numbers(10000000)) GROUP BY x</query>
<query>SELECT x, sum(y) from (SELECT DISTINCT number % 12 AS x, number % 12321 AS y FROM numbers(10000000)) GROUP BY x</query>
<query>SELECT sumIf(DISTINCT number % 10, number % 5 = 2) FROM numbers(100000000)</query>
<query>SELECT countIf(DISTINCT number % 10, number % 5 = 2) FROM numbers(100000000)</query>
</test>

View File

@ -9,3 +9,7 @@
21
24
27
2
SELECT uniqExactIf(number % 10, (number % 5) = 2)\nFROM numbers(10000)
9
SELECT sumDistinctIf(number % 10, (number % 5) = 2)\nFROM numbers(10000)

View File

@ -5,5 +5,11 @@ SELECT finalizeAggregation(countState(DISTINCT toString(number % 20))) FROM numb
SELECT round(corrStable(DISTINCT x, y), 5) FROM (SELECT number % 10 AS x, number % 5 AS y FROM numbers(1000));
SELECT round(corrStable(x, y), 5) FROM (SELECT DISTINCT number % 10 AS x, number % 5 AS y FROM numbers(1000));
SELECT sum(DISTINCT y) FROM (SELECT number % 5 AS x, number % 15 AS y FROM numbers(1000)) GROUP BY x;
SELECT sum(DISTINCT y) FROM (SELECT number % 5 AS x, number % 15 AS y FROM numbers(1000)) GROUP BY x;
SET enable_debug_queries = 1;
SELECT countIf(DISTINCT number % 10, number % 5 = 2) FROM numbers(10000);
ANALYZE SELECT countIf(DISTINCT number % 10, number % 5 = 2) FROM numbers(10000);
SELECT sumIf(DISTINCT number % 10, number % 5 = 2) FROM numbers(10000);
ANALYZE SELECT sumIf(DISTINCT number % 10, number % 5 = 2) FROM numbers(10000);