mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-17 20:02:05 +00:00
more optimal aggregate functions with both 'if' and 'distinct' combinators
This commit is contained in:
parent
3f0f0ff5ad
commit
2f95daf4ed
@ -88,7 +88,10 @@ struct CustomizeFunctionsData
|
||||
};
|
||||
|
||||
char countdistinct[] = "countdistinct";
|
||||
using CustomizeFunctionsVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeFunctionsData<countdistinct>>, true>;
|
||||
using CustomizeCountDistinctVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeFunctionsData<countdistinct>>, true>;
|
||||
|
||||
char countifdistinct[] = "countifdistinct";
|
||||
using CustomizeCountIfDistinctVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeFunctionsData<countifdistinct>>, true>;
|
||||
|
||||
char in[] = "in";
|
||||
using CustomizeInVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeFunctionsData<in>>, true>;
|
||||
@ -102,6 +105,26 @@ using CustomizeGlobalInVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeFunc
|
||||
char globalNotIn[] = "globalnotin";
|
||||
using CustomizeGlobalNotInVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeFunctionsData<globalNotIn>>, true>;
|
||||
|
||||
template <char const * func_suffix>
|
||||
struct CustomizeFunctionsSuffixData
|
||||
{
|
||||
using TypeToVisit = ASTFunction;
|
||||
|
||||
const String & customized_func_suffix;
|
||||
|
||||
void visit(ASTFunction & func, ASTPtr &)
|
||||
{
|
||||
if (endsWith(Poco::toLower(func.name), func_suffix))
|
||||
{
|
||||
size_t prefix_len = func.name.length() - strlen(func_suffix);
|
||||
func.name = func.name.substr(0, prefix_len) + customized_func_suffix;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/// Rewrite the '...IfDistinct' suffix of a function name to '...DistinctIf' (e.g. sumIfDistinct -> sumDistinctIf) so that execution is more efficient.
|
||||
char ifDistinct[] = "ifdistinct";
|
||||
using CustomizeIfDistinctVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeFunctionsSuffixData<ifDistinct>>, true>;
|
||||
|
||||
/// Translate qualified names such as db.table.column, table.column, table_alias.column to names' normal form.
|
||||
/// Expand asterisks and qualified asterisks with column names.
|
||||
@ -1035,8 +1058,14 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(ASTPtr & query, const NamesAndTy
|
||||
|
||||
void SyntaxAnalyzer::normalize(ASTPtr & query, Aliases & aliases, const Settings & settings)
|
||||
{
|
||||
CustomizeFunctionsVisitor::Data data{settings.count_distinct_implementation};
|
||||
CustomizeFunctionsVisitor(data).visit(query);
|
||||
CustomizeCountDistinctVisitor::Data data_count_distinct{settings.count_distinct_implementation};
|
||||
CustomizeCountDistinctVisitor(data_count_distinct).visit(query);
|
||||
|
||||
CustomizeCountIfDistinctVisitor::Data data_count_if_distinct{settings.count_distinct_implementation.toString() + "If"};
|
||||
CustomizeCountIfDistinctVisitor(data_count_if_distinct).visit(query);
|
||||
|
||||
CustomizeIfDistinctVisitor::Data data_distinct_if{"DistinctIf"};
|
||||
CustomizeIfDistinctVisitor(data_distinct_if).visit(query);
|
||||
|
||||
if (settings.transform_null_in)
|
||||
{
|
||||
|
@ -1,4 +1,6 @@
|
||||
<test>
|
||||
<query>SELECT x, sum(DISTINCT y) from (SELECT number % 12 AS x, number % 12321 AS y FROM numbers(10000000)) GROUP BY x</query>
|
||||
<query>SELECT x, sum(y) from (SELECT DISTINCT number % 12 AS x, number % 12321 AS y FROM numbers(10000000)) GROUP BY x</query>
|
||||
<query>SELECT sumIf(DISTINCT number % 10, number % 5 = 2) FROM numbers(100000000)</query>
|
||||
<query>SELECT countIf(DISTINCT number % 10, number % 5 = 2) FROM numbers(100000000)</query>
|
||||
</test>
|
||||
|
@ -9,3 +9,7 @@
|
||||
21
|
||||
24
|
||||
27
|
||||
2
|
||||
SELECT uniqExactIf(number % 10, (number % 5) = 2)\nFROM numbers(10000)
|
||||
9
|
||||
SELECT sumDistinctIf(number % 10, (number % 5) = 2)\nFROM numbers(10000)
|
||||
|
@ -5,5 +5,11 @@ SELECT finalizeAggregation(countState(DISTINCT toString(number % 20))) FROM numb
|
||||
SELECT round(corrStable(DISTINCT x, y), 5) FROM (SELECT number % 10 AS x, number % 5 AS y FROM numbers(1000));
|
||||
SELECT round(corrStable(x, y), 5) FROM (SELECT DISTINCT number % 10 AS x, number % 5 AS y FROM numbers(1000));
|
||||
|
||||
SELECT sum(DISTINCT y) FROM (SELECT number % 5 AS x, number % 15 AS y FROM numbers(1000)) GROUP BY x;
|
||||
|
||||
SELECT sum(DISTINCT y) FROM (SELECT number % 5 AS x, number % 15 AS y FROM numbers(1000)) GROUP BY x;
|
||||
SET enable_debug_queries = 1;
|
||||
SELECT countIf(DISTINCT number % 10, number % 5 = 2) FROM numbers(10000);
|
||||
ANALYZE SELECT countIf(DISTINCT number % 10, number % 5 = 2) FROM numbers(10000);
|
||||
|
||||
SELECT sumIf(DISTINCT number % 10, number % 5 = 2) FROM numbers(10000);
|
||||
ANALYZE SELECT sumIf(DISTINCT number % 10, number % 5 = 2) FROM numbers(10000);
|
||||
|
Loading…
Reference in New Issue
Block a user