Merge pull request #61567 from Avogar/fix-analyzer-group-by-use-nulls

Fix logical error in group_by_use_nulls + grouping set + analyzer + materialize/constant
This commit is contained in:
Kruglov Pavel 2024-03-28 14:38:11 +01:00 committed by GitHub
commit facde7541f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 91 additions and 5 deletions

View File

@ -4,6 +4,7 @@
#include <Analyzer/IQueryTreeNode.h>
#include <Analyzer/ConstantValue.h>
#include <DataTypes/DataTypeNullable.h>
namespace DB
{
@ -86,6 +87,11 @@ public:
mask_id = id;
}
void convertToNullable() override
{
constant_value = std::make_shared<ConstantValue>(constant_value->getValue(), makeNullableSafe(constant_value->getType()));
}
void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override;
protected:

View File

@ -31,6 +31,12 @@ public:
if (!getSettings().optimize_group_by_function_keys)
return;
/// When group_by_use_nulls = 1 removing keys from GROUP BY can lead
/// to unexpected types in some functions.
/// See example in https://github.com/ClickHouse/ClickHouse/pull/61567#issuecomment-2018007887
if (getSettings().group_by_use_nulls)
return;
auto * query = node->as<QueryNode>();
if (!query)
return;
@ -73,12 +79,14 @@ private:
candidates.push_back({ *it, is_deterministic });
/// Using DFS we traverse function tree and try to find if it uses other keys as function arguments.
bool found_at_least_one_usage = false;
while (!candidates.empty())
{
auto [candidate, parents_are_only_deterministic] = candidates.back();
candidates.pop_back();
bool found = group_by_keys.contains(candidate);
found_at_least_one_usage |= found;
switch (candidate->getNodeType())
{
@ -111,7 +119,7 @@ private:
}
}
return true;
return found_at_least_one_usage;
}
static void optimizeGroupingSet(QueryTreeNodes & grouping_set)

View File

@ -43,6 +43,13 @@ public:
if (!getSettings().optimize_injective_functions_in_group_by)
return;
/// Don't optimize injective functions when group_by_use_nulls=true,
/// because in this case we make initial group by keys Nullable
/// and eliminating some functions can cause issues with arguments Nullability
/// during their execution. See examples in https://github.com/ClickHouse/ClickHouse/pull/61567#issuecomment-2008181143
if (getSettings().group_by_use_nulls)
return;
auto * query = node->as<QueryNode>();
if (!query)
return;

View File

@ -36,6 +36,8 @@ public:
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
bool isSuitableForConstantFolding() const override { return false; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
size_t getNumberOfArguments() const override

View File

@ -5,6 +5,6 @@ set allow_experimental_analyzer = 0;
SELECT count() FROM test1__fuzz_37 GROUP BY dictHas(NULL, (dictHas(NULL, (('', materialize(NULL)), materialize(NULL))), 'KeyKey')), dictHas('test_dictionary', tuple(materialize('Ke\0'))), tuple(dictHas(NULL, (tuple('Ke\0Ke\0Ke\0Ke\0Ke\0Ke\0\0\0\0Ke\0'), materialize(NULL)))), 'test_dicti\0nary', (('', materialize(NULL)), dictHas(NULL, (dictHas(NULL, tuple(materialize(NULL))), 'KeyKeyKeyKeyKeyKeyKeyKey')), materialize(NULL)); -- { serverError BAD_ARGUMENTS }
SELECT count() FROM test1__fuzz_37 GROUP BY dictHas('non_existing_dictionary', materialize('a')); -- { serverError BAD_ARGUMENTS }
set allow_experimental_analyzer = 1;
SELECT count() FROM test1__fuzz_37 GROUP BY dictHas(NULL, (dictHas(NULL, (('', materialize(NULL)), materialize(NULL))), 'KeyKey')), dictHas('test_dictionary', tuple(materialize('Ke\0'))), tuple(dictHas(NULL, (tuple('Ke\0Ke\0Ke\0Ke\0Ke\0Ke\0\0\0\0Ke\0'), materialize(NULL)))), 'test_dicti\0nary', (('', materialize(NULL)), dictHas(NULL, (dictHas(NULL, tuple(materialize(NULL))), 'KeyKeyKeyKeyKeyKeyKeyKey')), materialize(NULL));
SELECT count() FROM test1__fuzz_37 GROUP BY dictHas('non_existing_dictionary', materialize('a'));
SELECT count() FROM test1__fuzz_37 GROUP BY dictHas(NULL, (dictHas(NULL, (('', materialize(NULL)), materialize(NULL))), 'KeyKey')), dictHas('test_dictionary', tuple(materialize('Ke\0'))), tuple(dictHas(NULL, (tuple('Ke\0Ke\0Ke\0Ke\0Ke\0Ke\0\0\0\0Ke\0'), materialize(NULL)))), 'test_dicti\0nary', (('', materialize(NULL)), dictHas(NULL, (dictHas(NULL, tuple(materialize(NULL))), 'KeyKeyKeyKeyKeyKeyKeyKey')), materialize(NULL)); -- { serverError BAD_ARGUMENTS }
SELECT count() FROM test1__fuzz_37 GROUP BY dictHas('non_existing_dictionary', materialize('a')); -- { serverError BAD_ARGUMENTS }
DROP TABLE test1__fuzz_37;

View File

@ -0,0 +1,10 @@
6
6
3
\N
0
\N
0
\N
0
\N

View File

@ -0,0 +1,11 @@
set allow_experimental_analyzer = 1;
set group_by_use_nulls = 1;
set optimize_group_by_function_keys = 1;
set optimize_injective_functions_in_group_by = 1;
SELECT 3 + 3 from numbers(10) GROUP BY GROUPING SETS (('str'), (3 + 3)) order by all;
SELECT materialize(3) from numbers(10) GROUP BY GROUPING SETS (('str'), (materialize(3))) order by all;
SELECT ignore(3) from numbers(10) GROUP BY GROUPING SETS (('str'), (ignore(3))) order by all;
SELECT materialize(ignore(3)) from numbers(10) GROUP BY GROUPING SETS (('str'), (materialize(ignore(3)))) order by all;
SELECT ignore(materialize(3)) from numbers(10) GROUP BY GROUPING SETS (('str'), (ignore(materialize(3)))) order by all;

View File

@ -0,0 +1,24 @@
3
4
5
6
7
8
9
10
11
12
\N
(((0)))
(((0)))
(((1)))
(((2)))
(((3)))
(((4)))
(((5)))
(((6)))
(((7)))
(((8)))
(((9)))
6
6

View File

@ -0,0 +1,5 @@
set allow_experimental_analyzer=1, group_by_use_nulls=1, optimize_injective_functions_in_group_by=1;
SELECT bitNot(bitNot(number)) + 3 FROM numbers(10) GROUP BY GROUPING SETS (('str', bitNot(bitNot(number))), ('str')) order by all;
SELECT tuple(tuple(tuple(number))) FROM numbers(10) GROUP BY GROUPING SETS (('str', tuple(tuple(number))), ('str')) order by all;
SELECT materialize(3) + 3 FROM numbers(10) GROUP BY GROUPING SETS (('str', materialize(materialize(3))), ('str')) order by all;

View File

@ -0,0 +1,10 @@
0
0
0
0
0
0
0
0
0
0

View File

@ -0,0 +1,5 @@
set allow_experimental_analyzer=1;
set group_by_use_nulls=1;
set optimize_group_by_function_keys=1;
SELECT ignore(toLowCardinality(number)) FROM numbers(10) GROUP BY GROUPING SETS ((ignore(toLowCardinality(number)), toLowCardinality(number)));