mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 23:52:03 +00:00
Merge pull request #61567 from Avogar/fix-analyzer-group-by-use-nulls
Fix logical error in group_by_use_nulls + grouping set + analyzer + materialize/constant
This commit is contained in:
commit
facde7541f
@ -4,6 +4,7 @@
|
||||
|
||||
#include <Analyzer/IQueryTreeNode.h>
|
||||
#include <Analyzer/ConstantValue.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -86,6 +87,11 @@ public:
|
||||
mask_id = id;
|
||||
}
|
||||
|
||||
void convertToNullable() override
|
||||
{
|
||||
constant_value = std::make_shared<ConstantValue>(constant_value->getValue(), makeNullableSafe(constant_value->getType()));
|
||||
}
|
||||
|
||||
void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override;
|
||||
|
||||
protected:
|
||||
|
@ -31,6 +31,12 @@ public:
|
||||
if (!getSettings().optimize_group_by_function_keys)
|
||||
return;
|
||||
|
||||
/// When group_by_use_nulls = 1 removing keys from GROUP BY can lead
|
||||
/// to unexpected types in some functions.
|
||||
/// See example in https://github.com/ClickHouse/ClickHouse/pull/61567#issuecomment-2018007887
|
||||
if (getSettings().group_by_use_nulls)
|
||||
return;
|
||||
|
||||
auto * query = node->as<QueryNode>();
|
||||
if (!query)
|
||||
return;
|
||||
@ -73,12 +79,14 @@ private:
|
||||
candidates.push_back({ *it, is_deterministic });
|
||||
|
||||
/// Using DFS we traverse function tree and try to find if it uses other keys as function arguments.
|
||||
bool found_at_least_one_usage = false;
|
||||
while (!candidates.empty())
|
||||
{
|
||||
auto [candidate, parents_are_only_deterministic] = candidates.back();
|
||||
candidates.pop_back();
|
||||
|
||||
bool found = group_by_keys.contains(candidate);
|
||||
found_at_least_one_usage |= found;
|
||||
|
||||
switch (candidate->getNodeType())
|
||||
{
|
||||
@ -111,7 +119,7 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
return found_at_least_one_usage;
|
||||
}
|
||||
|
||||
static void optimizeGroupingSet(QueryTreeNodes & grouping_set)
|
||||
|
@ -43,6 +43,13 @@ public:
|
||||
if (!getSettings().optimize_injective_functions_in_group_by)
|
||||
return;
|
||||
|
||||
/// Don't optimize injective functions when group_by_use_nulls=true,
|
||||
/// because in this case we make initial group by keys Nullable
|
||||
/// and eliminating some functions can cause issues with arguments Nullability
|
||||
/// during their execution. See examples in https://github.com/ClickHouse/ClickHouse/pull/61567#issuecomment-2008181143
|
||||
if (getSettings().group_by_use_nulls)
|
||||
return;
|
||||
|
||||
auto * query = node->as<QueryNode>();
|
||||
if (!query)
|
||||
return;
|
||||
|
@ -36,6 +36,8 @@ public:
|
||||
|
||||
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
|
||||
|
||||
bool isSuitableForConstantFolding() const override { return false; }
|
||||
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
|
||||
|
||||
size_t getNumberOfArguments() const override
|
||||
|
@ -1,2 +0,0 @@
|
||||
1
|
||||
1
|
@ -5,6 +5,6 @@ set allow_experimental_analyzer = 0;
|
||||
SELECT count() FROM test1__fuzz_37 GROUP BY dictHas(NULL, (dictHas(NULL, (('', materialize(NULL)), materialize(NULL))), 'KeyKey')), dictHas('test_dictionary', tuple(materialize('Ke\0'))), tuple(dictHas(NULL, (tuple('Ke\0Ke\0Ke\0Ke\0Ke\0Ke\0\0\0\0Ke\0'), materialize(NULL)))), 'test_dicti\0nary', (('', materialize(NULL)), dictHas(NULL, (dictHas(NULL, tuple(materialize(NULL))), 'KeyKeyKeyKeyKeyKeyKeyKey')), materialize(NULL)); -- { serverError BAD_ARGUMENTS }
|
||||
SELECT count() FROM test1__fuzz_37 GROUP BY dictHas('non_existing_dictionary', materialize('a')); -- { serverError BAD_ARGUMENTS }
|
||||
set allow_experimental_analyzer = 1;
|
||||
SELECT count() FROM test1__fuzz_37 GROUP BY dictHas(NULL, (dictHas(NULL, (('', materialize(NULL)), materialize(NULL))), 'KeyKey')), dictHas('test_dictionary', tuple(materialize('Ke\0'))), tuple(dictHas(NULL, (tuple('Ke\0Ke\0Ke\0Ke\0Ke\0Ke\0\0\0\0Ke\0'), materialize(NULL)))), 'test_dicti\0nary', (('', materialize(NULL)), dictHas(NULL, (dictHas(NULL, tuple(materialize(NULL))), 'KeyKeyKeyKeyKeyKeyKeyKey')), materialize(NULL));
|
||||
SELECT count() FROM test1__fuzz_37 GROUP BY dictHas('non_existing_dictionary', materialize('a'));
|
||||
SELECT count() FROM test1__fuzz_37 GROUP BY dictHas(NULL, (dictHas(NULL, (('', materialize(NULL)), materialize(NULL))), 'KeyKey')), dictHas('test_dictionary', tuple(materialize('Ke\0'))), tuple(dictHas(NULL, (tuple('Ke\0Ke\0Ke\0Ke\0Ke\0Ke\0\0\0\0Ke\0'), materialize(NULL)))), 'test_dicti\0nary', (('', materialize(NULL)), dictHas(NULL, (dictHas(NULL, tuple(materialize(NULL))), 'KeyKeyKeyKeyKeyKeyKeyKey')), materialize(NULL)); -- { serverError BAD_ARGUMENTS }
|
||||
SELECT count() FROM test1__fuzz_37 GROUP BY dictHas('non_existing_dictionary', materialize('a')); -- { serverError BAD_ARGUMENTS }
|
||||
DROP TABLE test1__fuzz_37;
|
||||
|
@ -0,0 +1,10 @@
|
||||
6
|
||||
6
|
||||
3
|
||||
\N
|
||||
0
|
||||
\N
|
||||
0
|
||||
\N
|
||||
0
|
||||
\N
|
@ -0,0 +1,11 @@
|
||||
set allow_experimental_analyzer = 1;
|
||||
set group_by_use_nulls = 1;
|
||||
set optimize_group_by_function_keys = 1;
|
||||
set optimize_injective_functions_in_group_by = 1;
|
||||
|
||||
SELECT 3 + 3 from numbers(10) GROUP BY GROUPING SETS (('str'), (3 + 3)) order by all;
|
||||
SELECT materialize(3) from numbers(10) GROUP BY GROUPING SETS (('str'), (materialize(3))) order by all;
|
||||
SELECT ignore(3) from numbers(10) GROUP BY GROUPING SETS (('str'), (ignore(3))) order by all;
|
||||
SELECT materialize(ignore(3)) from numbers(10) GROUP BY GROUPING SETS (('str'), (materialize(ignore(3)))) order by all;
|
||||
SELECT ignore(materialize(3)) from numbers(10) GROUP BY GROUPING SETS (('str'), (ignore(materialize(3)))) order by all;
|
||||
|
@ -0,0 +1,24 @@
|
||||
3
|
||||
4
|
||||
5
|
||||
6
|
||||
7
|
||||
8
|
||||
9
|
||||
10
|
||||
11
|
||||
12
|
||||
\N
|
||||
(((0)))
|
||||
(((0)))
|
||||
(((1)))
|
||||
(((2)))
|
||||
(((3)))
|
||||
(((4)))
|
||||
(((5)))
|
||||
(((6)))
|
||||
(((7)))
|
||||
(((8)))
|
||||
(((9)))
|
||||
6
|
||||
6
|
@ -0,0 +1,5 @@
|
||||
set allow_experimental_analyzer=1, group_by_use_nulls=1, optimize_injective_functions_in_group_by=1;
|
||||
SELECT bitNot(bitNot(number)) + 3 FROM numbers(10) GROUP BY GROUPING SETS (('str', bitNot(bitNot(number))), ('str')) order by all;
|
||||
SELECT tuple(tuple(tuple(number))) FROM numbers(10) GROUP BY GROUPING SETS (('str', tuple(tuple(number))), ('str')) order by all;
|
||||
SELECT materialize(3) + 3 FROM numbers(10) GROUP BY GROUPING SETS (('str', materialize(materialize(3))), ('str')) order by all;
|
||||
|
@ -0,0 +1,10 @@
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
@ -0,0 +1,5 @@
|
||||
set allow_experimental_analyzer=1;
|
||||
set group_by_use_nulls=1;
|
||||
set optimize_group_by_function_keys=1;
|
||||
SELECT ignore(toLowCardinality(number)) FROM numbers(10) GROUP BY GROUPING SETS ((ignore(toLowCardinality(number)), toLowCardinality(number)));
|
||||
|
Loading…
Reference in New Issue
Block a user