Merge pull request #71739 from ClickHouse/backport/24.9/71695

Backport #71695 to 24.9: fix: transform set while partition pruning only if allowed
This commit is contained in:
robot-ch-test-poll3 2024-11-11 16:21:30 +01:00 committed by GitHub
commit 0d4a4fdbd6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 100 additions and 2 deletions

View File

@ -1146,6 +1146,7 @@ bool KeyCondition::tryPrepareSetIndex(
const RPNBuilderFunctionTreeNode & func,
RPNElement & out,
size_t & out_key_column_num,
bool & allow_constant_transformation,
bool & is_constant_transformed)
{
const auto & left_arg = func.getArgumentAt(0);
@ -1172,7 +1173,9 @@ bool KeyCondition::tryPrepareSetIndex(
set_transforming_chains.push_back(set_transforming_chain);
}
// For partition index, checking if set can be transformed to prune any partitions
else if (single_point && canSetValuesBeWrappedByFunctions(node, index_mapping.key_index, data_type, set_transforming_chain))
else if (
single_point && allow_constant_transformation
&& canSetValuesBeWrappedByFunctions(node, index_mapping.key_index, data_type, set_transforming_chain))
{
indexes_mapping.push_back(index_mapping);
data_types.push_back(data_type);
@ -1871,7 +1874,7 @@ bool KeyCondition::extractAtomFromTree(const RPNBuilderTreeNode & node, RPNEleme
if (functionIsInOrGlobalInOperator(func_name))
{
if (tryPrepareSetIndex(func, out, key_column_num, is_constant_transformed))
if (tryPrepareSetIndex(func, out, key_column_num, allow_constant_transformation, is_constant_transformed))
{
key_arg_pos = 0;
is_set_const = true;

View File

@ -295,6 +295,7 @@ private:
const RPNBuilderFunctionTreeNode & func,
RPNElement & out,
size_t & out_key_column_num,
bool & allow_constant_transformation,
bool & is_constant_transformed);
/// Checks that the index can not be used.

View File

@ -0,0 +1,13 @@
-- Monotonic function in partition key
48
48
-- Non-monotonic function in partition key
48
48
-- Multiple partition columns
50
50
96
96
98
98

View File

@ -0,0 +1,81 @@
-- Related to https://github.com/ClickHouse/ClickHouse/issues/69829
--
-- The main goal of this test is to assert that, during partition pruning,
-- the constant transformation of a set constant is NOT performed when it
-- is not allowed (the NOT IN operator case).
--
-- Helper table: holds two rows, (6, '2020-01-01') and (38, '2021-01-01').
-- The subquery forms of NOT IN later in this script read their sets from it.
DROP TABLE IF EXISTS 03269_filters;
CREATE TABLE 03269_filters (
id Int32,
dt Date
)
engine = MergeTree
order by id;
INSERT INTO 03269_filters
SELECT 6, '2020-01-01'
UNION ALL
SELECT 38, '2021-01-01';
-- Case 1: a single partition key column wrapped in intDiv(id, 10)
-- (the "monotonic function" case, per the section label printed below).
SELECT '-- Monotonic function in partition key';
DROP TABLE IF EXISTS 03269_single_monotonic;
CREATE TABLE 03269_single_monotonic(
id Int32
)
ENGINE = MergeTree
PARTITION BY intDiv(id, 10)
ORDER BY id;
-- 50 rows, one per value produced by numbers(50).
INSERT INTO 03269_single_monotonic SELECT number FROM numbers(50);
-- Only the two listed ids may be excluded, so each query must return 48.
-- The same predicate is checked with a literal set and with a subquery set.
SELECT count() FROM 03269_single_monotonic WHERE id NOT IN (6, 38);
SELECT count() FROM 03269_single_monotonic WHERE id NOT IN (
SELECT id FROM 03269_filters
);
DROP TABLE 03269_single_monotonic;
-- Case 2: a single partition key column wrapped in id % 10
-- (the "non-monotonic function" case, per the section label printed below).
SELECT '-- Non-monotonic function in partition key';
DROP TABLE IF EXISTS 03269_single_non_monotonic;
CREATE TABLE 03269_single_non_monotonic (
id Int32
)
ENGINE = MergeTree
PARTITION BY id % 10
ORDER BY id;
-- 50 rows, one per value produced by numbers(50).
INSERT INTO 03269_single_non_monotonic SELECT number FROM numbers(50);
-- Only the two listed ids may be excluded, so each query must return 48.
-- The same predicate is checked with a literal set and with a subquery set.
SELECT count() FROM 03269_single_non_monotonic WHERE id NOT IN (6, 38);
SELECT count() FROM 03269_single_non_monotonic WHERE id NOT IN (SELECT id FROM 03269_filters);
DROP TABLE 03269_single_non_monotonic;
-- Case 3: a composite partition key (dt, intDiv(id, 10)); NOT IN is checked
-- against each key column separately and against the (id, dt) tuple.
SELECT '-- Multiple partition columns';
DROP TABLE IF EXISTS 03269_multiple_part_cols;
CREATE TABLE 03269_multiple_part_cols (
id Int32,
dt Date,
)
ENGINE = MergeTree
PARTITION BY (dt, intDiv(id, 10))
ORDER BY id;
-- 100 rows: every value from numbers(50) once per date.
INSERT INTO 03269_multiple_part_cols
SELECT number, '2020-01-01' FROM numbers(50)
UNION ALL
SELECT number, '2021-01-01' FROM numbers(50);
-- Excluding one of the two dates leaves 50 rows (literal set, then subquery set).
SELECT count() FROM 03269_multiple_part_cols WHERE dt NOT IN ('2020-01-01');
SELECT count() FROM 03269_multiple_part_cols WHERE dt NOT IN (SELECT dt FROM 03269_filters WHERE dt < '2021-01-01');
-- Each excluded id occurs once per date, so 100 - 2 * 2 = 96 rows remain.
SELECT count() FROM 03269_multiple_part_cols WHERE id NOT IN (6, 38);
SELECT count() FROM 03269_multiple_part_cols WHERE id NOT IN (SELECT id FROM 03269_filters);
-- Each excluded (id, dt) pair matches exactly one row, so 98 rows remain.
SELECT count() FROM 03269_multiple_part_cols WHERE (id, dt) NOT IN ((6, '2020-01-01'), (38, '2021-01-01'));
SELECT count() FROM 03269_multiple_part_cols WHERE (id, dt) NOT IN (SELECT id, dt FROM 03269_filters);
DROP TABLE 03269_multiple_part_cols;
-- Also drop the helper table created at the top of this script, so the test
-- cleans up every table it creates. DROP prints nothing, so the expected
-- output is unchanged.
DROP TABLE 03269_filters;