diff --git a/src/Interpreters/Set.cpp b/src/Interpreters/Set.cpp index 5ab59ba3f07..5304859aeea 100644 --- a/src/Interpreters/Set.cpp +++ b/src/Interpreters/Set.cpp @@ -402,8 +402,8 @@ void Set::checkTypesEqual(size_t set_type_idx, const DataTypePtr & other_type) c + data_types[set_type_idx]->getName() + " on the right", ErrorCodes::TYPE_MISMATCH); } -MergeTreeSetIndex::MergeTreeSetIndex(const Columns & set_elements, std::vector && index_mapping_) - : indexes_mapping(std::move(index_mapping_)) +MergeTreeSetIndex::MergeTreeSetIndex(const Columns & set_elements, std::vector && indexes_mapping_) + : has_all_keys(set_elements.size() == indexes_mapping_.size()), indexes_mapping(std::move(indexes_mapping_)) { std::sort(indexes_mapping.begin(), indexes_mapping.end(), [](const KeyTuplePositionMapping & l, const KeyTuplePositionMapping & r) @@ -548,11 +548,11 @@ BoolMask MergeTreeSetIndex::checkInRange(const std::vector & key_ranges, break; } } - if (one_element_range) + if (one_element_range && has_all_keys) { /// Here we know that there is one element in range. /// The main difference with the normal case is that we can definitely say that - /// condition in this range always TRUE (can_be_false = 0) xor always FALSE (can_be_true = 0). + /// condition in this range is always TRUE (can_be_false = 0) or always FALSE (can_be_true = 0). /// Check if it's an empty range if (!left_included || !right_included) diff --git a/src/Interpreters/Set.h b/src/Interpreters/Set.h index 727a2c144a1..578913dd0d2 100644 --- a/src/Interpreters/Set.h +++ b/src/Interpreters/Set.h @@ -208,7 +208,7 @@ public: std::vector functions; }; - MergeTreeSetIndex(const Columns & set_elements, std::vector && index_mapping_); + MergeTreeSetIndex(const Columns & set_elements, std::vector && indexes_mapping_); size_t size() const { return ordered_set.at(0)->size(); } @@ -217,6 +217,8 @@ public: BoolMask checkInRange(const std::vector & key_ranges, const DataTypes & data_types) const; private: + // If all arguments in tuple are key columns, we can optimize NOT IN when there is only one element. + bool has_all_keys; Columns ordered_set; std::vector indexes_mapping; diff --git a/tests/queries/0_stateless/01891_not_in_partition_prune.reference b/tests/queries/0_stateless/01891_not_in_partition_prune.reference index 628053cd4f8..9d2517ad760 100644 --- a/tests/queries/0_stateless/01891_not_in_partition_prune.reference +++ b/tests/queries/0_stateless/01891_not_in_partition_prune.reference @@ -4,3 +4,5 @@ 7 107 8 108 9 109 +1970-01-01 1 one +1970-01-01 3 three diff --git a/tests/queries/0_stateless/01891_not_in_partition_prune.sql b/tests/queries/0_stateless/01891_not_in_partition_prune.sql index edbfad93e5d..5bf90fdd65c 100644 --- a/tests/queries/0_stateless/01891_not_in_partition_prune.sql +++ b/tests/queries/0_stateless/01891_not_in_partition_prune.sql @@ -8,3 +8,18 @@ set max_rows_to_read = 5; select * from test1 where i not in (1,2,3,4,5) order by i; drop table test1; + +drop table if exists t1; +drop table if exists t2; + +create table t1 (date Date, a Float64, b String) Engine=MergeTree ORDER BY date; +create table t2 (date Date, a Float64, b String) Engine=MergeTree ORDER BY date; + +insert into t1(a, b) values (1, 'one'), (2, 'two'); +insert into t2(a, b) values (2, 'two'), (3, 'three'); + +select date, a, b from t1 where (date, a, b) NOT IN (select date,a,b from t2); +select date, a, b from t2 where (date, a, b) NOT IN (select date,a,b from t1); + +drop table t1; +drop table t2;