Fix NOT-IN index optimization when not all keys are used.

This commit is contained in:
Amos Bird 2021-08-29 16:49:30 +08:00
parent 703101fe4d
commit 18a7adf0fa
No known key found for this signature in database
GPG Key ID: 80D430DCBECFEDB4
4 changed files with 24 additions and 5 deletions

View File

@ -402,8 +402,8 @@ void Set::checkTypesEqual(size_t set_type_idx, const DataTypePtr & other_type) c
+ data_types[set_type_idx]->getName() + " on the right", ErrorCodes::TYPE_MISMATCH);
}
MergeTreeSetIndex::MergeTreeSetIndex(const Columns & set_elements, std::vector<KeyTuplePositionMapping> && index_mapping_)
: indexes_mapping(std::move(index_mapping_))
MergeTreeSetIndex::MergeTreeSetIndex(const Columns & set_elements, std::vector<KeyTuplePositionMapping> && indexes_mapping_)
: has_all_keys(set_elements.size() == indexes_mapping_.size()), indexes_mapping(std::move(indexes_mapping_))
{
std::sort(indexes_mapping.begin(), indexes_mapping.end(),
[](const KeyTuplePositionMapping & l, const KeyTuplePositionMapping & r)
@ -548,11 +548,11 @@ BoolMask MergeTreeSetIndex::checkInRange(const std::vector<Range> & key_ranges,
break;
}
}
if (one_element_range)
if (one_element_range && has_all_keys)
{
/// Here we know that there is one element in range.
/// The main difference with the normal case is that we can definitely say that
/// condition in this range always TRUE (can_be_false = 0) xor always FALSE (can_be_true = 0).
/// condition in this range is always TRUE (can_be_false = 0) or always FALSE (can_be_true = 0).
/// Check if it's an empty range
if (!left_included || !right_included)

View File

@ -208,7 +208,7 @@ public:
std::vector<FunctionBasePtr> functions;
};
MergeTreeSetIndex(const Columns & set_elements, std::vector<KeyTuplePositionMapping> && index_mapping_);
MergeTreeSetIndex(const Columns & set_elements, std::vector<KeyTuplePositionMapping> && indexes_mapping_);
size_t size() const { return ordered_set.at(0)->size(); }
@ -217,6 +217,8 @@ public:
BoolMask checkInRange(const std::vector<Range> & key_ranges, const DataTypes & data_types) const;
private:
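// True when every element of the IN tuple is mapped to a key column.
// Only then can a key range that contains exactly one element be decided precisely
// (always TRUE or always FALSE), which is what the NOT IN optimization relies on.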
bool has_all_keys;
Columns ordered_set;
std::vector<KeyTuplePositionMapping> indexes_mapping;

View File

@ -4,3 +4,5 @@
7 107
8 108
9 109
1970-01-01 1 one
1970-01-01 3 three

View File

@ -8,3 +8,18 @@ set max_rows_to_read = 5;
select * from test1 where i not in (1,2,3,4,5) order by i;
drop table test1;
-- Regression test: NOT IN over a tuple in which only some elements are key columns.
-- Both tables are ordered by `date` alone, while the tuple below is (date, a, b).
drop table if exists t1;
drop table if exists t2;
create table t1 (date Date, a Float64, b String) Engine=MergeTree ORDER BY date;
create table t2 (date Date, a Float64, b String) Engine=MergeTree ORDER BY date;
-- `date` is not listed in the inserts, so every row gets the same default date
-- (1970-01-01 in the reference output); rows differ only in the non-key columns.
insert into t1(a, b) values (1, 'one'), (2, 'two');
insert into t2(a, b) values (2, 'two'), (3, 'three');
-- Expected: the row of t1 that is absent from t2, i.e. (1970-01-01, 1, 'one').
select date, a, b from t1 where (date, a, b) NOT IN (select date,a,b from t2);
-- Expected: the row of t2 that is absent from t1, i.e. (1970-01-01, 3, 'three').
select date, a, b from t2 where (date, a, b) NOT IN (select date,a,b from t1);
drop table t1;
drop table t2;