mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-18 04:12:19 +00:00
Fix NOT-IN index optimization when not all keys are used.
This commit is contained in:
parent
703101fe4d
commit
18a7adf0fa
@ -402,8 +402,8 @@ void Set::checkTypesEqual(size_t set_type_idx, const DataTypePtr & other_type) c
|
||||
+ data_types[set_type_idx]->getName() + " on the right", ErrorCodes::TYPE_MISMATCH);
|
||||
}
|
||||
|
||||
MergeTreeSetIndex::MergeTreeSetIndex(const Columns & set_elements, std::vector<KeyTuplePositionMapping> && index_mapping_)
|
||||
: indexes_mapping(std::move(index_mapping_))
|
||||
MergeTreeSetIndex::MergeTreeSetIndex(const Columns & set_elements, std::vector<KeyTuplePositionMapping> && indexes_mapping_)
|
||||
: has_all_keys(set_elements.size() == indexes_mapping_.size()), indexes_mapping(std::move(indexes_mapping_))
|
||||
{
|
||||
std::sort(indexes_mapping.begin(), indexes_mapping.end(),
|
||||
[](const KeyTuplePositionMapping & l, const KeyTuplePositionMapping & r)
|
||||
@ -548,11 +548,11 @@ BoolMask MergeTreeSetIndex::checkInRange(const std::vector<Range> & key_ranges,
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (one_element_range)
|
||||
if (one_element_range && has_all_keys)
|
||||
{
|
||||
/// Here we know that there is one element in range.
|
||||
/// The main difference with the normal case is that we can definitely say that
|
||||
/// condition in this range always TRUE (can_be_false = 0) xor always FALSE (can_be_true = 0).
|
||||
/// condition in this range is always TRUE (can_be_false = 0) or always FALSE (can_be_true = 0).
|
||||
|
||||
/// Check if it's an empty range
|
||||
if (!left_included || !right_included)
|
||||
|
@ -208,7 +208,7 @@ public:
|
||||
std::vector<FunctionBasePtr> functions;
|
||||
};
|
||||
|
||||
MergeTreeSetIndex(const Columns & set_elements, std::vector<KeyTuplePositionMapping> && index_mapping_);
|
||||
MergeTreeSetIndex(const Columns & set_elements, std::vector<KeyTuplePositionMapping> && indexes_mapping_);
|
||||
|
||||
size_t size() const { return ordered_set.at(0)->size(); }
|
||||
|
||||
@ -217,6 +217,8 @@ public:
|
||||
BoolMask checkInRange(const std::vector<Range> & key_ranges, const DataTypes & data_types) const;
|
||||
|
||||
private:
|
||||
// True when every tuple argument is a key column; only then can NOT IN be optimized for a key range that contains exactly one element.
|
||||
bool has_all_keys;
|
||||
Columns ordered_set;
|
||||
std::vector<KeyTuplePositionMapping> indexes_mapping;
|
||||
|
||||
|
@ -4,3 +4,5 @@
|
||||
7 107
|
||||
8 108
|
||||
9 109
|
||||
1970-01-01 1 one
|
||||
1970-01-01 3 three
|
||||
|
@ -8,3 +8,18 @@ set max_rows_to_read = 5;
|
||||
select * from test1 where i not in (1,2,3,4,5) order by i;
|
||||
|
||||
drop table test1;
|
||||
|
||||
drop table if exists t1;
|
||||
drop table if exists t2;
|
||||
|
||||
create table t1 (date Date, a Float64, b String) Engine=MergeTree ORDER BY date;
|
||||
create table t2 (date Date, a Float64, b String) Engine=MergeTree ORDER BY date;
|
||||
|
||||
insert into t1(a, b) values (1, 'one'), (2, 'two');
|
||||
insert into t2(a, b) values (2, 'two'), (3, 'three');
|
||||
|
||||
select date, a, b from t1 where (date, a, b) NOT IN (select date,a,b from t2);
|
||||
select date, a, b from t2 where (date, a, b) NOT IN (select date,a,b from t1);
|
||||
|
||||
drop table t1;
|
||||
drop table t2;
|
||||
|
Loading…
Reference in New Issue
Block a user