mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-18 04:12:19 +00:00
Fix NOT-IN index optimization when not all keys are used.
This commit is contained in:
parent
703101fe4d
commit
18a7adf0fa
@ -402,8 +402,8 @@ void Set::checkTypesEqual(size_t set_type_idx, const DataTypePtr & other_type) c
|
|||||||
+ data_types[set_type_idx]->getName() + " on the right", ErrorCodes::TYPE_MISMATCH);
|
+ data_types[set_type_idx]->getName() + " on the right", ErrorCodes::TYPE_MISMATCH);
|
||||||
}
|
}
|
||||||
|
|
||||||
MergeTreeSetIndex::MergeTreeSetIndex(const Columns & set_elements, std::vector<KeyTuplePositionMapping> && index_mapping_)
|
MergeTreeSetIndex::MergeTreeSetIndex(const Columns & set_elements, std::vector<KeyTuplePositionMapping> && indexes_mapping_)
|
||||||
: indexes_mapping(std::move(index_mapping_))
|
: has_all_keys(set_elements.size() == indexes_mapping_.size()), indexes_mapping(std::move(indexes_mapping_))
|
||||||
{
|
{
|
||||||
std::sort(indexes_mapping.begin(), indexes_mapping.end(),
|
std::sort(indexes_mapping.begin(), indexes_mapping.end(),
|
||||||
[](const KeyTuplePositionMapping & l, const KeyTuplePositionMapping & r)
|
[](const KeyTuplePositionMapping & l, const KeyTuplePositionMapping & r)
|
||||||
@ -548,11 +548,11 @@ BoolMask MergeTreeSetIndex::checkInRange(const std::vector<Range> & key_ranges,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (one_element_range)
|
if (one_element_range && has_all_keys)
|
||||||
{
|
{
|
||||||
/// Here we know that there is one element in range.
|
/// Here we know that there is one element in range.
|
||||||
/// The main difference with the normal case is that we can definitely say that
|
/// The main difference with the normal case is that we can definitely say that
|
||||||
/// condition in this range always TRUE (can_be_false = 0) xor always FALSE (can_be_true = 0).
|
/// condition in this range is always TRUE (can_be_false = 0) or always FALSE (can_be_true = 0).
|
||||||
|
|
||||||
/// Check if it's an empty range
|
/// Check if it's an empty range
|
||||||
if (!left_included || !right_included)
|
if (!left_included || !right_included)
|
||||||
|
@ -208,7 +208,7 @@ public:
|
|||||||
std::vector<FunctionBasePtr> functions;
|
std::vector<FunctionBasePtr> functions;
|
||||||
};
|
};
|
||||||
|
|
||||||
MergeTreeSetIndex(const Columns & set_elements, std::vector<KeyTuplePositionMapping> && index_mapping_);
|
MergeTreeSetIndex(const Columns & set_elements, std::vector<KeyTuplePositionMapping> && indexes_mapping_);
|
||||||
|
|
||||||
size_t size() const { return ordered_set.at(0)->size(); }
|
size_t size() const { return ordered_set.at(0)->size(); }
|
||||||
|
|
||||||
@ -217,6 +217,8 @@ public:
|
|||||||
BoolMask checkInRange(const std::vector<Range> & key_ranges, const DataTypes & data_types) const;
|
BoolMask checkInRange(const std::vector<Range> & key_ranges, const DataTypes & data_types) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
// If all arguments in the tuple are key columns, we can optimize NOT IN when the key range contains only one element.
|
||||||
|
bool has_all_keys;
|
||||||
Columns ordered_set;
|
Columns ordered_set;
|
||||||
std::vector<KeyTuplePositionMapping> indexes_mapping;
|
std::vector<KeyTuplePositionMapping> indexes_mapping;
|
||||||
|
|
||||||
|
@ -4,3 +4,5 @@
|
|||||||
7 107
|
7 107
|
||||||
8 108
|
8 108
|
||||||
9 109
|
9 109
|
||||||
|
1970-01-01 1 one
|
||||||
|
1970-01-01 3 three
|
||||||
|
@ -8,3 +8,18 @@ set max_rows_to_read = 5;
|
|||||||
select * from test1 where i not in (1,2,3,4,5) order by i;
|
select * from test1 where i not in (1,2,3,4,5) order by i;
|
||||||
|
|
||||||
drop table test1;
|
drop table test1;
|
||||||
|
|
||||||
|
drop table if exists t1;
|
||||||
|
drop table if exists t2;
|
||||||
|
|
||||||
|
create table t1 (date Date, a Float64, b String) Engine=MergeTree ORDER BY date;
|
||||||
|
create table t2 (date Date, a Float64, b String) Engine=MergeTree ORDER BY date;
|
||||||
|
|
||||||
|
insert into t1(a, b) values (1, 'one'), (2, 'two');
|
||||||
|
insert into t2(a, b) values (2, 'two'), (3, 'three');
|
||||||
|
|
||||||
|
select date, a, b from t1 where (date, a, b) NOT IN (select date,a,b from t2);
|
||||||
|
select date, a, b from t2 where (date, a, b) NOT IN (select date,a,b from t1);
|
||||||
|
|
||||||
|
drop table t1;
|
||||||
|
drop table t2;
|
||||||
|
Loading…
Reference in New Issue
Block a user