mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 07:31:57 +00:00
Merge pull request #33827 from kitaisreal/range-hashed-dictionary-handle-invalid-intervals
RangeHashedDictionary handle invalid intervals
This commit is contained in:
commit
502c1637d5
@ -10,6 +10,7 @@ namespace DB
|
||||
{
|
||||
|
||||
/** Structure that holds closed interval with left and right.
|
||||
* Interval left must be less than or equal to interval right.
|
||||
* Example: [1, 1] is a valid interval that contains point 1.
|
||||
*/
|
||||
template <typename TIntervalStorageType>
|
||||
@ -70,6 +71,9 @@ struct IntervalTreeVoidValue
|
||||
* Searching for all intervals that intersect a point has complexity O(log(n) + k), where k is the number of intervals that intersect the point.
|
||||
* If we only need to check whether any interval intersects a point, the operation has complexity O(log(n)).
|
||||
*
|
||||
* There is an invariant that interval left must be less than or equal to interval right, otherwise such an interval could not contain any point.
|
||||
* If that invariant is broken, inserting such an interval into IntervalTree will return false.
|
||||
*
|
||||
* Explanation:
|
||||
*
|
||||
* IntervalTree structure is balanced tree. Each node contains:
|
||||
@ -125,44 +129,48 @@ public:
|
||||
IntervalTree() { nodes.resize(1); }
|
||||
|
||||
template <typename TValue = Value, std::enable_if_t<std::is_same_v<TValue, IntervalTreeVoidValue>, bool> = true>
|
||||
void emplace(Interval interval)
|
||||
ALWAYS_INLINE bool emplace(Interval interval)
|
||||
{
|
||||
assert(!tree_is_built);
|
||||
if (unlikely(interval.left > interval.right))
|
||||
return false;
|
||||
|
||||
sorted_intervals.emplace_back(interval);
|
||||
increaseIntervalsSize();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename TValue = Value, std::enable_if_t<!std::is_same_v<TValue, IntervalTreeVoidValue>, bool> = true, typename... Args>
|
||||
void emplace(Interval interval, Args &&... args)
|
||||
ALWAYS_INLINE bool emplace(Interval interval, Args &&... args)
|
||||
{
|
||||
assert(!tree_is_built);
|
||||
if (unlikely(interval.left > interval.right))
|
||||
return false;
|
||||
|
||||
sorted_intervals.emplace_back(
|
||||
std::piecewise_construct, std::forward_as_tuple(interval), std::forward_as_tuple(std::forward<Args>(args)...));
|
||||
increaseIntervalsSize();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename TValue = Value, std::enable_if_t<std::is_same_v<TValue, IntervalTreeVoidValue>, bool> = true>
|
||||
void insert(Interval interval)
|
||||
bool insert(Interval interval)
|
||||
{
|
||||
assert(!tree_is_built);
|
||||
sorted_intervals.emplace_back(interval);
|
||||
increaseIntervalsSize();
|
||||
return emplace(interval);
|
||||
}
|
||||
|
||||
template <typename TValue = Value, std::enable_if_t<!std::is_same_v<TValue, IntervalTreeVoidValue>, bool> = true>
|
||||
void insert(Interval interval, const Value & value)
|
||||
bool insert(Interval interval, const Value & value)
|
||||
{
|
||||
assert(!tree_is_built);
|
||||
sorted_intervals.emplace_back(interval, value);
|
||||
increaseIntervalsSize();
|
||||
return emplace(interval, value);
|
||||
}
|
||||
|
||||
template <typename TValue = Value, std::enable_if_t<!std::is_same_v<TValue, IntervalTreeVoidValue>, bool> = true>
|
||||
void insert(Interval interval, Value && value)
|
||||
bool insert(Interval interval, Value && value)
|
||||
{
|
||||
assert(!tree_is_built);
|
||||
sorted_intervals.emplace_back(interval, std::move(value));
|
||||
increaseIntervalsSize();
|
||||
return emplace(interval, std::move(value));
|
||||
}
|
||||
|
||||
/// Build tree, after that intervals cannot be inserted, and only search or iteration can be performed.
|
||||
|
@ -309,6 +309,29 @@ TEST(IntervalTree, IntervalSetIterators)
|
||||
}
|
||||
}
|
||||
|
||||
TEST(IntervalTree, IntervalSetInvalidInterval)
|
||||
{
|
||||
IntervalSet<Int64Interval> interval_set;
|
||||
ASSERT_TRUE(!interval_set.insert(Int64Interval(10, 0)));
|
||||
ASSERT_TRUE(!interval_set.insert(Int64Interval(15, 10)));
|
||||
ASSERT_TRUE(interval_set.insert(Int64Interval(20, 25)));
|
||||
|
||||
std::set<Int64Interval> expected;
|
||||
expected.insert({20, 25});
|
||||
|
||||
auto actual = intervalSetFindIntervals(interval_set, 20);
|
||||
|
||||
ASSERT_TRUE(actual == expected);
|
||||
ASSERT_TRUE(interval_set.has(20));
|
||||
|
||||
interval_set.build();
|
||||
|
||||
actual = intervalSetFindIntervals(interval_set, 20);
|
||||
|
||||
ASSERT_TRUE(actual == expected);
|
||||
ASSERT_TRUE(interval_set.has(20));
|
||||
}
|
||||
|
||||
TEST(IntervalTree, IntervalMapBasic)
|
||||
{
|
||||
for (size_t intervals_size = 0; intervals_size < 120; ++intervals_size)
|
||||
@ -538,3 +561,26 @@ TEST(IntervalTree, IntervalMapIterators)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(IntervalTree, IntervalMapInvalidInterval)
|
||||
{
|
||||
IntervalMap<Int64Interval, std::string> interval_map;
|
||||
ASSERT_TRUE(!interval_map.insert(Int64Interval(10, 0), "Value"));
|
||||
ASSERT_TRUE(!interval_map.insert(Int64Interval(15, 10), "Value"));
|
||||
ASSERT_TRUE(interval_map.insert(Int64Interval(20, 25), "Value"));
|
||||
|
||||
std::map<Int64Interval, std::string> expected;
|
||||
expected.emplace(Int64Interval{20, 25}, "Value");
|
||||
|
||||
auto actual = intervalMapFindIntervals(interval_map, 20);
|
||||
|
||||
ASSERT_TRUE(actual == expected);
|
||||
ASSERT_TRUE(interval_map.has(20));
|
||||
|
||||
interval_map.build();
|
||||
|
||||
actual = intervalMapFindIntervals(interval_map, 20);
|
||||
|
||||
ASSERT_TRUE(actual == expected);
|
||||
ASSERT_TRUE(interval_map.has(20));
|
||||
}
|
||||
|
@ -537,7 +537,9 @@ void RangeHashedDictionary<dictionary_key_type>::blockToAttributes(const Block &
|
||||
if constexpr (std::is_same_v<KeyType, StringRef>)
|
||||
key = copyStringInArena(string_arena, key);
|
||||
|
||||
setAttributeValue(attribute, key, RangeInterval{lower_bound, upper_bound}, attribute_column[key_index]);
|
||||
if (likely(lower_bound <= upper_bound))
|
||||
setAttributeValue(attribute, key, RangeInterval{lower_bound, upper_bound}, attribute_column[key_index]);
|
||||
|
||||
keys_extractor.rollbackCurrentKey();
|
||||
}
|
||||
|
||||
|
@ -0,0 +1,5 @@
|
||||
Value
|
||||
DefaultValue
|
||||
1
|
||||
0
|
||||
0 15 20 Value
|
@ -0,0 +1,36 @@
|
||||
DROP TABLE IF EXISTS 02179_test_table;
|
||||
CREATE TABLE 02179_test_table
|
||||
(
|
||||
id UInt64,
|
||||
value String,
|
||||
start Int64,
|
||||
end Int64
|
||||
) Engine = TinyLog;
|
||||
|
||||
INSERT INTO 02179_test_table VALUES (0, 'Value', 10, 0);
|
||||
INSERT INTO 02179_test_table VALUES (0, 'Value', 15, 10);
|
||||
INSERT INTO 02179_test_table VALUES (0, 'Value', 15, 20);
|
||||
|
||||
DROP DICTIONARY IF EXISTS 02179_test_dictionary;
|
||||
CREATE DICTIONARY 02179_test_dictionary
|
||||
(
|
||||
id UInt64,
|
||||
value String DEFAULT 'DefaultValue',
|
||||
start Int64,
|
||||
end Int64
|
||||
) PRIMARY KEY id
|
||||
LAYOUT(RANGE_HASHED())
|
||||
SOURCE(CLICKHOUSE(TABLE '02179_test_table'))
|
||||
RANGE(MIN start MAX end)
|
||||
LIFETIME(0);
|
||||
|
||||
SELECT dictGet('02179_test_dictionary', 'value', 0, 15);
|
||||
SELECT dictGet('02179_test_dictionary', 'value', 0, 5);
|
||||
|
||||
SELECT dictHas('02179_test_dictionary', 0, 15);
|
||||
SELECT dictHas('02179_test_dictionary', 0, 5);
|
||||
|
||||
SELECT * FROM 02179_test_dictionary;
|
||||
|
||||
DROP DICTIONARY 02179_test_dictionary;
|
||||
DROP TABLE 02179_test_table;
|
Loading…
Reference in New Issue
Block a user