Merge pull request #33827 from kitaisreal/range-hashed-dictionary-handle-invalid-intervals

RangeHashedDictionary handle invalid intervals
This commit is contained in:
Maksim Kita 2022-01-20 17:16:49 +01:00 committed by GitHub
commit 502c1637d5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 112 additions and 15 deletions

View File

@ -10,6 +10,7 @@ namespace DB
{ {
/** Structure that holds closed interval with left and right. /** Structure that holds closed interval with left and right.
* Interval left must be less than interval right.
* Example: [1, 1] is valid interval, that contain point 1. * Example: [1, 1] is valid interval, that contain point 1.
*/ */
template <typename TIntervalStorageType> template <typename TIntervalStorageType>
@ -70,6 +71,9 @@ struct IntervalTreeVoidValue
* Search for all intervals intersecting point has complexity O(log(n) + k), k is count of intervals that intersect point. * Search for all intervals intersecting point has complexity O(log(n) + k), k is count of intervals that intersect point.
* If we need to only check if there are some interval intersecting point such operation has complexity O(log(n)). * If we need to only check if there are some interval intersecting point such operation has complexity O(log(n)).
* *
* There is invariant that interval left must be less than interval right, otherwise such interval could not contain any point.
* If that invariant is broken, inserting such interval in IntervalTree will return false.
*
* Explanation: * Explanation:
* *
* IntervalTree structure is balanced tree. Each node contains: * IntervalTree structure is balanced tree. Each node contains:
@ -125,44 +129,48 @@ public:
IntervalTree() { nodes.resize(1); } IntervalTree() { nodes.resize(1); }
template <typename TValue = Value, std::enable_if_t<std::is_same_v<TValue, IntervalTreeVoidValue>, bool> = true> template <typename TValue = Value, std::enable_if_t<std::is_same_v<TValue, IntervalTreeVoidValue>, bool> = true>
void emplace(Interval interval) ALWAYS_INLINE bool emplace(Interval interval)
{ {
assert(!tree_is_built); assert(!tree_is_built);
if (unlikely(interval.left > interval.right))
return false;
sorted_intervals.emplace_back(interval); sorted_intervals.emplace_back(interval);
increaseIntervalsSize(); increaseIntervalsSize();
return true;
} }
template <typename TValue = Value, std::enable_if_t<!std::is_same_v<TValue, IntervalTreeVoidValue>, bool> = true, typename... Args> template <typename TValue = Value, std::enable_if_t<!std::is_same_v<TValue, IntervalTreeVoidValue>, bool> = true, typename... Args>
void emplace(Interval interval, Args &&... args) ALWAYS_INLINE bool emplace(Interval interval, Args &&... args)
{ {
assert(!tree_is_built); assert(!tree_is_built);
if (unlikely(interval.left > interval.right))
return false;
sorted_intervals.emplace_back( sorted_intervals.emplace_back(
std::piecewise_construct, std::forward_as_tuple(interval), std::forward_as_tuple(std::forward<Args>(args)...)); std::piecewise_construct, std::forward_as_tuple(interval), std::forward_as_tuple(std::forward<Args>(args)...));
increaseIntervalsSize(); increaseIntervalsSize();
return true;
} }
template <typename TValue = Value, std::enable_if_t<std::is_same_v<TValue, IntervalTreeVoidValue>, bool> = true> template <typename TValue = Value, std::enable_if_t<std::is_same_v<TValue, IntervalTreeVoidValue>, bool> = true>
void insert(Interval interval) bool insert(Interval interval)
{ {
assert(!tree_is_built); return emplace(interval);
sorted_intervals.emplace_back(interval);
increaseIntervalsSize();
} }
template <typename TValue = Value, std::enable_if_t<!std::is_same_v<TValue, IntervalTreeVoidValue>, bool> = true> template <typename TValue = Value, std::enable_if_t<!std::is_same_v<TValue, IntervalTreeVoidValue>, bool> = true>
void insert(Interval interval, const Value & value) bool insert(Interval interval, const Value & value)
{ {
assert(!tree_is_built); return emplace(interval, value);
sorted_intervals.emplace_back(interval, value);
increaseIntervalsSize();
} }
template <typename TValue = Value, std::enable_if_t<!std::is_same_v<TValue, IntervalTreeVoidValue>, bool> = true> template <typename TValue = Value, std::enable_if_t<!std::is_same_v<TValue, IntervalTreeVoidValue>, bool> = true>
void insert(Interval interval, Value && value) bool insert(Interval interval, Value && value)
{ {
assert(!tree_is_built); return emplace(interval, std::move(value));
sorted_intervals.emplace_back(interval, std::move(value));
increaseIntervalsSize();
} }
/// Build tree, after that intervals cannot be inserted, and only search or iteration can be performed. /// Build tree, after that intervals cannot be inserted, and only search or iteration can be performed.

View File

@ -309,6 +309,29 @@ TEST(IntervalTree, IntervalSetIterators)
} }
} }
TEST(IntervalTree, IntervalSetInvalidInterval)
{
IntervalSet<Int64Interval> interval_set;
ASSERT_TRUE(!interval_set.insert(Int64Interval(10, 0)));
ASSERT_TRUE(!interval_set.insert(Int64Interval(15, 10)));
ASSERT_TRUE(interval_set.insert(Int64Interval(20, 25)));
std::set<Int64Interval> expected;
expected.insert({20, 25});
auto actual = intervalSetFindIntervals(interval_set, 20);
ASSERT_TRUE(actual == expected);
ASSERT_TRUE(interval_set.has(20));
interval_set.build();
actual = intervalSetFindIntervals(interval_set, 20);
ASSERT_TRUE(actual == expected);
ASSERT_TRUE(interval_set.has(20));
}
TEST(IntervalTree, IntervalMapBasic) TEST(IntervalTree, IntervalMapBasic)
{ {
for (size_t intervals_size = 0; intervals_size < 120; ++intervals_size) for (size_t intervals_size = 0; intervals_size < 120; ++intervals_size)
@ -538,3 +561,26 @@ TEST(IntervalTree, IntervalMapIterators)
} }
} }
} }
TEST(IntervalTree, IntervalMapInvalidInterval)
{
IntervalMap<Int64Interval, std::string> interval_map;
ASSERT_TRUE(!interval_map.insert(Int64Interval(10, 0), "Value"));
ASSERT_TRUE(!interval_map.insert(Int64Interval(15, 10), "Value"));
ASSERT_TRUE(interval_map.insert(Int64Interval(20, 25), "Value"));
std::map<Int64Interval, std::string> expected;
expected.emplace(Int64Interval{20, 25}, "Value");
auto actual = intervalMapFindIntervals(interval_map, 20);
ASSERT_TRUE(actual == expected);
ASSERT_TRUE(interval_map.has(20));
interval_map.build();
actual = intervalMapFindIntervals(interval_map, 20);
ASSERT_TRUE(actual == expected);
ASSERT_TRUE(interval_map.has(20));
}

View File

@ -537,7 +537,9 @@ void RangeHashedDictionary<dictionary_key_type>::blockToAttributes(const Block &
if constexpr (std::is_same_v<KeyType, StringRef>) if constexpr (std::is_same_v<KeyType, StringRef>)
key = copyStringInArena(string_arena, key); key = copyStringInArena(string_arena, key);
setAttributeValue(attribute, key, RangeInterval{lower_bound, upper_bound}, attribute_column[key_index]); if (likely(lower_bound <= upper_bound))
setAttributeValue(attribute, key, RangeInterval{lower_bound, upper_bound}, attribute_column[key_index]);
keys_extractor.rollbackCurrentKey(); keys_extractor.rollbackCurrentKey();
} }

View File

@ -0,0 +1,5 @@
Value
DefaultValue
1
0
0 15 20 Value

View File

@ -0,0 +1,36 @@
DROP TABLE IF EXISTS 02179_test_table;
CREATE TABLE 02179_test_table
(
id UInt64,
value String,
start Int64,
end Int64
) Engine = TinyLog;
INSERT INTO 02179_test_table VALUES (0, 'Value', 10, 0);
INSERT INTO 02179_test_table VALUES (0, 'Value', 15, 10);
INSERT INTO 02179_test_table VALUES (0, 'Value', 15, 20);
DROP DICTIONARY IF EXISTS 02179_test_dictionary;
CREATE DICTIONARY 02179_test_dictionary
(
id UInt64,
value String DEFAULT 'DefaultValue',
start Int64,
end Int64
) PRIMARY KEY id
LAYOUT(RANGE_HASHED())
SOURCE(CLICKHOUSE(TABLE '02179_test_table'))
RANGE(MIN start MAX end)
LIFETIME(0);
SELECT dictGet('02179_test_dictionary', 'value', 0, 15);
SELECT dictGet('02179_test_dictionary', 'value', 0, 5);
SELECT dictHas('02179_test_dictionary', 0, 15);
SELECT dictHas('02179_test_dictionary', 0, 5);
SELECT * FROM 02179_test_dictionary;
DROP DICTIONARY 02179_test_dictionary;
DROP TABLE 02179_test_table;