mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 07:31:57 +00:00
Merge pull request #33827 from kitaisreal/range-hashed-dictionary-handle-invalid-intervals
RangeHashedDictionary handle invalid intervals
This commit is contained in:
commit
502c1637d5
@ -10,6 +10,7 @@ namespace DB
|
|||||||
{
|
{
|
||||||
|
|
||||||
/** Structure that holds closed interval with left and right.
|
/** Structure that holds closed interval with left and right.
|
||||||
|
* Interval left must be less than interval right.
|
||||||
* Example: [1, 1] is valid interval, that contain point 1.
|
* Example: [1, 1] is valid interval, that contain point 1.
|
||||||
*/
|
*/
|
||||||
template <typename TIntervalStorageType>
|
template <typename TIntervalStorageType>
|
||||||
@ -70,6 +71,9 @@ struct IntervalTreeVoidValue
|
|||||||
* Search for all intervals intersecting point has complexity O(log(n) + k), k is count of intervals that intersect point.
|
* Search for all intervals intersecting point has complexity O(log(n) + k), k is count of intervals that intersect point.
|
||||||
* If we need to only check if there are some interval intersecting point such operation has complexity O(log(n)).
|
* If we need to only check if there are some interval intersecting point such operation has complexity O(log(n)).
|
||||||
*
|
*
|
||||||
|
* There is invariant that interval left must be less than interval right, otherwise such interval could not contain any point.
|
||||||
|
* If that invariant is broken, inserting such interval in IntervalTree will return false.
|
||||||
|
*
|
||||||
* Explanation:
|
* Explanation:
|
||||||
*
|
*
|
||||||
* IntervalTree structure is balanced tree. Each node contains:
|
* IntervalTree structure is balanced tree. Each node contains:
|
||||||
@ -125,44 +129,48 @@ public:
|
|||||||
IntervalTree() { nodes.resize(1); }
|
IntervalTree() { nodes.resize(1); }
|
||||||
|
|
||||||
template <typename TValue = Value, std::enable_if_t<std::is_same_v<TValue, IntervalTreeVoidValue>, bool> = true>
|
template <typename TValue = Value, std::enable_if_t<std::is_same_v<TValue, IntervalTreeVoidValue>, bool> = true>
|
||||||
void emplace(Interval interval)
|
ALWAYS_INLINE bool emplace(Interval interval)
|
||||||
{
|
{
|
||||||
assert(!tree_is_built);
|
assert(!tree_is_built);
|
||||||
|
if (unlikely(interval.left > interval.right))
|
||||||
|
return false;
|
||||||
|
|
||||||
sorted_intervals.emplace_back(interval);
|
sorted_intervals.emplace_back(interval);
|
||||||
increaseIntervalsSize();
|
increaseIntervalsSize();
|
||||||
|
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename TValue = Value, std::enable_if_t<!std::is_same_v<TValue, IntervalTreeVoidValue>, bool> = true, typename... Args>
|
template <typename TValue = Value, std::enable_if_t<!std::is_same_v<TValue, IntervalTreeVoidValue>, bool> = true, typename... Args>
|
||||||
void emplace(Interval interval, Args &&... args)
|
ALWAYS_INLINE bool emplace(Interval interval, Args &&... args)
|
||||||
{
|
{
|
||||||
assert(!tree_is_built);
|
assert(!tree_is_built);
|
||||||
|
if (unlikely(interval.left > interval.right))
|
||||||
|
return false;
|
||||||
|
|
||||||
sorted_intervals.emplace_back(
|
sorted_intervals.emplace_back(
|
||||||
std::piecewise_construct, std::forward_as_tuple(interval), std::forward_as_tuple(std::forward<Args>(args)...));
|
std::piecewise_construct, std::forward_as_tuple(interval), std::forward_as_tuple(std::forward<Args>(args)...));
|
||||||
increaseIntervalsSize();
|
increaseIntervalsSize();
|
||||||
|
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename TValue = Value, std::enable_if_t<std::is_same_v<TValue, IntervalTreeVoidValue>, bool> = true>
|
template <typename TValue = Value, std::enable_if_t<std::is_same_v<TValue, IntervalTreeVoidValue>, bool> = true>
|
||||||
void insert(Interval interval)
|
bool insert(Interval interval)
|
||||||
{
|
{
|
||||||
assert(!tree_is_built);
|
return emplace(interval);
|
||||||
sorted_intervals.emplace_back(interval);
|
|
||||||
increaseIntervalsSize();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename TValue = Value, std::enable_if_t<!std::is_same_v<TValue, IntervalTreeVoidValue>, bool> = true>
|
template <typename TValue = Value, std::enable_if_t<!std::is_same_v<TValue, IntervalTreeVoidValue>, bool> = true>
|
||||||
void insert(Interval interval, const Value & value)
|
bool insert(Interval interval, const Value & value)
|
||||||
{
|
{
|
||||||
assert(!tree_is_built);
|
return emplace(interval, value);
|
||||||
sorted_intervals.emplace_back(interval, value);
|
|
||||||
increaseIntervalsSize();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename TValue = Value, std::enable_if_t<!std::is_same_v<TValue, IntervalTreeVoidValue>, bool> = true>
|
template <typename TValue = Value, std::enable_if_t<!std::is_same_v<TValue, IntervalTreeVoidValue>, bool> = true>
|
||||||
void insert(Interval interval, Value && value)
|
bool insert(Interval interval, Value && value)
|
||||||
{
|
{
|
||||||
assert(!tree_is_built);
|
return emplace(interval, std::move(value));
|
||||||
sorted_intervals.emplace_back(interval, std::move(value));
|
|
||||||
increaseIntervalsSize();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Build tree, after that intervals cannot be inserted, and only search or iteration can be performed.
|
/// Build tree, after that intervals cannot be inserted, and only search or iteration can be performed.
|
||||||
|
@ -309,6 +309,29 @@ TEST(IntervalTree, IntervalSetIterators)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(IntervalTree, IntervalSetInvalidInterval)
|
||||||
|
{
|
||||||
|
IntervalSet<Int64Interval> interval_set;
|
||||||
|
ASSERT_TRUE(!interval_set.insert(Int64Interval(10, 0)));
|
||||||
|
ASSERT_TRUE(!interval_set.insert(Int64Interval(15, 10)));
|
||||||
|
ASSERT_TRUE(interval_set.insert(Int64Interval(20, 25)));
|
||||||
|
|
||||||
|
std::set<Int64Interval> expected;
|
||||||
|
expected.insert({20, 25});
|
||||||
|
|
||||||
|
auto actual = intervalSetFindIntervals(interval_set, 20);
|
||||||
|
|
||||||
|
ASSERT_TRUE(actual == expected);
|
||||||
|
ASSERT_TRUE(interval_set.has(20));
|
||||||
|
|
||||||
|
interval_set.build();
|
||||||
|
|
||||||
|
actual = intervalSetFindIntervals(interval_set, 20);
|
||||||
|
|
||||||
|
ASSERT_TRUE(actual == expected);
|
||||||
|
ASSERT_TRUE(interval_set.has(20));
|
||||||
|
}
|
||||||
|
|
||||||
TEST(IntervalTree, IntervalMapBasic)
|
TEST(IntervalTree, IntervalMapBasic)
|
||||||
{
|
{
|
||||||
for (size_t intervals_size = 0; intervals_size < 120; ++intervals_size)
|
for (size_t intervals_size = 0; intervals_size < 120; ++intervals_size)
|
||||||
@ -538,3 +561,26 @@ TEST(IntervalTree, IntervalMapIterators)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(IntervalTree, IntervalMapInvalidInterval)
|
||||||
|
{
|
||||||
|
IntervalMap<Int64Interval, std::string> interval_map;
|
||||||
|
ASSERT_TRUE(!interval_map.insert(Int64Interval(10, 0), "Value"));
|
||||||
|
ASSERT_TRUE(!interval_map.insert(Int64Interval(15, 10), "Value"));
|
||||||
|
ASSERT_TRUE(interval_map.insert(Int64Interval(20, 25), "Value"));
|
||||||
|
|
||||||
|
std::map<Int64Interval, std::string> expected;
|
||||||
|
expected.emplace(Int64Interval{20, 25}, "Value");
|
||||||
|
|
||||||
|
auto actual = intervalMapFindIntervals(interval_map, 20);
|
||||||
|
|
||||||
|
ASSERT_TRUE(actual == expected);
|
||||||
|
ASSERT_TRUE(interval_map.has(20));
|
||||||
|
|
||||||
|
interval_map.build();
|
||||||
|
|
||||||
|
actual = intervalMapFindIntervals(interval_map, 20);
|
||||||
|
|
||||||
|
ASSERT_TRUE(actual == expected);
|
||||||
|
ASSERT_TRUE(interval_map.has(20));
|
||||||
|
}
|
||||||
|
@ -537,7 +537,9 @@ void RangeHashedDictionary<dictionary_key_type>::blockToAttributes(const Block &
|
|||||||
if constexpr (std::is_same_v<KeyType, StringRef>)
|
if constexpr (std::is_same_v<KeyType, StringRef>)
|
||||||
key = copyStringInArena(string_arena, key);
|
key = copyStringInArena(string_arena, key);
|
||||||
|
|
||||||
setAttributeValue(attribute, key, RangeInterval{lower_bound, upper_bound}, attribute_column[key_index]);
|
if (likely(lower_bound <= upper_bound))
|
||||||
|
setAttributeValue(attribute, key, RangeInterval{lower_bound, upper_bound}, attribute_column[key_index]);
|
||||||
|
|
||||||
keys_extractor.rollbackCurrentKey();
|
keys_extractor.rollbackCurrentKey();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -0,0 +1,5 @@
|
|||||||
|
Value
|
||||||
|
DefaultValue
|
||||||
|
1
|
||||||
|
0
|
||||||
|
0 15 20 Value
|
@ -0,0 +1,36 @@
|
|||||||
|
DROP TABLE IF EXISTS 02179_test_table;
|
||||||
|
CREATE TABLE 02179_test_table
|
||||||
|
(
|
||||||
|
id UInt64,
|
||||||
|
value String,
|
||||||
|
start Int64,
|
||||||
|
end Int64
|
||||||
|
) Engine = TinyLog;
|
||||||
|
|
||||||
|
INSERT INTO 02179_test_table VALUES (0, 'Value', 10, 0);
|
||||||
|
INSERT INTO 02179_test_table VALUES (0, 'Value', 15, 10);
|
||||||
|
INSERT INTO 02179_test_table VALUES (0, 'Value', 15, 20);
|
||||||
|
|
||||||
|
DROP DICTIONARY IF EXISTS 02179_test_dictionary;
|
||||||
|
CREATE DICTIONARY 02179_test_dictionary
|
||||||
|
(
|
||||||
|
id UInt64,
|
||||||
|
value String DEFAULT 'DefaultValue',
|
||||||
|
start Int64,
|
||||||
|
end Int64
|
||||||
|
) PRIMARY KEY id
|
||||||
|
LAYOUT(RANGE_HASHED())
|
||||||
|
SOURCE(CLICKHOUSE(TABLE '02179_test_table'))
|
||||||
|
RANGE(MIN start MAX end)
|
||||||
|
LIFETIME(0);
|
||||||
|
|
||||||
|
SELECT dictGet('02179_test_dictionary', 'value', 0, 15);
|
||||||
|
SELECT dictGet('02179_test_dictionary', 'value', 0, 5);
|
||||||
|
|
||||||
|
SELECT dictHas('02179_test_dictionary', 0, 15);
|
||||||
|
SELECT dictHas('02179_test_dictionary', 0, 5);
|
||||||
|
|
||||||
|
SELECT * FROM 02179_test_dictionary;
|
||||||
|
|
||||||
|
DROP DICTIONARY 02179_test_dictionary;
|
||||||
|
DROP TABLE 02179_test_table;
|
Loading…
Reference in New Issue
Block a user