mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 15:42:02 +00:00
Merge pull request #33516 from kitaisreal/range-hashed-dictionary-interval-tree
RangeHashedDictionary use interval tree
This commit is contained in:
commit
41a6cd54aa
683
src/Common/IntervalTree.h
Normal file
683
src/Common/IntervalTree.h
Normal file
@ -0,0 +1,683 @@
|
||||
#pragma once
|
||||
|
||||
#include <base/defines.h>

#include <algorithm>
#include <cassert>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/** Closed interval [left, right]: both endpoints belong to the interval.
  * Example: [1, 1] is a valid interval that contains exactly one point, 1.
  */
template <typename TIntervalStorageType>
struct Interval
{
    using IntervalStorageType = TIntervalStorageType;

    IntervalStorageType left;
    IntervalStorageType right;

    Interval(IntervalStorageType left_, IntervalStorageType right_)
        : left(left_)
        , right(right_)
    {
    }

    /// Returns true when left <= point <= right.
    inline bool contains(IntervalStorageType point) const
    {
        if (!(left <= point))
            return false;

        return point <= right;
    }
};
|
||||
|
||||
template <typename IntervalStorageType>
|
||||
bool operator<(const Interval<IntervalStorageType> & lhs, const Interval<IntervalStorageType> & rhs)
|
||||
{
|
||||
return std::tie(lhs.left, lhs.right) < std::tie(rhs.left, rhs.right);
|
||||
}
|
||||
|
||||
template <typename IntervalStorageType>
|
||||
bool operator<=(const Interval<IntervalStorageType> & lhs, const Interval<IntervalStorageType> & rhs)
|
||||
{
|
||||
return std::tie(lhs.left, lhs.right) <= std::tie(rhs.left, rhs.right);
|
||||
}
|
||||
|
||||
template <typename IntervalStorageType>
|
||||
bool operator==(const Interval<IntervalStorageType> & lhs, const Interval<IntervalStorageType> & rhs)
|
||||
{
|
||||
return std::tie(lhs.left, lhs.right) == std::tie(rhs.left, rhs.right);
|
||||
}
|
||||
|
||||
template <typename IntervalStorageType>
|
||||
bool operator!=(const Interval<IntervalStorageType> & lhs, const Interval<IntervalStorageType> & rhs)
|
||||
{
|
||||
return std::tie(lhs.left, lhs.right) != std::tie(rhs.left, rhs.right);
|
||||
}
|
||||
|
||||
template <typename IntervalStorageType>
|
||||
bool operator>(const Interval<IntervalStorageType> & lhs, const Interval<IntervalStorageType> & rhs)
|
||||
{
|
||||
return std::tie(lhs.left, lhs.right) > std::tie(rhs.left, rhs.right);
|
||||
}
|
||||
|
||||
template <typename IntervalStorageType>
|
||||
bool operator>=(const Interval<IntervalStorageType> & lhs, const Interval<IntervalStorageType> & rhs)
|
||||
{
|
||||
return std::tie(lhs.left, lhs.right) >= std::tie(rhs.left, rhs.right);
|
||||
}
|
||||
|
||||
/// Tag type: when it is used as Value of IntervalTree, the tree stores intervals without attached values.
struct IntervalTreeVoidValue { };
|
||||
|
||||
/** Tree structure that allow to efficiently retrieve all intervals that intersect specific point.
|
||||
* https://en.wikipedia.org/wiki/Interval_tree
|
||||
*
|
||||
* Search for all intervals intersecting point has complexity O(log(n) + k), k is count of intervals that intersect point.
|
||||
* If we need to only check if there are some interval intersecting point such operation has complexity O(log(n)).
|
||||
*
|
||||
* Explanation:
|
||||
*
|
||||
* IntervalTree structure is balanced tree. Each node contains:
|
||||
* 1. Point
|
||||
* 2. Intervals sorted by left ascending that intersect that point.
|
||||
* 3. Intervals sorted by right descending that intersect that point.
|
||||
*
|
||||
* Build:
|
||||
*
|
||||
* To keep tree relatively balanced we can use median of all segment points.
|
||||
* On each step build tree node with intervals. For root node input intervals are all intervals.
|
||||
* First split intervals in 4 groups.
|
||||
* 1. Intervals that lie entirely to the left of the median point: interval right is less than the median point.
|
||||
* 2. Intervals that lie entirely to the right of the median point: interval left is greater than the median point.
|
||||
* 3. Intervals that intersect node sorted by left ascending.
|
||||
* 4. Intervals that intersect node sorted by right descending.
|
||||
*
|
||||
* If intervals in 1 group are not empty. Continue build left child recursively with intervals from 1 group.
|
||||
* If intervals in 2 group are not empty. Continue build right child recursively with intervals from 2 group.
|
||||
*
|
||||
* Search:
|
||||
*
|
||||
* Search for intervals intersecting point is started from root node.
|
||||
* If search point is less than point in node, then we check intervals sorted by left ascending
|
||||
* until left is greater than search point.
|
||||
* If there is left child, continue search recursively in left child.
|
||||
*
|
||||
* If search point is greater than point in node, then we check intervals sorted by right descending
|
||||
* until right is less than search point.
|
||||
* If there is right child, continue search recursively in right child.
|
||||
*
|
||||
* If search point is equal to point in node, then we can emit all intervals that intersect current tree node
|
||||
* and stop searching.
|
||||
*
|
||||
* Additional details:
|
||||
* 1. To improve cache locality tree is stored implicitly in array, after build method is called
|
||||
* other intervals cannot be added to the tree.
|
||||
* 2. Additionally to improve cache locality in tree node we store sorted intervals for all nodes in separate
|
||||
* array. In node we store only start of its sorted intervals, and also size of intersecting intervals.
|
||||
* If we need to retrieve intervals sorted by left ascending they will be stored in indexes
|
||||
* [sorted_intervals_start_index, sorted_intervals_start_index + intersecting_intervals_size).
|
||||
* If we need to retrieve intervals sorted by right descending they will be stored in indexes
|
||||
* [sorted_intervals_start_index + intersecting_intervals_size, sorted_intervals_start_index + intersecting_intervals_size * 2).
|
||||
*/
|
||||
template <typename Interval, typename Value>
|
||||
class IntervalTree
|
||||
{
|
||||
public:
|
||||
using IntervalStorageType = typename Interval::IntervalStorageType;
|
||||
|
||||
static constexpr bool is_empty_value = std::is_same_v<Value, IntervalTreeVoidValue>;
|
||||
|
||||
IntervalTree() { nodes.resize(1); }
|
||||
|
||||
template <typename TValue = Value, std::enable_if_t<std::is_same_v<TValue, IntervalTreeVoidValue>, bool> = true>
|
||||
void emplace(Interval interval)
|
||||
{
|
||||
assert(!tree_is_built);
|
||||
sorted_intervals.emplace_back(interval);
|
||||
increaseIntervalsSize();
|
||||
}
|
||||
|
||||
template <typename TValue = Value, std::enable_if_t<!std::is_same_v<TValue, IntervalTreeVoidValue>, bool> = true, typename... Args>
|
||||
void emplace(Interval interval, Args &&... args)
|
||||
{
|
||||
assert(!tree_is_built);
|
||||
sorted_intervals.emplace_back(
|
||||
std::piecewise_construct, std::forward_as_tuple(interval), std::forward_as_tuple(std::forward<Args>(args)...));
|
||||
increaseIntervalsSize();
|
||||
}
|
||||
|
||||
template <typename TValue = Value, std::enable_if_t<std::is_same_v<TValue, IntervalTreeVoidValue>, bool> = true>
|
||||
void insert(Interval interval)
|
||||
{
|
||||
assert(!tree_is_built);
|
||||
sorted_intervals.emplace_back(interval);
|
||||
increaseIntervalsSize();
|
||||
}
|
||||
|
||||
template <typename TValue = Value, std::enable_if_t<!std::is_same_v<TValue, IntervalTreeVoidValue>, bool> = true>
|
||||
void insert(Interval interval, const Value & value)
|
||||
{
|
||||
assert(!tree_is_built);
|
||||
sorted_intervals.emplace_back(interval, value);
|
||||
increaseIntervalsSize();
|
||||
}
|
||||
|
||||
template <typename TValue = Value, std::enable_if_t<!std::is_same_v<TValue, IntervalTreeVoidValue>, bool> = true>
|
||||
void insert(Interval interval, Value && value)
|
||||
{
|
||||
assert(!tree_is_built);
|
||||
sorted_intervals.emplace_back(interval, std::move(value));
|
||||
increaseIntervalsSize();
|
||||
}
|
||||
|
||||
/// Build tree, after that intervals cannot be inserted, and only search or iteration can be performed.
|
||||
void build()
|
||||
{
|
||||
assert(!tree_is_built);
|
||||
nodes.clear();
|
||||
nodes.reserve(sorted_intervals.size());
|
||||
buildTree();
|
||||
tree_is_built = true;
|
||||
}
|
||||
|
||||
/** Find all intervals intersecting point.
|
||||
*
|
||||
* Callback interface for IntervalSet:
|
||||
*
|
||||
* template <typename IntervalType>
|
||||
* struct IntervalSetCallback
|
||||
* {
|
||||
* bool operator()(const IntervalType & interval)
|
||||
* {
|
||||
* bool should_continue_interval_iteration = false;
|
||||
* return should_continue_interval_iteration;
|
||||
* }
|
||||
* };
|
||||
*
|
||||
* Callback interface for IntervalMap:
|
||||
*
|
||||
* template <typename IntervalType, typename Value>
|
||||
* struct IntervalMapCallback
|
||||
* {
|
||||
* bool operator()(const IntervalType & interval, const Value & value)
|
||||
* {
|
||||
* bool should_continue_interval_iteration = false;
|
||||
* return should_continue_interval_iteration;
|
||||
* }
|
||||
* };
|
||||
*/
|
||||
|
||||
template <typename IntervalCallback>
|
||||
void find(IntervalStorageType point, IntervalCallback && callback) const
|
||||
{
|
||||
if (unlikely(!tree_is_built))
|
||||
{
|
||||
findIntervalsNonConstructedImpl(point, callback);
|
||||
return;
|
||||
}
|
||||
|
||||
findIntervalsImpl(point, callback);
|
||||
}
|
||||
|
||||
/// Check if there is an interval intersecting point
|
||||
bool has(IntervalStorageType point) const
|
||||
{
|
||||
bool has_intervals = false;
|
||||
|
||||
if constexpr (is_empty_value)
|
||||
{
|
||||
find(point, [&](auto &)
|
||||
{
|
||||
has_intervals = true;
|
||||
return false;
|
||||
});
|
||||
}
|
||||
else
|
||||
{
|
||||
find(point, [&](auto &, auto &)
|
||||
{
|
||||
has_intervals = true;
|
||||
return false;
|
||||
});
|
||||
}
|
||||
|
||||
return has_intervals;
|
||||
}
|
||||
|
||||
class Iterator;
|
||||
using iterator = Iterator;
|
||||
using const_iterator = Iterator;
|
||||
|
||||
/// Iterator to the first interval. iterator and const_iterator are the same type, so reuse the const overload.
iterator begin()
{
    return static_cast<const IntervalTree &>(*this).begin();
}
|
||||
|
||||
/// Past-the-end iterator. iterator and const_iterator are the same type, so reuse the const overload.
iterator end()
{
    return static_cast<const IntervalTree &>(*this).end();
}
|
||||
|
||||
/// Iterator to the first interval of the first node that holds intervals.
const_iterator begin() const
{
    const size_t first_node_index = findFirstIteratorNodeIndex();
    const size_t first_interval_index = 0;

    return Iterator(first_node_index, first_interval_index, this);
}
|
||||
|
||||
/// Past-the-end iterator: positioned right after the last interval of the last node that holds intervals.
const_iterator end() const
{
    const size_t last_node_index = findLastIteratorNodeIndex();

    /// There are no nodes at all (for example tree was built without intervals).
    if (unlikely(last_node_index >= nodes.size()))
        return Iterator(last_node_index, 0, this);

    return Iterator(last_node_index, nodes[last_node_index].sorted_intervals_range_size, this);
}
|
||||
|
||||
/// Same as begin() const.
const_iterator cbegin() const
{
    return begin();
}
|
||||
|
||||
/// Same as end() const.
const_iterator cend() const
{
    return end();
}
|
||||
|
||||
/// Count of intervals that were inserted into the tree.
size_t getIntervalsSize() const
{
    return intervals_size;
}
|
||||
|
||||
private:
|
||||
struct Node
|
||||
{
|
||||
size_t sorted_intervals_range_start_index;
|
||||
size_t sorted_intervals_range_size;
|
||||
|
||||
IntervalStorageType middle_element;
|
||||
|
||||
inline bool hasValue() const { return sorted_intervals_range_size != 0; }
|
||||
};
|
||||
|
||||
using IntervalWithEmptyValue = Interval;
|
||||
using IntervalWithNonEmptyValue = std::pair<Interval, Value>;
|
||||
|
||||
using IntervalWithValue = std::conditional_t<is_empty_value, IntervalWithEmptyValue, IntervalWithNonEmptyValue>;
|
||||
|
||||
public:
|
||||
class Iterator
|
||||
{
|
||||
public:
|
||||
bool operator==(const Iterator & rhs) const
|
||||
{
|
||||
return node_index == rhs.node_index && current_interval_index == rhs.current_interval_index && tree == rhs.tree;
|
||||
}
|
||||
|
||||
bool operator!=(const Iterator & rhs) const { return !(*this == rhs); }
|
||||
|
||||
const IntervalWithValue & operator*() { return getCurrentValue(); }
|
||||
|
||||
const IntervalWithValue & operator*() const { return getCurrentValue(); }
|
||||
|
||||
const IntervalWithValue * operator->() { return &getCurrentValue(); }
|
||||
|
||||
const IntervalWithValue * operator->() const { return &getCurrentValue(); }
|
||||
|
||||
Iterator & operator++()
|
||||
{
|
||||
iterateToNext();
|
||||
return *this;
|
||||
}
|
||||
|
||||
Iterator operator++(int) // NOLINT
|
||||
{
|
||||
Iterator copy(*this);
|
||||
iterateToNext();
|
||||
return copy;
|
||||
}
|
||||
|
||||
Iterator & operator--()
|
||||
{
|
||||
iterateToPrevious();
|
||||
return *this;
|
||||
}
|
||||
|
||||
Iterator operator--(int) // NOLINT
|
||||
{
|
||||
Iterator copy(*this);
|
||||
iterateToPrevious();
|
||||
return copy;
|
||||
}
|
||||
|
||||
private:
|
||||
friend class IntervalTree;
|
||||
|
||||
Iterator(size_t node_index_, size_t current_interval_index_, const IntervalTree * tree_)
|
||||
: node_index(node_index_), current_interval_index(current_interval_index_), tree(tree_)
|
||||
{
|
||||
}
|
||||
|
||||
size_t node_index;
|
||||
size_t current_interval_index;
|
||||
const IntervalTree * tree;
|
||||
|
||||
void iterateToNext()
|
||||
{
|
||||
size_t nodes_size = tree->nodes.size();
|
||||
auto & current_node = tree->nodes[node_index];
|
||||
|
||||
++current_interval_index;
|
||||
|
||||
if (current_interval_index < current_node.sorted_intervals_range_size)
|
||||
return;
|
||||
|
||||
size_t node_index_copy = node_index + 1;
|
||||
for (; node_index_copy < nodes_size; ++node_index_copy)
|
||||
{
|
||||
auto & node = tree->nodes[node_index_copy];
|
||||
|
||||
if (node.hasValue())
|
||||
{
|
||||
node_index = node_index_copy;
|
||||
current_interval_index = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void iterateToPrevious()
|
||||
{
|
||||
if (current_interval_index > 0)
|
||||
{
|
||||
--current_interval_index;
|
||||
return;
|
||||
}
|
||||
|
||||
while (node_index > 0)
|
||||
{
|
||||
auto & node = tree->nodes[node_index - 1];
|
||||
if (node.hasValue())
|
||||
{
|
||||
current_interval_index = node.sorted_intervals_range_size - 1;
|
||||
break;
|
||||
}
|
||||
|
||||
--node_index;
|
||||
}
|
||||
}
|
||||
|
||||
const IntervalWithValue & getCurrentValue() const
|
||||
{
|
||||
auto & current_node = tree->nodes[node_index];
|
||||
size_t interval_index = current_node.sorted_intervals_range_start_index + current_interval_index;
|
||||
return tree->sorted_intervals[interval_index];
|
||||
}
|
||||
};
|
||||
|
||||
private:
|
||||
void buildTree()
|
||||
{
|
||||
std::vector<IntervalStorageType> temporary_points_storage;
|
||||
temporary_points_storage.reserve(sorted_intervals.size() * 2);
|
||||
|
||||
std::vector<IntervalWithValue> left_intervals;
|
||||
std::vector<IntervalWithValue> right_intervals;
|
||||
std::vector<IntervalWithValue> intervals_sorted_by_left_asc;
|
||||
std::vector<IntervalWithValue> intervals_sorted_by_right_desc;
|
||||
|
||||
struct StackFrame
|
||||
{
|
||||
size_t index;
|
||||
std::vector<IntervalWithValue> intervals;
|
||||
};
|
||||
|
||||
std::vector<StackFrame> stack;
|
||||
stack.emplace_back(StackFrame{0, std::move(sorted_intervals)});
|
||||
sorted_intervals.clear();
|
||||
|
||||
while (!stack.empty())
|
||||
{
|
||||
auto frame = std::move(stack.back());
|
||||
stack.pop_back();
|
||||
|
||||
size_t current_index = frame.index;
|
||||
auto & current_intervals = frame.intervals;
|
||||
|
||||
if (current_intervals.empty())
|
||||
continue;
|
||||
|
||||
if (current_index >= nodes.size())
|
||||
nodes.resize(current_index + 1);
|
||||
|
||||
temporary_points_storage.clear();
|
||||
intervalsToPoints(current_intervals, temporary_points_storage);
|
||||
auto median = pointsMedian(temporary_points_storage);
|
||||
|
||||
left_intervals.clear();
|
||||
right_intervals.clear();
|
||||
intervals_sorted_by_left_asc.clear();
|
||||
intervals_sorted_by_right_desc.clear();
|
||||
|
||||
for (const auto & interval_with_value : current_intervals)
|
||||
{
|
||||
auto & interval = getInterval(interval_with_value);
|
||||
|
||||
if (interval.right < median)
|
||||
{
|
||||
left_intervals.emplace_back(interval_with_value);
|
||||
}
|
||||
else if (interval.left > median)
|
||||
{
|
||||
right_intervals.emplace_back(interval_with_value);
|
||||
}
|
||||
else
|
||||
{
|
||||
intervals_sorted_by_left_asc.emplace_back(interval_with_value);
|
||||
intervals_sorted_by_right_desc.emplace_back(interval_with_value);
|
||||
}
|
||||
}
|
||||
|
||||
std::sort(intervals_sorted_by_left_asc.begin(), intervals_sorted_by_left_asc.end(), [](auto & lhs, auto & rhs)
|
||||
{
|
||||
auto & lhs_interval = getInterval(lhs);
|
||||
auto & rhs_interval = getInterval(rhs);
|
||||
return lhs_interval.left < rhs_interval.left;
|
||||
});
|
||||
|
||||
std::sort(intervals_sorted_by_right_desc.begin(), intervals_sorted_by_right_desc.end(), [](auto & lhs, auto & rhs)
|
||||
{
|
||||
auto & lhs_interval = getInterval(lhs);
|
||||
auto & rhs_interval = getInterval(rhs);
|
||||
return lhs_interval.right > rhs_interval.right;
|
||||
});
|
||||
|
||||
size_t sorted_intervals_range_start_index = sorted_intervals.size();
|
||||
|
||||
for (auto && interval_sorted_by_left_asc : intervals_sorted_by_left_asc)
|
||||
sorted_intervals.emplace_back(std::move(interval_sorted_by_left_asc));
|
||||
|
||||
for (auto && interval_sorted_by_right_desc : intervals_sorted_by_right_desc)
|
||||
sorted_intervals.emplace_back(std::move(interval_sorted_by_right_desc));
|
||||
|
||||
auto & node = nodes[current_index];
|
||||
node.middle_element = median;
|
||||
node.sorted_intervals_range_start_index = sorted_intervals_range_start_index;
|
||||
node.sorted_intervals_range_size = intervals_sorted_by_left_asc.size();
|
||||
|
||||
size_t left_child_index = current_index * 2 + 1;
|
||||
stack.emplace_back(StackFrame{left_child_index, std::move(left_intervals)});
|
||||
|
||||
size_t right_child_index = current_index * 2 + 2;
|
||||
stack.emplace_back(StackFrame{right_child_index, std::move(right_intervals)});
|
||||
}
|
||||
}
|
||||
|
||||
template <typename IntervalCallback>
|
||||
void findIntervalsImpl(IntervalStorageType point, IntervalCallback && callback) const
|
||||
{
|
||||
size_t current_index = 0;
|
||||
|
||||
while (true)
|
||||
{
|
||||
if (current_index >= nodes.size())
|
||||
break;
|
||||
|
||||
auto & node = nodes[current_index];
|
||||
if (!node.hasValue())
|
||||
break;
|
||||
|
||||
auto middle_element = node.middle_element;
|
||||
|
||||
if (point < middle_element)
|
||||
{
|
||||
size_t start = node.sorted_intervals_range_start_index;
|
||||
size_t end = start + node.sorted_intervals_range_size;
|
||||
|
||||
for (; start != end; ++start)
|
||||
{
|
||||
auto & interval_with_value_left_sorted_asc = sorted_intervals[start];
|
||||
auto & interval_left_sorted_asc = getInterval(interval_with_value_left_sorted_asc);
|
||||
if (interval_left_sorted_asc.left > point)
|
||||
break;
|
||||
|
||||
bool should_continue = callCallback(interval_with_value_left_sorted_asc, callback);
|
||||
if (unlikely(!should_continue))
|
||||
return;
|
||||
}
|
||||
|
||||
size_t left_child_index = current_index * 2 + 1;
|
||||
current_index = left_child_index;
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t start = node.sorted_intervals_range_start_index + node.sorted_intervals_range_size;
|
||||
size_t end = start + node.sorted_intervals_range_size;
|
||||
|
||||
for (; start != end; ++start)
|
||||
{
|
||||
auto & interval_with_value_right_sorted_desc = sorted_intervals[start];
|
||||
auto & interval_right_sorted_desc = getInterval(interval_with_value_right_sorted_desc);
|
||||
if (interval_right_sorted_desc.right < point)
|
||||
break;
|
||||
|
||||
bool should_continue = callCallback(interval_with_value_right_sorted_desc, callback);
|
||||
if (unlikely(!should_continue))
|
||||
return;
|
||||
}
|
||||
|
||||
if (likely(point > middle_element))
|
||||
{
|
||||
size_t right_child_index = current_index * 2 + 2;
|
||||
current_index = right_child_index;
|
||||
}
|
||||
else
|
||||
{
|
||||
/// This is case when point == middle_element.
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename IntervalCallback>
|
||||
void findIntervalsNonConstructedImpl(IntervalStorageType point, IntervalCallback && callback) const
|
||||
{
|
||||
for (auto & interval_with_value : sorted_intervals)
|
||||
{
|
||||
auto & interval = getInterval(interval_with_value);
|
||||
|
||||
if (interval.contains(point))
|
||||
callCallback(interval_with_value, callback);
|
||||
}
|
||||
}
|
||||
|
||||
inline size_t findFirstIteratorNodeIndex() const
|
||||
{
|
||||
size_t nodes_size = nodes.size();
|
||||
size_t result_index = 0;
|
||||
|
||||
for (; result_index < nodes_size; ++result_index)
|
||||
{
|
||||
if (nodes[result_index].hasValue())
|
||||
break;
|
||||
}
|
||||
|
||||
if (unlikely(result_index == nodes_size))
|
||||
result_index = 0;
|
||||
|
||||
return result_index;
|
||||
}
|
||||
|
||||
inline size_t findLastIteratorNodeIndex() const
|
||||
{
|
||||
if (unlikely(nodes.empty()))
|
||||
return 0;
|
||||
|
||||
size_t nodes_size = nodes.size();
|
||||
size_t result_index = nodes_size - 1;
|
||||
for (; result_index != 0; --result_index)
|
||||
{
|
||||
if (nodes[result_index].hasValue())
|
||||
break;
|
||||
}
|
||||
|
||||
return result_index;
|
||||
}
|
||||
|
||||
inline void increaseIntervalsSize()
|
||||
{
|
||||
/// Before tree is build we store all intervals size in our first node to allow tree iteration.
|
||||
++intervals_size;
|
||||
nodes[0].sorted_intervals_range_size = intervals_size;
|
||||
}
|
||||
|
||||
std::vector<Node> nodes;
|
||||
std::vector<IntervalWithValue> sorted_intervals;
|
||||
size_t intervals_size = 0;
|
||||
bool tree_is_built = false;
|
||||
|
||||
/// Extract interval from stored element: element is either interval itself or pair of interval and value.
static inline const Interval & getInterval(const IntervalWithValue & interval_with_value)
{
    if constexpr (!is_empty_value)
        return interval_with_value.first;
    else
        return interval_with_value;
}
|
||||
|
||||
template <typename IntervalCallback>
|
||||
static inline bool callCallback(const IntervalWithValue & interval, IntervalCallback && callback)
|
||||
{
|
||||
if constexpr (is_empty_value)
|
||||
return callback(interval);
|
||||
else
|
||||
return callback(interval.first, interval.second);
|
||||
}
|
||||
|
||||
static inline void
|
||||
intervalsToPoints(const std::vector<IntervalWithValue> & intervals, std::vector<IntervalStorageType> & temporary_points_storage)
|
||||
{
|
||||
for (const auto & interval_with_value : intervals)
|
||||
{
|
||||
auto & interval = getInterval(interval_with_value);
|
||||
temporary_points_storage.emplace_back(interval.left);
|
||||
temporary_points_storage.emplace_back(interval.right);
|
||||
}
|
||||
}
|
||||
|
||||
/** Returns element that would be at index size / 2 if points were sorted. Points are reordered in place.
  *
  * Median is intentionally one of the points and not an average of two middle points,
  * because we want point in node to intersect some interval.
  * Example: Intervals [1, 1], [3, 3]. Average point 2 does not intersect any interval.
  */
static inline IntervalStorageType pointsMedian(std::vector<IntervalStorageType> & points)
{
    const auto median_position = points.begin() + points.size() / 2;

    std::nth_element(points.begin(), median_position, points.end());

    return *median_position;
}
|
||||
};
|
||||
|
||||
template <typename IntervalType>
|
||||
using IntervalSet = IntervalTree<IntervalType, IntervalTreeVoidValue>;
|
||||
|
||||
template <typename IntervalType, typename Value>
|
||||
using IntervalMap = IntervalTree<IntervalType, Value>;
|
||||
|
||||
}
|
@ -83,3 +83,6 @@ target_link_libraries (executable_udf PRIVATE dbms)
|
||||
|
||||
add_executable(hive_metastore_client hive_metastore_client.cpp)
|
||||
target_link_libraries (hive_metastore_client PUBLIC hivemetastore ${THRIFT_LIBRARY})
|
||||
|
||||
add_executable (interval_tree interval_tree.cpp)
|
||||
target_link_libraries (interval_tree PRIVATE dbms)
|
||||
|
95
src/Common/examples/interval_tree.cpp
Normal file
95
src/Common/examples/interval_tree.cpp
Normal file
@ -0,0 +1,95 @@
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#include <Common/randomSeed.h>
|
||||
#include <Common/Stopwatch.h>
|
||||
#include <Common/IntervalTree.h>
|
||||
|
||||
using namespace DB;
|
||||
using Int64Interval = Interval<Int64>;
|
||||
|
||||
int main(int, char **)
|
||||
{
|
||||
{
|
||||
IntervalSet<Int64Interval> tree;
|
||||
|
||||
tree.emplace(Int64Interval(0, 5));
|
||||
tree.emplace(Int64Interval(10, 15));
|
||||
|
||||
tree.build();
|
||||
|
||||
for (const auto & interval : tree)
|
||||
{
|
||||
std::cout << "Interval left " << interval.left << " right " << interval.right << std::endl;
|
||||
}
|
||||
}
|
||||
{
|
||||
IntervalMap<Int64Interval, std::string> tree;
|
||||
|
||||
tree.emplace(Int64Interval(0, 5), "value1");
|
||||
tree.emplace(Int64Interval(10, 15), "value2");
|
||||
|
||||
tree.build();
|
||||
|
||||
for (const auto & [interval, value] : tree)
|
||||
{
|
||||
std::cout << "Interval left " << interval.left << " right " << interval.right;
|
||||
std::cout << " value " << value << std::endl;
|
||||
}
|
||||
}
|
||||
{
|
||||
IntervalSet<Int64Interval> tree;
|
||||
for (size_t i = 0; i < 5; ++i)
|
||||
{
|
||||
tree.emplace(Int64Interval(0, i));
|
||||
}
|
||||
|
||||
tree.build();
|
||||
|
||||
for (const auto & interval : tree)
|
||||
{
|
||||
std::cout << "Interval left " << interval.left << " right " << interval.right << std::endl;
|
||||
}
|
||||
|
||||
for (Int64 i = 0; i < 5; ++i)
|
||||
{
|
||||
tree.find(i, [](auto & interval)
|
||||
{
|
||||
std::cout << "Interval left " << interval.left << " right " << interval.right << std::endl;
|
||||
return true;
|
||||
});
|
||||
}
|
||||
}
|
||||
{
|
||||
IntervalMap<Int64Interval, std::string> tree;
|
||||
for (size_t i = 0; i < 5; ++i)
|
||||
{
|
||||
tree.emplace(Int64Interval(0, i), "Value " + std::to_string(i));
|
||||
}
|
||||
|
||||
tree.build();
|
||||
|
||||
for (const auto & [interval, value] : tree)
|
||||
{
|
||||
std::cout << "Interval left " << interval.left << " right " << interval.right;
|
||||
std::cout << " value " << value << std::endl;
|
||||
}
|
||||
|
||||
for (Int64 i = 0; i < 5; ++i)
|
||||
{
|
||||
tree.find(i, [](auto & interval, auto & value)
|
||||
{
|
||||
std::cout << "Interval left " << interval.left << " right " << interval.right;
|
||||
std::cout << " value " << value << std::endl;
|
||||
|
||||
return true;
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
540
src/Common/tests/gtest_interval_tree.cpp
Normal file
540
src/Common/tests/gtest_interval_tree.cpp
Normal file
@ -0,0 +1,540 @@
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <set>
|
||||
#include <map>
|
||||
|
||||
#include <base/types.h>
|
||||
#include <Common/IntervalTree.h>
|
||||
|
||||
|
||||
using namespace DB;
|
||||
using Int64Interval = Interval<Int64>;
|
||||
|
||||
template <typename IntervalType>
|
||||
std::set<IntervalType> intervalSetToSet(const IntervalSet<IntervalType> & interval_set)
|
||||
{
|
||||
std::set<IntervalType> result;
|
||||
|
||||
for (const auto & interval : interval_set)
|
||||
result.insert(interval);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename IntervalType, typename Value>
|
||||
std::map<IntervalType, Value> intervalMapToMap(const IntervalMap<IntervalType, Value> & interval_map)
|
||||
{
|
||||
std::map<IntervalType, Value> result;
|
||||
|
||||
for (const auto & [interval, value] : interval_map)
|
||||
result.emplace(interval, value);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Callback for IntervalSet::find that stores each passed interval into referenced set
/// and always asks to continue iteration.
template <typename IntervalType>
struct CollectIntervalsSetCallback
{
    std::set<IntervalType> & result_intervals;

    explicit CollectIntervalsSetCallback(std::set<IntervalType> & result_intervals_)
        : result_intervals(result_intervals_)
    {
    }

    bool operator()(IntervalType interval)
    {
        constexpr bool should_continue_interval_iteration = true;

        result_intervals.insert(interval);

        return should_continue_interval_iteration;
    }
};
|
||||
|
||||
using CollectIntervalsSetInt64Callback = CollectIntervalsSetCallback<Int64Interval>;
|
||||
|
||||
template <typename IntervalType>
|
||||
std::set<IntervalType> intervalSetFindIntervals(const IntervalSet<IntervalType> & interval_set, typename IntervalType::IntervalStorageType point)
|
||||
{
|
||||
std::set<IntervalType> result;
|
||||
CollectIntervalsSetCallback<IntervalType> callback(result);
|
||||
|
||||
interval_set.find(point, callback);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Callback for IntervalMap::find that stores each passed interval with value into referenced map
/// and always asks to continue iteration.
template <typename IntervalType, typename Value>
struct CollectIntervalsMapCallback
{
    std::map<IntervalType, Value> & result_intervals;

    explicit CollectIntervalsMapCallback(std::map<IntervalType, Value> & result_intervals_)
        : result_intervals(result_intervals_)
    {
    }

    bool operator()(IntervalType interval, const Value & value)
    {
        constexpr bool should_continue_interval_iteration = true;

        result_intervals.emplace(interval, value);

        return should_continue_interval_iteration;
    }
};
|
||||
|
||||
|
||||
template <typename IntervalType, typename Value>
|
||||
std::map<IntervalType, Value> intervalMapFindIntervals(const IntervalMap<IntervalType, Value> & interval_set, typename IntervalType::IntervalStorageType point)
|
||||
{
|
||||
std::map<IntervalType, Value> result;
|
||||
CollectIntervalsMapCallback callback(result);
|
||||
|
||||
interval_set.find(point, callback);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Disjoint intervals [0, 1], [2, 3], ...: each endpoint must be found in exactly one interval,
/// both before and after tree is built.
TEST(IntervalTree, IntervalSetBasic)
{
    for (size_t intervals_size = 0; intervals_size < 120; ++intervals_size)
    {
        IntervalSet<Int64Interval> set;
        std::set<Int64Interval> expected;

        for (size_t interval_index = 0; interval_index < intervals_size; ++interval_index)
        {
            const Int64Interval interval(interval_index * 2, interval_index * 2 + 1);
            set.insert(interval);
            expected.insert(interval);
        }

        ASSERT_TRUE(set.getIntervalsSize() == expected.size());
        ASSERT_TRUE(set.getIntervalsSize() == intervals_size);
        ASSERT_TRUE(intervalSetToSet(set) == expected);

        /// Search in tree that is not built yet.
        for (const auto & expected_interval : expected)
        {
            const std::set<Int64Interval> single_interval{expected_interval};

            const auto found_by_left = intervalSetFindIntervals(set, expected_interval.left);
            ASSERT_TRUE(found_by_left.size() == 1);
            ASSERT_TRUE(found_by_left == single_interval);

            const auto found_by_right = intervalSetFindIntervals(set, expected_interval.right);
            ASSERT_TRUE(found_by_right.size() == 1);
            ASSERT_TRUE(found_by_right == single_interval);

            ASSERT_TRUE(set.has(expected_interval.left));
            ASSERT_TRUE(set.has(expected_interval.right));
        }

        set.build();

        ASSERT_TRUE(intervalSetToSet(set) == expected);

        /// Search in built tree must give the same results.
        for (const auto & expected_interval : expected)
        {
            const std::set<Int64Interval> single_interval{expected_interval};

            const auto found_by_left = intervalSetFindIntervals(set, expected_interval.left);
            ASSERT_TRUE(found_by_left.size() == 1);
            ASSERT_TRUE(found_by_left == single_interval);

            const auto found_by_right = intervalSetFindIntervals(set, expected_interval.right);
            ASSERT_TRUE(found_by_right.size() == 1);
            ASSERT_TRUE(found_by_right == single_interval);

            ASSERT_TRUE(set.has(expected_interval.left));
            ASSERT_TRUE(set.has(expected_interval.right));
        }
    }
}
|
||||
|
||||
/// Intervals that consist of single point [0, 0], [1, 1], ...: each point must be found in exactly one interval,
/// both before and after tree is built.
TEST(IntervalTree, IntervalSetPoints)
{
    for (size_t intervals_size = 0; intervals_size < 120; ++intervals_size)
    {
        IntervalSet<Int64Interval> set;
        std::set<Int64Interval> expected;

        for (size_t interval_index = 0; interval_index < intervals_size; ++interval_index)
        {
            const Int64Interval interval(interval_index, interval_index);
            set.insert(interval);
            expected.insert(interval);
        }

        ASSERT_TRUE(set.getIntervalsSize() == expected.size());
        ASSERT_TRUE(set.getIntervalsSize() == intervals_size);
        ASSERT_TRUE(intervalSetToSet(set) == expected);

        /// Search in tree that is not built yet.
        for (const auto & expected_interval : expected)
        {
            const std::set<Int64Interval> single_interval{expected_interval};

            const auto found_by_left = intervalSetFindIntervals(set, expected_interval.left);
            ASSERT_TRUE(found_by_left.size() == 1);
            ASSERT_TRUE(found_by_left == single_interval);

            const auto found_by_right = intervalSetFindIntervals(set, expected_interval.right);
            ASSERT_TRUE(found_by_right.size() == 1);
            ASSERT_TRUE(found_by_right == single_interval);

            ASSERT_TRUE(set.has(expected_interval.left));
            ASSERT_TRUE(set.has(expected_interval.right));
        }

        set.build();

        ASSERT_TRUE(intervalSetToSet(set) == expected);

        /// Search in built tree must give the same results.
        for (const auto & expected_interval : expected)
        {
            const std::set<Int64Interval> single_interval{expected_interval};

            const auto found_by_left = intervalSetFindIntervals(set, expected_interval.left);
            ASSERT_TRUE(found_by_left.size() == 1);
            ASSERT_TRUE(found_by_left == single_interval);

            const auto found_by_right = intervalSetFindIntervals(set, expected_interval.right);
            ASSERT_TRUE(found_by_right.size() == 1);
            ASSERT_TRUE(found_by_right == single_interval);

            ASSERT_TRUE(set.has(expected_interval.left));
            ASSERT_TRUE(set.has(expected_interval.right));
        }
    }
}
|
||||
|
||||
/// Fills an IntervalSet with nested intervals [0, 2 * i + 1] that all share the left bound 0,
/// so a lookup at that left bound must return every stored interval, before and after build().
TEST(IntervalTree, IntervalSetIntersectingIntervals)
{
    for (size_t intervals_size = 0; intervals_size < 120; ++intervals_size)
    {
        std::set<Int64Interval> expected;
        IntervalSet<Int64Interval> set;

        for (size_t idx = 0; idx < intervals_size; ++idx)
        {
            auto nested_interval = Int64Interval(0, idx * 2 + 1);
            expected.insert(nested_interval);
            set.insert(nested_interval);
        }

        ASSERT_TRUE(set.getIntervalsSize() == expected.size());
        ASSERT_TRUE(set.getIntervalsSize() == intervals_size);

        /// Pass 0 inspects the set right after the inserts, pass 1 repeats the same checks after build().
        for (const bool after_build : {false, true})
        {
            if (after_build)
                set.build();

            ASSERT_TRUE(intervalSetToSet(set) == expected);

            for (const auto & stored : expected)
            {
                const auto found = intervalSetFindIntervals(set, stored.left);
                ASSERT_TRUE(found.size() == expected.size());
                ASSERT_TRUE(found == expected);

                ASSERT_TRUE(set.has(stored.left));
                ASSERT_TRUE(set.has(stored.right));
            }
        }
    }
}
|
||||
|
||||
/// Exercises IntervalSet iterators: empty set, a single element (prefix and postfix
/// increment/decrement), and a full reverse walk from end() to begin().
TEST(IntervalTree, IntervalSetIterators)
{
    {
        /// An empty set has an empty range, both before and after build().
        IntervalSet<Int64Interval> set;
        ASSERT_TRUE(set.begin() == set.end());
        ASSERT_TRUE(set.cbegin() == set.cend());
        set.build();
        ASSERT_TRUE(set.begin() == set.end());
        ASSERT_TRUE(set.cbegin() == set.cend());
    }
    {
        /// A single element: dereference via * and ->, then step over it in both directions.
        IntervalSet<Int64Interval> set;
        set.emplace(Int64Interval(0, 5));
        ASSERT_TRUE(set.begin() != set.end());
        ASSERT_TRUE((*set.begin()).left == 0);
        ASSERT_TRUE((*set.begin()).right == 5);
        ASSERT_TRUE(set.begin()->left == 0);
        ASSERT_TRUE(set.begin()->right == 5);
        auto begin = set.begin();
        ++begin;
        ASSERT_TRUE(begin == set.end());

        /// Postfix forms are used on purpose to cover operator++(int) / operator--(int).
        begin = set.begin();
        begin++;
        ASSERT_TRUE(begin == set.end());

        auto end = set.end();
        --end;
        ASSERT_TRUE(set.begin() == end);

        end = set.end();
        end--;
        ASSERT_TRUE(set.begin() == end);
    }
    {
        for (size_t intervals_size = 0; intervals_size < 120; ++intervals_size)
        {
            std::set<Int64Interval> expected;
            IntervalSet<Int64Interval> set;

            for (size_t interval_index = 0; interval_index < intervals_size; ++interval_index)
            {
                auto interval = Int64Interval(interval_index * 2, interval_index * 2 + 1);
                set.insert(interval);
                expected.insert(interval);
            }

            auto end = set.end();
            auto begin = set.begin();

            std::set<Int64Interval> actual;

            /// Walk backwards; the loop stops exactly when `end` reaches `begin`, and the
            /// element at `begin` has already been collected by the last iteration.
            while (end != begin)
            {
                --end;
                actual.insert(*end);
            }

            ASSERT_TRUE(actual == expected);
        }
    }
}
|
||||
|
||||
/// Fills an IntervalMap with disjoint intervals [2 * i, 2 * i + 1] whose value is the
/// stringified left bound, and verifies that lookups at both bounds return exactly that
/// one entry, before and after build().
TEST(IntervalTree, IntervalMapBasic)
{
    for (size_t intervals_size = 0; intervals_size < 120; ++intervals_size)
    {
        std::map<Int64Interval, std::string> expected;
        IntervalMap<Int64Interval, std::string> map;

        for (size_t idx = 0; idx < intervals_size; ++idx)
        {
            auto key_interval = Int64Interval(idx * 2, idx * 2 + 1);
            auto mapped = std::to_string(key_interval.left);
            expected.emplace(key_interval, mapped);
            map.emplace(key_interval, mapped);
        }

        ASSERT_TRUE(map.getIntervalsSize() == expected.size());
        ASSERT_TRUE(map.getIntervalsSize() == intervals_size);

        /// Pass 0 inspects the map right after the inserts, pass 1 repeats the same checks after build().
        for (const bool after_build : {false, true})
        {
            if (after_build)
                map.build();

            ASSERT_TRUE(intervalMapToMap(map) == expected);

            for (const auto & entry : expected)
            {
                /// Every expected value is std::to_string(left), so the entry itself is the expected result.
                const std::map<Int64Interval, std::string> only_entry = {entry};

                const auto found_at_left = intervalMapFindIntervals(map, entry.first.left);
                ASSERT_TRUE(found_at_left.size() == 1);
                ASSERT_TRUE(found_at_left == only_entry);

                const auto found_at_right = intervalMapFindIntervals(map, entry.first.right);
                ASSERT_TRUE(found_at_right.size() == 1);
                ASSERT_TRUE(found_at_right == only_entry);

                ASSERT_TRUE(map.has(entry.first.left));
                ASSERT_TRUE(map.has(entry.first.right));
            }
        }
    }
}
|
||||
|
||||
/// Fills an IntervalMap with degenerate intervals [i, i] mapped to the stringified point
/// and verifies that each point resolves to exactly its own entry, before and after build().
TEST(IntervalTree, IntervalMapPoints)
{
    for (size_t intervals_size = 0; intervals_size < 120; ++intervals_size)
    {
        std::map<Int64Interval, std::string> expected;
        IntervalMap<Int64Interval, std::string> map;

        for (size_t point = 0; point < intervals_size; ++point)
        {
            auto point_interval = Int64Interval(point, point);
            auto mapped = std::to_string(point_interval.left);
            expected.emplace(point_interval, mapped);
            map.emplace(point_interval, mapped);
        }

        ASSERT_TRUE(map.getIntervalsSize() == expected.size());
        ASSERT_TRUE(map.getIntervalsSize() == intervals_size);

        /// Pass 0 inspects the map right after the inserts, pass 1 repeats the same checks after build().
        for (const bool after_build : {false, true})
        {
            if (after_build)
                map.build();

            ASSERT_TRUE(intervalMapToMap(map) == expected);

            for (const auto & entry : expected)
            {
                /// Every expected value is std::to_string(left), so the entry itself is the expected result.
                const std::map<Int64Interval, std::string> only_entry = {entry};

                const auto found_at_left = intervalMapFindIntervals(map, entry.first.left);
                ASSERT_TRUE(found_at_left.size() == 1);
                ASSERT_TRUE(found_at_left == only_entry);

                const auto found_at_right = intervalMapFindIntervals(map, entry.first.right);
                ASSERT_TRUE(found_at_right.size() == 1);
                ASSERT_TRUE(found_at_right == only_entry);

                ASSERT_TRUE(map.has(entry.first.left));
                ASSERT_TRUE(map.has(entry.first.right));
            }
        }
    }
}
|
||||
|
||||
/// Fills an IntervalMap with nested intervals [0, 2 * i + 1] that all share the left bound 0,
/// so a lookup at that left bound must return every stored entry, before and after build().
TEST(IntervalTree, IntervalMapIntersectingIntervals)
{
    for (size_t intervals_size = 0; intervals_size < 120; ++intervals_size)
    {
        std::map<Int64Interval, std::string> expected;
        IntervalMap<Int64Interval, std::string> map;

        for (size_t idx = 0; idx < intervals_size; ++idx)
        {
            auto nested_interval = Int64Interval(0, idx * 2 + 1);
            auto mapped = std::to_string(nested_interval.left);
            expected.emplace(nested_interval, mapped);
            map.emplace(nested_interval, mapped);
        }

        ASSERT_TRUE(map.getIntervalsSize() == expected.size());
        ASSERT_TRUE(map.getIntervalsSize() == intervals_size);

        /// Pass 0 inspects the map right after the inserts, pass 1 repeats the same checks after build().
        for (const bool after_build : {false, true})
        {
            if (after_build)
                map.build();

            ASSERT_TRUE(intervalMapToMap(map) == expected);

            for (const auto & entry : expected)
            {
                const auto found = intervalMapFindIntervals(map, entry.first.left);

                ASSERT_TRUE(found.size() == expected.size());
                ASSERT_TRUE(found == expected);

                ASSERT_TRUE(map.has(entry.first.left));
                ASSERT_TRUE(map.has(entry.first.right));
            }
        }
    }
}
|
||||
|
||||
/// Exercises IntervalMap iterators: empty map, a single entry (prefix and postfix
/// increment/decrement), and a full reverse walk from end() to begin().
TEST(IntervalTree, IntervalMapIterators)
{
    {
        /// An empty map has an empty range, both before and after build().
        IntervalMap<Int64Interval, std::string> map;
        ASSERT_TRUE(map.begin() == map.end());
        ASSERT_TRUE(map.cbegin() == map.cend());
        map.build();
        ASSERT_TRUE(map.begin() == map.end());
        ASSERT_TRUE(map.cbegin() == map.cend());
    }
    {
        /// A single entry: dereference via * and ->, then step over it in both directions.
        IntervalMap<Int64Interval, std::string> map;
        map.emplace(Int64Interval(0, 5), "value");
        ASSERT_TRUE(map.begin() != map.end());
        ASSERT_TRUE((*map.begin()).first.left == 0);
        ASSERT_TRUE((*map.begin()).first.right == 5);
        ASSERT_TRUE((*map.begin()).second == "value");
        ASSERT_TRUE(map.begin()->first.left == 0);
        ASSERT_TRUE(map.begin()->first.right == 5);
        ASSERT_TRUE(map.begin()->second == "value");
        auto begin = map.begin();
        ++begin;
        ASSERT_TRUE(begin == map.end());

        /// Postfix forms are used on purpose to cover operator++(int) / operator--(int).
        begin = map.begin();
        begin++;
        ASSERT_TRUE(begin == map.end());

        auto end = map.end();
        --end;
        ASSERT_TRUE(map.begin() == end);

        end = map.end();
        end--;
        ASSERT_TRUE(map.begin() == end);
    }
    {
        for (size_t intervals_size = 0; intervals_size < 120; ++intervals_size)
        {
            std::map<Int64Interval, std::string> expected;
            IntervalMap<Int64Interval, std::string> map;

            for (size_t interval_index = 0; interval_index < intervals_size; ++interval_index)
            {
                auto interval = Int64Interval(interval_index * 2, interval_index * 2 + 1);
                auto value = std::to_string(interval.left);
                map.emplace(interval, value);
                expected.emplace(interval, value);
            }

            auto end = map.end();
            auto begin = map.begin();

            std::map<Int64Interval, std::string> actual;

            /// Walk backwards; the loop stops exactly when `end` reaches `begin`, and the
            /// entry at `begin` has already been collected by the last iteration.
            while (end != begin)
            {
                --end;
                actual.insert(*end);
            }

            ASSERT_TRUE(actual == expected);
        }
    }
}
|
@ -22,6 +22,11 @@ using RangeStorageType = DB::RangeStorageType;
|
||||
const RangeStorageType RANGE_MIN_NULL_VALUE = std::numeric_limits<RangeStorageType>::max();
|
||||
const RangeStorageType RANGE_MAX_NULL_VALUE = std::numeric_limits<RangeStorageType>::lowest();
|
||||
|
||||
bool isCorrectDate(const RangeStorageType & date)
|
||||
{
|
||||
return 0 < date && date <= DATE_LUT_MAX_DAY_NUM;
|
||||
}
|
||||
|
||||
// Handle both kinds of null values: explicit nulls of NullableColumn and 'implicit' nulls of Date type.
|
||||
RangeStorageType getColumnIntValueOrDefault(const DB::IColumn & column, size_t index, bool isDate, const RangeStorageType & default_value)
|
||||
{
|
||||
@ -29,7 +34,7 @@ RangeStorageType getColumnIntValueOrDefault(const DB::IColumn & column, size_t i
|
||||
return default_value;
|
||||
|
||||
const RangeStorageType result = static_cast<RangeStorageType>(column.getInt(index));
|
||||
if (isDate && !DB::Range::isCorrectDate(result))
|
||||
if (isDate && !isCorrectDate(result))
|
||||
return default_value;
|
||||
|
||||
return result;
|
||||
@ -57,20 +62,6 @@ namespace ErrorCodes
|
||||
extern const int UNSUPPORTED_METHOD;
|
||||
}
|
||||
|
||||
bool Range::isCorrectDate(const RangeStorageType & date)
|
||||
{
|
||||
return 0 < date && date <= DATE_LUT_MAX_DAY_NUM;
|
||||
}
|
||||
|
||||
bool Range::contains(const RangeStorageType & value) const
|
||||
{
|
||||
return left <= value && value <= right;
|
||||
}
|
||||
|
||||
static bool operator<(const Range & left, const Range & right)
|
||||
{
|
||||
return std::tie(left.left, left.right) < std::tie(right.left, right.right);
|
||||
}
|
||||
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
RangeHashedDictionary<dictionary_key_type>::RangeHashedDictionary(
|
||||
@ -260,16 +251,8 @@ ColumnUInt8::Ptr RangeHashedDictionary<dictionary_key_type>::hasKeys(const Colum
|
||||
if (it)
|
||||
{
|
||||
const auto date = dates[key_index];
|
||||
const auto & ranges_and_values = it->getMapped();
|
||||
const auto val_it = std::find_if(
|
||||
std::begin(ranges_and_values),
|
||||
std::end(ranges_and_values),
|
||||
[date](const Value<ValueType> & v)
|
||||
{
|
||||
return v.range.contains(date);
|
||||
});
|
||||
|
||||
out[key_index] = val_it != std::end(ranges_and_values);
|
||||
const auto & interval_tree = it->getMapped();
|
||||
out[key_index] = interval_tree.has(date);
|
||||
keys_found += out[key_index];
|
||||
}
|
||||
else
|
||||
@ -324,6 +307,8 @@ void RangeHashedDictionary<dictionary_key_type>::loadData()
|
||||
updateData();
|
||||
}
|
||||
|
||||
buildAttributeIntervalTrees();
|
||||
|
||||
if (require_nonempty && 0 == element_count)
|
||||
throw Exception(ErrorCodes::DICTIONARY_IS_EMPTY,
|
||||
"{}: dictionary source is empty and 'require_nonempty' property is set.");
|
||||
@ -407,30 +392,40 @@ void RangeHashedDictionary<dictionary_key_type>::getItemsImpl(
|
||||
if (it)
|
||||
{
|
||||
const auto date = dates[key_index];
|
||||
const auto & ranges_and_values = it->getMapped();
|
||||
const auto val_it = std::find_if(
|
||||
std::begin(ranges_and_values),
|
||||
std::end(ranges_and_values),
|
||||
[date](const Value<AttributeType> & v)
|
||||
{
|
||||
return v.range.contains(date);
|
||||
});
|
||||
const auto & interval_tree = it->getMapped();
|
||||
|
||||
if (val_it != std::end(ranges_and_values))
|
||||
std::optional<AttributeType> min_value;
|
||||
std::optional<RangeInterval> min_range;
|
||||
bool has_interval = false;
|
||||
|
||||
interval_tree.find(date, [&](auto & interval, auto & value)
|
||||
{
|
||||
has_interval = true;
|
||||
|
||||
if (min_range && interval < *min_range)
|
||||
min_range = interval;
|
||||
else
|
||||
min_range = interval;
|
||||
|
||||
min_value = value;
|
||||
|
||||
return true;
|
||||
});
|
||||
|
||||
if (has_interval)
|
||||
{
|
||||
++keys_found;
|
||||
auto & value = val_it->value;
|
||||
|
||||
if constexpr (is_nullable)
|
||||
{
|
||||
if (value.has_value())
|
||||
set_value(key_index, *value, false);
|
||||
if (min_value.has_value())
|
||||
set_value(key_index, *min_value, false);
|
||||
else
|
||||
set_value(key_index, default_value_extractor[key_index], true);
|
||||
}
|
||||
else
|
||||
{
|
||||
set_value(key_index, *value, false);
|
||||
set_value(key_index, *min_value, false);
|
||||
}
|
||||
|
||||
keys_extractor.rollbackCurrentKey();
|
||||
@ -542,7 +537,7 @@ void RangeHashedDictionary<dictionary_key_type>::blockToAttributes(const Block &
|
||||
if constexpr (std::is_same_v<KeyType, StringRef>)
|
||||
key = copyStringInArena(string_arena, key);
|
||||
|
||||
setAttributeValue(attribute, key, Range{lower_bound, upper_bound}, attribute_column[key_index]);
|
||||
setAttributeValue(attribute, key, RangeInterval{lower_bound, upper_bound}, attribute_column[key_index]);
|
||||
keys_extractor.rollbackCurrentKey();
|
||||
}
|
||||
|
||||
@ -550,18 +545,38 @@ void RangeHashedDictionary<dictionary_key_type>::blockToAttributes(const Block &
|
||||
}
|
||||
}
|
||||
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
void RangeHashedDictionary<dictionary_key_type>::buildAttributeIntervalTrees()
|
||||
{
|
||||
for (auto & attribute : attributes)
|
||||
{
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ValueType = DictionaryValueType<AttributeType>;
|
||||
|
||||
auto & collection = std::get<CollectionType<ValueType>>(attribute.maps);
|
||||
for (auto & [_, ranges] : collection)
|
||||
ranges.build();
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
}
|
||||
}
|
||||
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
template <typename T>
|
||||
void RangeHashedDictionary<dictionary_key_type>::setAttributeValueImpl(Attribute & attribute, KeyType key, const Range & range, const Field & value)
|
||||
void RangeHashedDictionary<dictionary_key_type>::setAttributeValueImpl(Attribute & attribute, KeyType key, const RangeInterval & interval, const Field & value)
|
||||
{
|
||||
using ValueType = std::conditional_t<std::is_same_v<T, String>, StringRef, T>;
|
||||
auto & collection = std::get<CollectionType<ValueType>>(attribute.maps);
|
||||
|
||||
Value<ValueType> value_to_insert;
|
||||
std::optional<ValueType> value_to_insert;
|
||||
|
||||
if (attribute.is_nullable && value.isNull())
|
||||
{
|
||||
value_to_insert = { range, {} };
|
||||
value_to_insert = std::nullopt;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -569,11 +584,11 @@ void RangeHashedDictionary<dictionary_key_type>::setAttributeValueImpl(Attribute
|
||||
{
|
||||
const auto & string = value.get<String>();
|
||||
StringRef string_ref = copyStringInArena(string_arena, string);
|
||||
value_to_insert = Value<ValueType>{ range, { string_ref }};
|
||||
value_to_insert = { string_ref };
|
||||
}
|
||||
else
|
||||
{
|
||||
value_to_insert = Value<ValueType>{ range, { value.get<ValueType>() }};
|
||||
value_to_insert = { value.get<ValueType>() };
|
||||
}
|
||||
}
|
||||
|
||||
@ -582,33 +597,25 @@ void RangeHashedDictionary<dictionary_key_type>::setAttributeValueImpl(Attribute
|
||||
if (it)
|
||||
{
|
||||
auto & values = it->getMapped();
|
||||
|
||||
const auto insert_it = std::lower_bound(
|
||||
std::begin(values),
|
||||
std::end(values),
|
||||
range,
|
||||
[](const Value<ValueType> & lhs, const Range & rhs_range)
|
||||
{
|
||||
return lhs.range < rhs_range;
|
||||
});
|
||||
|
||||
values.insert(insert_it, std::move(value_to_insert));
|
||||
values.emplace(interval, std::move(value_to_insert));
|
||||
}
|
||||
else
|
||||
{
|
||||
collection.insert({key, Values<ValueType>{std::move(value_to_insert)}});
|
||||
Values<ValueType> values;
|
||||
values.emplace(interval, value_to_insert);
|
||||
collection.insert({key, std::move(values)});
|
||||
}
|
||||
}
|
||||
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
void RangeHashedDictionary<dictionary_key_type>::setAttributeValue(Attribute & attribute, KeyType key, const Range & range, const Field & value)
|
||||
void RangeHashedDictionary<dictionary_key_type>::setAttributeValue(Attribute & attribute, KeyType key, const RangeInterval & interval, const Field & value)
|
||||
{
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
|
||||
setAttributeValueImpl<AttributeType>(attribute, key, range, value);
|
||||
setAttributeValueImpl<AttributeType>(attribute, key, interval, value);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
@ -650,14 +657,15 @@ void RangeHashedDictionary<dictionary_key_type>::getKeysAndDates(
|
||||
end_dates.reserve(collection.size());
|
||||
|
||||
const bool is_date = isDate(dict_struct.range_min->type);
|
||||
(void)(is_date);
|
||||
|
||||
for (const auto & key : collection)
|
||||
{
|
||||
for (const auto & value : key.getMapped())
|
||||
for (const auto & [interval, _] : key.getMapped())
|
||||
{
|
||||
keys.push_back(key.getKey());
|
||||
start_dates.push_back(value.range.left);
|
||||
end_dates.push_back(value.range.right);
|
||||
start_dates.push_back(interval.left);
|
||||
end_dates.push_back(interval.right);
|
||||
|
||||
if constexpr (std::numeric_limits<RangeType>::max() > DATE_LUT_MAX_DAY_NUM) /// Avoid warning about tautological comparison in next line.
|
||||
if (is_date && static_cast<UInt64>(end_dates.back()) > DATE_LUT_MAX_DAY_NUM)
|
||||
@ -676,7 +684,7 @@ PaddedPODArray<Int64> RangeHashedDictionary<dictionary_key_type>::makeDateKeys(
|
||||
|
||||
for (size_t i = 0; i < keys.size(); ++i)
|
||||
{
|
||||
if (Range::isCorrectDate(block_start_dates[i]))
|
||||
if (isCorrectDate(block_start_dates[i]))
|
||||
keys[i] = block_start_dates[i]; // NOLINT
|
||||
else
|
||||
keys[i] = block_end_dates[i]; // NOLINT
|
||||
|
@ -8,26 +8,19 @@
|
||||
#include <Columns/ColumnDecimal.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
#include <Common/HashTable/HashSet.h>
|
||||
#include <Common/IntervalTree.h>
|
||||
|
||||
#include <Dictionaries/DictionaryStructure.h>
|
||||
#include <Dictionaries/IDictionary.h>
|
||||
#include <Dictionaries/IDictionarySource.h>
|
||||
#include <Dictionaries/DictionaryHelpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
using RangeStorageType = Int64;
|
||||
|
||||
struct Range
|
||||
{
|
||||
RangeStorageType left;
|
||||
RangeStorageType right;
|
||||
|
||||
static bool isCorrectDate(const RangeStorageType & date);
|
||||
bool contains(const RangeStorageType & value) const;
|
||||
};
|
||||
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
class RangeHashedDictionary final : public IDictionary
|
||||
{
|
||||
@ -94,15 +87,11 @@ public:
|
||||
Pipe read(const Names & column_names, size_t max_block_size, size_t num_streams) const override;
|
||||
|
||||
private:
|
||||
template <typename T>
|
||||
struct Value final
|
||||
{
|
||||
Range range;
|
||||
std::optional<T> value;
|
||||
};
|
||||
|
||||
using RangeInterval = Interval<RangeStorageType>;
|
||||
|
||||
template <typename T>
|
||||
using Values = std::vector<Value<T>>;
|
||||
using Values = IntervalMap<RangeInterval, std::optional<T>>;
|
||||
|
||||
template <typename Value>
|
||||
using CollectionType = std::conditional_t<
|
||||
@ -160,10 +149,12 @@ private:
|
||||
|
||||
void blockToAttributes(const Block & block);
|
||||
|
||||
template <typename T>
|
||||
void setAttributeValueImpl(Attribute & attribute, KeyType key, const Range & range, const Field & value);
|
||||
void buildAttributeIntervalTrees();
|
||||
|
||||
void setAttributeValue(Attribute & attribute, KeyType key, const Range & range, const Field & value);
|
||||
template <typename T>
|
||||
void setAttributeValueImpl(Attribute & attribute, KeyType key, const RangeInterval & interval, const Field & value);
|
||||
|
||||
void setAttributeValue(Attribute & attribute, KeyType key, const RangeInterval & interval, const Field & value);
|
||||
|
||||
template <typename RangeType>
|
||||
void getKeysAndDates(
|
||||
|
126
tests/performance/range_hashed_dictionary.xml
Normal file
126
tests/performance/range_hashed_dictionary.xml
Normal file
@ -0,0 +1,126 @@
|
||||
<test>
|
||||
<create_query>
|
||||
CREATE TABLE simple_key_range_hashed_dictionary_source_table
|
||||
(
|
||||
id UInt64,
|
||||
value UInt64,
|
||||
start UInt64,
|
||||
end UInt64
|
||||
) ENGINE = Memory;
|
||||
</create_query>
|
||||
|
||||
<create_query>
|
||||
CREATE TABLE complex_key_range_hashed_dictionary_source_table
|
||||
(
|
||||
id UInt64,
|
||||
id_key String,
|
||||
value UInt64,
|
||||
start UInt64,
|
||||
end UInt64
|
||||
) ENGINE = Memory;
|
||||
</create_query>
|
||||
|
||||
<create_query>
|
||||
CREATE DICTIONARY simple_key_range_hashed_dictionary
|
||||
(
|
||||
id UInt64,
|
||||
value UInt64,
|
||||
start UInt64,
|
||||
end UInt64
|
||||
)
|
||||
PRIMARY KEY id
|
||||
SOURCE(CLICKHOUSE(DB 'default' TABLE 'simple_key_range_hashed_dictionary_source_table'))
|
||||
LAYOUT(RANGE_HASHED())
|
||||
RANGE(MIN start MAX end)
|
||||
LIFETIME(MIN 0 MAX 1000);
|
||||
</create_query>
|
||||
|
||||
<create_query>
|
||||
CREATE DICTIONARY complex_key_range_hashed_dictionary
|
||||
(
|
||||
id UInt64,
|
||||
id_key String,
|
||||
value UInt64,
|
||||
start UInt64,
|
||||
end UInt64
|
||||
)
|
||||
PRIMARY KEY id, id_key
|
||||
SOURCE(CLICKHOUSE(DB 'default' TABLE 'complex_key_range_hashed_dictionary_source_table'))
|
||||
LAYOUT(COMPLEX_KEY_RANGE_HASHED())
|
||||
RANGE(MIN start MAX end)
|
||||
LIFETIME(MIN 0 MAX 1000);
|
||||
</create_query>
|
||||
|
||||
<fill_query>
|
||||
INSERT INTO simple_key_range_hashed_dictionary_source_table
|
||||
SELECT key, key, range_start * 2, range_start * 2 + 1 FROM
|
||||
(SELECT number as key FROM numbers(10000)) as keys,
|
||||
(SELECT number as range_start FROM numbers(1000)) as ranges;
|
||||
</fill_query>
|
||||
|
||||
<fill_query>
|
||||
INSERT INTO complex_key_range_hashed_dictionary_source_table
|
||||
SELECT key, toString(key), key, range_start * 2, range_start * 2 + 1 FROM
|
||||
(SELECT number as key FROM numbers(10000)) as keys,
|
||||
(SELECT number as range_start FROM numbers(1000)) as ranges;
|
||||
</fill_query>
|
||||
|
||||
<substitutions>
|
||||
<substitution>
|
||||
<name>elements_count</name>
|
||||
<values>
|
||||
<value>500000</value>
|
||||
<value>750000</value>
|
||||
</values>
|
||||
</substitution>
|
||||
</substitutions>
|
||||
|
||||
<query>
|
||||
WITH rand64() % 5000 as key
|
||||
SELECT dictGet('default.simple_key_range_hashed_dictionary', 'value', toUInt64(key), key)
|
||||
FROM system.numbers
|
||||
LIMIT {elements_count}
|
||||
FORMAT Null;
|
||||
</query>
|
||||
|
||||
<query>
|
||||
WITH rand64() % 5000 as key
|
||||
SELECT dictHas('default.simple_key_range_hashed_dictionary', toUInt64(key), key)
|
||||
FROM system.numbers
|
||||
LIMIT {elements_count}
|
||||
FORMAT Null;
|
||||
</query>
|
||||
|
||||
<query>
|
||||
SELECT * FROM simple_key_range_hashed_dictionary
|
||||
FORMAT Null;
|
||||
</query>
|
||||
|
||||
<query>
|
||||
WITH (rand64() % toUInt64(5000) as key, toString(key) as key_id) as complex_key
|
||||
SELECT dictGet('default.complex_key_range_hashed_dictionary', 'value', complex_key, key)
|
||||
FROM system.numbers
|
||||
LIMIT {elements_count}
|
||||
FORMAT Null;
|
||||
</query>
|
||||
|
||||
<query>
|
||||
WITH (rand64() % toUInt64(5000) as key, toString(key) as key_id) as complex_key
|
||||
SELECT dictHas('default.complex_key_range_hashed_dictionary', complex_key, key)
|
||||
FROM system.numbers
|
||||
LIMIT {elements_count}
|
||||
FORMAT Null;
|
||||
</query>
|
||||
|
||||
<query>
|
||||
SELECT * FROM complex_key_range_hashed_dictionary
|
||||
FORMAT Null;
|
||||
</query>
|
||||
|
||||
<drop_query>DROP TABLE IF EXISTS simple_key_range_hashed_dictionary_source_table;</drop_query>
|
||||
<drop_query>DROP TABLE IF EXISTS complex_key_range_hashed_dictionary_source_table;</drop_query>
|
||||
|
||||
<drop_query>DROP DICTIONARY IF EXISTS simple_key_range_hashed_dictionary;</drop_query>
|
||||
<drop_query>DROP DICTIONARY IF EXISTS complex_key_range_hashed_dictionary;</drop_query>
|
||||
|
||||
</test>
|
@ -45,13 +45,13 @@ SELECT dictHas('database_for_range_dict.range_dictionary', toUInt64(2), toDate('
|
||||
SELECT dictHas('database_for_range_dict.range_dictionary', toUInt64(2), toDate('2019-05-31'));
|
||||
SELECT 'select columns from dictionary';
|
||||
SELECT 'allColumns';
|
||||
SELECT * FROM database_for_range_dict.range_dictionary;
|
||||
SELECT * FROM database_for_range_dict.range_dictionary ORDER BY CountryID, StartDate, EndDate;
|
||||
SELECT 'noColumns';
|
||||
SELECT 1 FROM database_for_range_dict.range_dictionary;
|
||||
SELECT 1 FROM database_for_range_dict.range_dictionary ORDER BY CountryID, StartDate, EndDate;
|
||||
SELECT 'onlySpecificColumns';
|
||||
SELECT CountryID, StartDate, Tax FROM database_for_range_dict.range_dictionary;
|
||||
SELECT CountryID, StartDate, Tax FROM database_for_range_dict.range_dictionary ORDER BY CountryID, StartDate, EndDate;
|
||||
SELECT 'onlySpecificColumn';
|
||||
SELECT Tax FROM database_for_range_dict.range_dictionary;
|
||||
SELECT Tax FROM database_for_range_dict.range_dictionary ORDER BY CountryID, StartDate, EndDate;
|
||||
|
||||
DROP DICTIONARY database_for_range_dict.range_dictionary;
|
||||
DROP TABLE database_for_range_dict.date_table;
|
||||
@ -97,13 +97,13 @@ SELECT dictHas('database_for_range_dict.range_dictionary_nullable', toUInt64(2),
|
||||
SELECT dictHas('database_for_range_dict.range_dictionary_nullable', toUInt64(2), toDate('2019-05-31'));
|
||||
SELECT 'select columns from dictionary';
|
||||
SELECT 'allColumns';
|
||||
SELECT * FROM database_for_range_dict.range_dictionary_nullable;
|
||||
SELECT * FROM database_for_range_dict.range_dictionary_nullable ORDER BY CountryID, StartDate, EndDate;
|
||||
SELECT 'noColumns';
|
||||
SELECT 1 FROM database_for_range_dict.range_dictionary_nullable;
|
||||
SELECT 1 FROM database_for_range_dict.range_dictionary_nullable ORDER BY CountryID, StartDate, EndDate;
|
||||
SELECT 'onlySpecificColumns';
|
||||
SELECT CountryID, StartDate, Tax FROM database_for_range_dict.range_dictionary_nullable;
|
||||
SELECT CountryID, StartDate, Tax FROM database_for_range_dict.range_dictionary_nullable ORDER BY CountryID, StartDate, EndDate;
|
||||
SELECT 'onlySpecificColumn';
|
||||
SELECT Tax FROM database_for_range_dict.range_dictionary_nullable;
|
||||
SELECT Tax FROM database_for_range_dict.range_dictionary_nullable ORDER BY CountryID, StartDate, EndDate;
|
||||
|
||||
DROP DICTIONARY database_for_range_dict.range_dictionary_nullable;
|
||||
DROP TABLE database_for_range_dict.date_table;
|
||||
|
@ -45,13 +45,13 @@ SELECT dictHas('range_dictionary', (toUInt64(2), '2'), toDate('2019-05-29'));
|
||||
SELECT dictHas('range_dictionary', (toUInt64(2), '2'), toDate('2019-05-31'));
|
||||
SELECT 'select columns from dictionary';
|
||||
SELECT 'allColumns';
|
||||
SELECT * FROM range_dictionary;
|
||||
SELECT * FROM range_dictionary ORDER BY CountryID, StartDate, EndDate;
|
||||
SELECT 'noColumns';
|
||||
SELECT 1 FROM range_dictionary;
|
||||
SELECT 1 FROM range_dictionary ORDER BY CountryID, StartDate, EndDate;
|
||||
SELECT 'onlySpecificColumns';
|
||||
SELECT CountryID, StartDate, Tax FROM range_dictionary;
|
||||
SELECT CountryID, StartDate, Tax FROM range_dictionary ORDER BY CountryID, StartDate, EndDate;
|
||||
SELECT 'onlySpecificColumn';
|
||||
SELECT Tax FROM range_dictionary;
|
||||
SELECT Tax FROM range_dictionary ORDER BY CountryID, StartDate, EndDate;
|
||||
|
||||
DROP TABLE date_table;
|
||||
DROP DICTIONARY range_dictionary;
|
||||
@ -99,13 +99,13 @@ SELECT dictHas('range_dictionary_nullable', (toUInt64(2), '2'), toDate('2019-05-
|
||||
SELECT dictHas('range_dictionary_nullable', (toUInt64(2), '2'), toDate('2019-05-31'));
|
||||
SELECT 'select columns from dictionary';
|
||||
SELECT 'allColumns';
|
||||
SELECT * FROM range_dictionary_nullable;
|
||||
SELECT * FROM range_dictionary_nullable ORDER BY CountryID, StartDate, EndDate;
|
||||
SELECT 'noColumns';
|
||||
SELECT 1 FROM range_dictionary_nullable;
|
||||
SELECT 1 FROM range_dictionary_nullable ORDER BY CountryID, StartDate, EndDate;
|
||||
SELECT 'onlySpecificColumns';
|
||||
SELECT CountryID, StartDate, Tax FROM range_dictionary_nullable;
|
||||
SELECT CountryID, StartDate, Tax FROM range_dictionary_nullable ORDER BY CountryID, StartDate, EndDate;
|
||||
SELECT 'onlySpecificColumn';
|
||||
SELECT Tax FROM range_dictionary_nullable;
|
||||
SELECT Tax FROM range_dictionary_nullable ORDER BY CountryID, StartDate, EndDate;
|
||||
|
||||
DROP TABLE date_table;
|
||||
DROP DICTIONARY range_dictionary_nullable;
|
||||
|
Loading…
Reference in New Issue
Block a user