Merge pull request #33516 from kitaisreal/range-hashed-dictionary-interval-tree

RangeHashedDictionary use interval tree
This commit is contained in:
Maksim Kita 2022-01-19 16:30:31 +01:00 committed by GitHub
commit 41a6cd54aa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 1544 additions and 98 deletions

683
src/Common/IntervalTree.h Normal file
View File

@ -0,0 +1,683 @@
#pragma once
#include <base/defines.h>

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>
namespace DB
{
/** Closed interval [left, right]: both bounds belong to the interval.
  * Example: [1, 1] is a valid interval that contains exactly the point 1.
  */
template <typename TIntervalStorageType>
struct Interval
{
    using IntervalStorageType = TIntervalStorageType;

    IntervalStorageType left;
    IntervalStorageType right;

    /// Bounds are accepted by value and moved into place, so a non-trivial storage type is not copied a second time.
    Interval(IntervalStorageType left_, IntervalStorageType right_) : left(std::move(left_)), right(std::move(right_)) { }

    /// Returns true when left <= point <= right.
    inline bool contains(IntervalStorageType point) const { return left <= point && point <= right; }
};

/// All comparison operators order intervals lexicographically by (left, right).

template <typename IntervalStorageType>
bool operator<(const Interval<IntervalStorageType> & lhs, const Interval<IntervalStorageType> & rhs)
{
    return std::tie(lhs.left, lhs.right) < std::tie(rhs.left, rhs.right);
}

template <typename IntervalStorageType>
bool operator<=(const Interval<IntervalStorageType> & lhs, const Interval<IntervalStorageType> & rhs)
{
    return std::tie(lhs.left, lhs.right) <= std::tie(rhs.left, rhs.right);
}

template <typename IntervalStorageType>
bool operator==(const Interval<IntervalStorageType> & lhs, const Interval<IntervalStorageType> & rhs)
{
    return std::tie(lhs.left, lhs.right) == std::tie(rhs.left, rhs.right);
}

template <typename IntervalStorageType>
bool operator!=(const Interval<IntervalStorageType> & lhs, const Interval<IntervalStorageType> & rhs)
{
    return std::tie(lhs.left, lhs.right) != std::tie(rhs.left, rhs.right);
}

template <typename IntervalStorageType>
bool operator>(const Interval<IntervalStorageType> & lhs, const Interval<IntervalStorageType> & rhs)
{
    return std::tie(lhs.left, lhs.right) > std::tie(rhs.left, rhs.right);
}

template <typename IntervalStorageType>
bool operator>=(const Interval<IntervalStorageType> & lhs, const Interval<IntervalStorageType> & rhs)
{
    return std::tie(lhs.left, lhs.right) >= std::tie(rhs.left, rhs.right);
}
/// Tag type: using it as Value turns IntervalTree into a set that stores intervals only.
struct IntervalTreeVoidValue
{
};

/** Tree structure that allows to efficiently retrieve all intervals that intersect a specific point.
  * https://en.wikipedia.org/wiki/Interval_tree
  *
  * Search for all intervals intersecting a point has complexity O(log(n) + k), k is the count of intervals that intersect the point.
  * If we only need to check whether some interval intersects the point, such operation has complexity O(log(n)).
  *
  * Explanation:
  *
  * IntervalTree structure is a balanced tree. Each node contains:
  * 1. Point
  * 2. Intervals sorted by left ascending that intersect that point.
  * 3. Intervals sorted by right descending that intersect that point.
  *
  * Build:
  *
  * To keep the tree relatively balanced we use the median of all interval end points.
  * On each step a tree node is built from a group of intervals. For the root node the group is all intervals.
  * First split the intervals in 4 groups:
  * 1. Intervals that are less than the median point. Interval right is less than the median point.
  * 2. Intervals that are greater than the median point. Interval left is greater than the median point.
  * 3. Intervals that intersect the node, sorted by left ascending.
  * 4. Intervals that intersect the node, sorted by right descending.
  *
  * If group 1 is not empty, continue building the left child with intervals from group 1.
  * If group 2 is not empty, continue building the right child with intervals from group 2.
  *
  * Search:
  *
  * Search for intervals intersecting a point is started from the root node.
  * If the search point is less than the point in the node, then we check intervals sorted by left ascending
  * until left is greater than the search point.
  * If there is a left child, continue the search in the left child.
  *
  * If the search point is greater than the point in the node, then we check intervals sorted by right descending
  * until right is less than the search point.
  * If there is a right child, continue the search in the right child.
  *
  * If the search point is equal to the point in the node, then we can emit all intervals that intersect the current tree node
  * and stop searching.
  *
  * Additional details:
  * 1. To improve cache locality the tree is stored implicitly in an array (children of node i are 2 * i + 1 and 2 * i + 2),
  *    after build method is called other intervals cannot be added to the tree.
  * 2. Additionally to improve cache locality sorted intervals for all nodes are stored in a separate array.
  *    In a node we store only the start of its sorted intervals, and also the count of intersecting intervals.
  *    Intervals sorted by left ascending are stored in indexes
  *    [sorted_intervals_start_index, sorted_intervals_start_index + intersecting_intervals_size).
  *    Intervals sorted by right descending are stored in indexes
  *    [sorted_intervals_start_index + intersecting_intervals_size, sorted_intervals_start_index + intersecting_intervals_size * 2).
  */
template <typename Interval, typename Value>
class IntervalTree
{
public:
    using IntervalStorageType = typename Interval::IntervalStorageType;

    static constexpr bool is_empty_value = std::is_same_v<Value, IntervalTreeVoidValue>;

    /// A single root node always exists before build(), it describes all inserted intervals for iteration.
    IntervalTree() { nodes.resize(1); }

    /// Add interval to a set. Allowed only before build().
    template <typename TValue = Value, std::enable_if_t<std::is_same_v<TValue, IntervalTreeVoidValue>, bool> = true>
    void emplace(Interval interval)
    {
        assert(!tree_is_built);
        sorted_intervals.emplace_back(interval);
        increaseIntervalsSize();
    }

    /// Add interval to a map, the value is constructed in place from args. Allowed only before build().
    template <typename TValue = Value, std::enable_if_t<!std::is_same_v<TValue, IntervalTreeVoidValue>, bool> = true, typename... Args>
    void emplace(Interval interval, Args &&... args)
    {
        assert(!tree_is_built);
        sorted_intervals.emplace_back(
            std::piecewise_construct, std::forward_as_tuple(interval), std::forward_as_tuple(std::forward<Args>(args)...));
        increaseIntervalsSize();
    }

    /// Add interval to a set. Allowed only before build().
    template <typename TValue = Value, std::enable_if_t<std::is_same_v<TValue, IntervalTreeVoidValue>, bool> = true>
    void insert(Interval interval)
    {
        assert(!tree_is_built);
        sorted_intervals.emplace_back(interval);
        increaseIntervalsSize();
    }

    /// Add interval with a copy of value to a map. Allowed only before build().
    template <typename TValue = Value, std::enable_if_t<!std::is_same_v<TValue, IntervalTreeVoidValue>, bool> = true>
    void insert(Interval interval, const Value & value)
    {
        assert(!tree_is_built);
        sorted_intervals.emplace_back(interval, value);
        increaseIntervalsSize();
    }

    /// Add interval with a moved value to a map. Allowed only before build().
    template <typename TValue = Value, std::enable_if_t<!std::is_same_v<TValue, IntervalTreeVoidValue>, bool> = true>
    void insert(Interval interval, Value && value)
    {
        assert(!tree_is_built);
        sorted_intervals.emplace_back(interval, std::move(value));
        increaseIntervalsSize();
    }

    /// Build tree, after that intervals cannot be inserted, and only search or iteration can be performed.
    void build()
    {
        assert(!tree_is_built);

        nodes.clear();
        nodes.reserve(sorted_intervals.size());
        buildTree();

        tree_is_built = true;
    }

    /** Find all intervals intersecting point.
      * The callback returns true to continue iteration and false to stop it.
      * If the tree is not built yet, all inserted intervals are scanned linearly.
      *
      * Callback interface for IntervalSet:
      *
      * template <typename IntervalType>
      * struct IntervalSetCallback
      * {
      *     bool operator()(const IntervalType & interval)
      *     {
      *         bool should_continue_interval_iteration = false;
      *         return should_continue_interval_iteration;
      *     }
      * };
      *
      * Callback interface for IntervalMap:
      *
      * template <typename IntervalType, typename Value>
      * struct IntervalMapCallback
      * {
      *     bool operator()(const IntervalType & interval, const Value & value)
      *     {
      *         bool should_continue_interval_iteration = false;
      *         return should_continue_interval_iteration;
      *     }
      * };
      */
    template <typename IntervalCallback>
    void find(IntervalStorageType point, IntervalCallback && callback) const
    {
        if (unlikely(!tree_is_built))
        {
            findIntervalsNonConstructedImpl(point, callback);
            return;
        }

        findIntervalsImpl(point, callback);
    }

    /// Check if there is an interval intersecting point
    bool has(IntervalStorageType point) const
    {
        bool has_intervals = false;

        /// Returning false from the callback stops the search on the first intersecting interval.
        if constexpr (is_empty_value)
        {
            find(point, [&](auto &)
            {
                has_intervals = true;
                return false;
            });
        }
        else
        {
            find(point, [&](auto &, auto &)
            {
                has_intervals = true;
                return false;
            });
        }

        return has_intervals;
    }

    class Iterator;
    using iterator = Iterator;
    using const_iterator = Iterator;

    /// Iteration is read-only, so non-const overloads just delegate to the const ones.
    iterator begin() { return static_cast<const IntervalTree &>(*this).begin(); }

    iterator end() { return static_cast<const IntervalTree &>(*this).end(); }

    const_iterator begin() const
    {
        size_t start_index = findFirstIteratorNodeIndex();
        return Iterator(start_index, 0, this);
    }

    /// End is positioned one past the last interval of the last node that has intervals.
    const_iterator end() const
    {
        size_t end_index = findLastIteratorNodeIndex();
        size_t last_interval_index = 0;

        if (likely(end_index < nodes.size()))
            last_interval_index = nodes[end_index].sorted_intervals_range_size;

        return Iterator(end_index, last_interval_index, this);
    }

    const_iterator cbegin() const { return begin(); }

    const_iterator cend() const { return end(); }

    /// Count of inserted intervals.
    size_t getIntervalsSize() const { return intervals_size; }

private:
    struct Node
    {
        /// Start of node intervals (sorted by left ascending) in sorted_intervals.
        size_t sorted_intervals_range_start_index = 0;
        /// Count of intervals that intersect middle_element. Zero means that node is not used.
        size_t sorted_intervals_range_size = 0;
        IntervalStorageType middle_element{};

        inline bool hasValue() const { return sorted_intervals_range_size != 0; }
    };

    using IntervalWithEmptyValue = Interval;
    using IntervalWithNonEmptyValue = std::pair<Interval, Value>;
    using IntervalWithValue = std::conditional_t<is_empty_value, IntervalWithEmptyValue, IntervalWithNonEmptyValue>;

public:
    /** Bidirectional read-only iterator over all intervals.
      * Position is a pair (node index, index of interval inside node range sorted by left ascending).
      * Nodes without intervals are skipped.
      */
    class Iterator
    {
    public:
        bool operator==(const Iterator & rhs) const
        {
            return node_index == rhs.node_index && current_interval_index == rhs.current_interval_index && tree == rhs.tree;
        }

        bool operator!=(const Iterator & rhs) const { return !(*this == rhs); }

        const IntervalWithValue & operator*() { return getCurrentValue(); }

        const IntervalWithValue & operator*() const { return getCurrentValue(); }

        const IntervalWithValue * operator->() { return &getCurrentValue(); }

        const IntervalWithValue * operator->() const { return &getCurrentValue(); }

        Iterator & operator++()
        {
            iterateToNext();
            return *this;
        }

        Iterator operator++(int) // NOLINT
        {
            Iterator copy(*this);
            iterateToNext();
            return copy;
        }

        Iterator & operator--()
        {
            iterateToPrevious();
            return *this;
        }

        Iterator operator--(int) // NOLINT
        {
            Iterator copy(*this);
            iterateToPrevious();
            return copy;
        }

    private:
        friend class IntervalTree;

        Iterator(size_t node_index_, size_t current_interval_index_, const IntervalTree * tree_)
            : node_index(node_index_), current_interval_index(current_interval_index_), tree(tree_)
        {
        }

        size_t node_index;
        size_t current_interval_index;
        const IntervalTree * tree;

        void iterateToNext()
        {
            size_t nodes_size = tree->nodes.size();
            auto & current_node = tree->nodes[node_index];

            ++current_interval_index;

            if (current_interval_index < current_node.sorted_intervals_range_size)
                return;

            /// Current node is exhausted, move to the first interval of the next node that has intervals.
            /// If there is no such node, iterator stays one past the last interval, which is equal to end().
            size_t node_index_copy = node_index + 1;
            for (; node_index_copy < nodes_size; ++node_index_copy)
            {
                auto & node = tree->nodes[node_index_copy];

                if (node.hasValue())
                {
                    node_index = node_index_copy;
                    current_interval_index = 0;
                    break;
                }
            }
        }

        void iterateToPrevious()
        {
            if (current_interval_index > 0)
            {
                --current_interval_index;
                return;
            }

            /// We are on the first interval of current node, move to the last interval of the closest
            /// previous node that has intervals. Both node index and interval index must be updated together.
            size_t previous_node_index = node_index;
            while (previous_node_index > 0)
            {
                --previous_node_index;

                auto & node = tree->nodes[previous_node_index];
                if (node.hasValue())
                {
                    node_index = previous_node_index;
                    current_interval_index = node.sorted_intervals_range_size - 1;
                    return;
                }
            }
        }

        const IntervalWithValue & getCurrentValue() const
        {
            auto & current_node = tree->nodes[node_index];
            size_t interval_index = current_node.sorted_intervals_range_start_index + current_interval_index;
            return tree->sorted_intervals[interval_index];
        }
    };

private:
    /// Builds nodes and sorted_intervals from inserted intervals using an explicit stack instead of recursion.
    void buildTree()
    {
        std::vector<IntervalStorageType> temporary_points_storage;
        temporary_points_storage.reserve(sorted_intervals.size() * 2);

        std::vector<IntervalWithValue> left_intervals;
        std::vector<IntervalWithValue> right_intervals;
        std::vector<IntervalWithValue> intervals_sorted_by_left_asc;
        std::vector<IntervalWithValue> intervals_sorted_by_right_desc;

        struct StackFrame
        {
            size_t index;
            std::vector<IntervalWithValue> intervals;
        };

        std::vector<StackFrame> stack;
        stack.emplace_back(StackFrame{0, std::move(sorted_intervals)});
        sorted_intervals.clear();

        while (!stack.empty())
        {
            auto frame = std::move(stack.back());
            stack.pop_back();

            size_t current_index = frame.index;
            auto & current_intervals = frame.intervals;

            if (current_intervals.empty())
                continue;

            /// New nodes are value-initialized, so skipped indexes stay without intervals.
            if (current_index >= nodes.size())
                nodes.resize(current_index + 1);

            temporary_points_storage.clear();
            intervalsToPoints(current_intervals, temporary_points_storage);
            auto median = pointsMedian(temporary_points_storage);

            left_intervals.clear();
            right_intervals.clear();
            intervals_sorted_by_left_asc.clear();
            intervals_sorted_by_right_desc.clear();

            /// Frame owns its intervals, so they are moved into groups instead of being copied.
            for (auto & interval_with_value : current_intervals)
            {
                const auto & interval = getInterval(interval_with_value);

                if (interval.right < median)
                {
                    left_intervals.emplace_back(std::move(interval_with_value));
                }
                else if (interval.left > median)
                {
                    right_intervals.emplace_back(std::move(interval_with_value));
                }
                else
                {
                    /// Interval intersects median, it is stored twice, once for each sort order.
                    intervals_sorted_by_left_asc.emplace_back(interval_with_value);
                    intervals_sorted_by_right_desc.emplace_back(std::move(interval_with_value));
                }
            }

            std::sort(intervals_sorted_by_left_asc.begin(), intervals_sorted_by_left_asc.end(), [](const auto & lhs, const auto & rhs)
            {
                return getInterval(lhs).left < getInterval(rhs).left;
            });

            std::sort(intervals_sorted_by_right_desc.begin(), intervals_sorted_by_right_desc.end(), [](const auto & lhs, const auto & rhs)
            {
                return getInterval(lhs).right > getInterval(rhs).right;
            });

            size_t sorted_intervals_range_start_index = sorted_intervals.size();

            for (auto && interval_sorted_by_left_asc : intervals_sorted_by_left_asc)
                sorted_intervals.emplace_back(std::move(interval_sorted_by_left_asc));

            for (auto && interval_sorted_by_right_desc : intervals_sorted_by_right_desc)
                sorted_intervals.emplace_back(std::move(interval_sorted_by_right_desc));

            auto & node = nodes[current_index];
            node.middle_element = median;
            node.sorted_intervals_range_start_index = sorted_intervals_range_start_index;
            node.sorted_intervals_range_size = intervals_sorted_by_left_asc.size();

            /// Empty groups do not produce children, so there is no reason to put them on the stack.
            if (!left_intervals.empty())
            {
                size_t left_child_index = current_index * 2 + 1;
                stack.emplace_back(StackFrame{left_child_index, std::move(left_intervals)});
            }

            if (!right_intervals.empty())
            {
                size_t right_child_index = current_index * 2 + 2;
                stack.emplace_back(StackFrame{right_child_index, std::move(right_intervals)});
            }
        }
    }

    /// Search in built tree. Stops when callback returns false or when there is no child to descend into.
    template <typename IntervalCallback>
    void findIntervalsImpl(IntervalStorageType point, IntervalCallback && callback) const
    {
        size_t current_index = 0;

        while (true)
        {
            if (current_index >= nodes.size())
                break;

            auto & node = nodes[current_index];
            if (!node.hasValue())
                break;

            auto middle_element = node.middle_element;

            if (point < middle_element)
            {
                /// Every node interval has right >= middle_element > point, so only left bound must be checked.
                size_t start = node.sorted_intervals_range_start_index;
                size_t end = start + node.sorted_intervals_range_size;

                for (; start != end; ++start)
                {
                    auto & interval_with_value_left_sorted_asc = sorted_intervals[start];
                    auto & interval_left_sorted_asc = getInterval(interval_with_value_left_sorted_asc);
                    if (interval_left_sorted_asc.left > point)
                        break;

                    bool should_continue = callCallback(interval_with_value_left_sorted_asc, callback);
                    if (unlikely(!should_continue))
                        return;
                }

                size_t left_child_index = current_index * 2 + 1;
                current_index = left_child_index;
            }
            else
            {
                /// Every node interval has left <= middle_element <= point, so only right bound must be checked.
                size_t start = node.sorted_intervals_range_start_index + node.sorted_intervals_range_size;
                size_t end = start + node.sorted_intervals_range_size;

                for (; start != end; ++start)
                {
                    auto & interval_with_value_right_sorted_desc = sorted_intervals[start];
                    auto & interval_right_sorted_desc = getInterval(interval_with_value_right_sorted_desc);
                    if (interval_right_sorted_desc.right < point)
                        break;

                    bool should_continue = callCallback(interval_with_value_right_sorted_desc, callback);
                    if (unlikely(!should_continue))
                        return;
                }

                if (likely(point > middle_element))
                {
                    size_t right_child_index = current_index * 2 + 2;
                    current_index = right_child_index;
                }
                else
                {
                    /// This is case when point == middle_element.
                    break;
                }
            }
        }
    }

    /// Linear search that is used before build(). Honors callback result in the same way as search in built tree.
    template <typename IntervalCallback>
    void findIntervalsNonConstructedImpl(IntervalStorageType point, IntervalCallback && callback) const
    {
        for (const auto & interval_with_value : sorted_intervals)
        {
            if (!getInterval(interval_with_value).contains(point))
                continue;

            bool should_continue = callCallback(interval_with_value, callback);
            if (unlikely(!should_continue))
                return;
        }
    }

    /// Index of the first node that has intervals, 0 if there is no such node.
    inline size_t findFirstIteratorNodeIndex() const
    {
        size_t nodes_size = nodes.size();
        size_t result_index = 0;

        for (; result_index < nodes_size; ++result_index)
        {
            if (nodes[result_index].hasValue())
                break;
        }

        if (unlikely(result_index == nodes_size))
            result_index = 0;

        return result_index;
    }

    /// Index of the last node that has intervals, 0 if there is no such node.
    inline size_t findLastIteratorNodeIndex() const
    {
        if (unlikely(nodes.empty()))
            return 0;

        size_t nodes_size = nodes.size();
        size_t result_index = nodes_size - 1;

        for (; result_index != 0; --result_index)
        {
            if (nodes[result_index].hasValue())
                break;
        }

        return result_index;
    }

    inline void increaseIntervalsSize()
    {
        /// Before tree is build we store all intervals size in our first node to allow tree iteration.
        ++intervals_size;
        nodes[0].sorted_intervals_range_size = intervals_size;
    }

    std::vector<Node> nodes;
    std::vector<IntervalWithValue> sorted_intervals;
    size_t intervals_size = 0;
    bool tree_is_built = false;

    /// Extract interval from stored element, that is either interval itself or pair of interval and value.
    static inline const Interval & getInterval(const IntervalWithValue & interval_with_value)
    {
        if constexpr (is_empty_value)
            return interval_with_value;
        else
            return interval_with_value.first;
    }

    /// Call callback with (interval) for set or (interval, value) for map and return its result.
    template <typename IntervalCallback>
    static inline bool callCallback(const IntervalWithValue & interval, IntervalCallback && callback)
    {
        if constexpr (is_empty_value)
            return callback(interval);
        else
            return callback(interval.first, interval.second);
    }

    /// Append left and right bound of each interval to temporary_points_storage.
    static inline void
    intervalsToPoints(const std::vector<IntervalWithValue> & intervals, std::vector<IntervalStorageType> & temporary_points_storage)
    {
        for (const auto & interval_with_value : intervals)
        {
            auto & interval = getInterval(interval_with_value);
            temporary_points_storage.emplace_back(interval.left);
            temporary_points_storage.emplace_back(interval.right);
        }
    }

    /// Returns element that is in the middle of sorted points. Points are reordered. Points must not be empty.
    static inline IntervalStorageType pointsMedian(std::vector<IntervalStorageType> & points)
    {
        size_t size = points.size();
        size_t middle_element_index = size / 2;

        std::nth_element(points.begin(), points.begin() + middle_element_index, points.end());

        /** We should not get median as average of middle_element_index and middle_element_index - 1
          * because we want point in node to intersect some interval.
          * Example: Intervals [1, 1], [3, 3]. If we choose 2 as average point, it does not intersect any interval.
          */
        return points[middle_element_index];
    }
};
template <typename IntervalType>
using IntervalSet = IntervalTree<IntervalType, IntervalTreeVoidValue>;
template <typename IntervalType, typename Value>
using IntervalMap = IntervalTree<IntervalType, Value>;
}

View File

@ -83,3 +83,6 @@ target_link_libraries (executable_udf PRIVATE dbms)
add_executable(hive_metastore_client hive_metastore_client.cpp)
target_link_libraries (hive_metastore_client PUBLIC hivemetastore ${THRIFT_LIBRARY})
add_executable (interval_tree interval_tree.cpp)
target_link_libraries (interval_tree PRIVATE dbms)

View File

@ -0,0 +1,95 @@
#include <iomanip>
#include <iostream>
#include <map>
#include <string>
#include <set>
#include <vector>
#include <Common/randomSeed.h>
#include <Common/Stopwatch.h>
#include <Common/IntervalTree.h>
using namespace DB;
using Int64Interval = Interval<Int64>;
int main(int, char **)
{
{
IntervalSet<Int64Interval> tree;
tree.emplace(Int64Interval(0, 5));
tree.emplace(Int64Interval(10, 15));
tree.build();
for (const auto & interval : tree)
{
std::cout << "Interval left " << interval.left << " right " << interval.right << std::endl;
}
}
{
IntervalMap<Int64Interval, std::string> tree;
tree.emplace(Int64Interval(0, 5), "value1");
tree.emplace(Int64Interval(10, 15), "value2");
tree.build();
for (const auto & [interval, value] : tree)
{
std::cout << "Interval left " << interval.left << " right " << interval.right;
std::cout << " value " << value << std::endl;
}
}
{
IntervalSet<Int64Interval> tree;
for (size_t i = 0; i < 5; ++i)
{
tree.emplace(Int64Interval(0, i));
}
tree.build();
for (const auto & interval : tree)
{
std::cout << "Interval left " << interval.left << " right " << interval.right << std::endl;
}
for (Int64 i = 0; i < 5; ++i)
{
tree.find(i, [](auto & interval)
{
std::cout << "Interval left " << interval.left << " right " << interval.right << std::endl;
return true;
});
}
}
{
IntervalMap<Int64Interval, std::string> tree;
for (size_t i = 0; i < 5; ++i)
{
tree.emplace(Int64Interval(0, i), "Value " + std::to_string(i));
}
tree.build();
for (const auto & [interval, value] : tree)
{
std::cout << "Interval left " << interval.left << " right " << interval.right;
std::cout << " value " << value << std::endl;
}
for (Int64 i = 0; i < 5; ++i)
{
tree.find(i, [](auto & interval, auto & value)
{
std::cout << "Interval left " << interval.left << " right " << interval.right;
std::cout << " value " << value << std::endl;
return true;
});
}
}
return 0;
}

View File

@ -0,0 +1,540 @@
#include <gtest/gtest.h>
#include <set>
#include <map>
#include <base/types.h>
#include <Common/IntervalTree.h>
using namespace DB;
using Int64Interval = Interval<Int64>;
template <typename IntervalType>
std::set<IntervalType> intervalSetToSet(const IntervalSet<IntervalType> & interval_set)
{
std::set<IntervalType> result;
for (const auto & interval : interval_set)
result.insert(interval);
return result;
}
template <typename IntervalType, typename Value>
std::map<IntervalType, Value> intervalMapToMap(const IntervalMap<IntervalType, Value> & interval_map)
{
std::map<IntervalType, Value> result;
for (const auto & [interval, value] : interval_map)
result.emplace(interval, value);
return result;
}
/// Callback for IntervalSet::find that stores each reported interval into external set and never stops the search.
template <typename IntervalType>
struct CollectIntervalsSetCallback
{
    explicit CollectIntervalsSetCallback(std::set<IntervalType> & destination)
        : result_intervals(destination)
    {
    }

    /// Always returns true to ask for the next interval.
    bool operator()(IntervalType interval)
    {
        result_intervals.insert(interval);

        constexpr bool should_continue = true;
        return should_continue;
    }

    /// Not owned, must outlive the callback.
    std::set<IntervalType> & result_intervals;
};
using CollectIntervalsSetInt64Callback = CollectIntervalsSetCallback<Int64Interval>;
/// Returns all intervals that interval set reports for point.
template <typename IntervalType>
std::set<IntervalType> intervalSetFindIntervals(const IntervalSet<IntervalType> & interval_set, typename IntervalType::IntervalStorageType point)
{
    std::set<IntervalType> found_intervals;

    /// Callback keeps reference to found_intervals and fills it during the search.
    interval_set.find(point, CollectIntervalsSetCallback<IntervalType>(found_intervals));

    return found_intervals;
}
/// Callback for IntervalMap::find that stores each reported (interval, value) pair into external map and never stops the search.
template <typename IntervalType, typename Value>
struct CollectIntervalsMapCallback
{
    explicit CollectIntervalsMapCallback(std::map<IntervalType, Value> & destination)
        : result_intervals(destination)
    {
    }

    /// Always returns true to ask for the next interval.
    bool operator()(IntervalType interval, const Value & value)
    {
        result_intervals.emplace(interval, value);

        constexpr bool should_continue = true;
        return should_continue;
    }

    /// Not owned, must outlive the callback.
    std::map<IntervalType, Value> & result_intervals;
};
template <typename IntervalType, typename Value>
std::map<IntervalType, Value> intervalMapFindIntervals(const IntervalMap<IntervalType, Value> & interval_set, typename IntervalType::IntervalStorageType point)
{
std::map<IntervalType, Value> result;
CollectIntervalsMapCallback callback(result);
interval_set.find(point, callback);
return result;
}
/// Disjoint intervals [2i, 2i + 1]: both bounds of each interval must find exactly that interval.
TEST(IntervalTree, IntervalSetBasic)
{
    for (size_t intervals_size = 0; intervals_size < 120; ++intervals_size)
    {
        std::set<Int64Interval> expected;
        IntervalSet<Int64Interval> set;

        for (size_t i = 0; i < intervals_size; ++i)
        {
            auto interval = Int64Interval(i * 2, i * 2 + 1);
            expected.insert(interval);
            set.insert(interval);
        }

        ASSERT_TRUE(set.getIntervalsSize() == expected.size());
        ASSERT_TRUE(set.getIntervalsSize() == intervals_size);

        /// The same checks are done first for not built set and then for built set.
        for (const bool build_before_checks : {false, true})
        {
            if (build_before_checks)
                set.build();

            ASSERT_TRUE(intervalSetToSet(set) == expected);

            for (const auto & expected_interval : expected)
            {
                const std::set<Int64Interval> only_expected_interval = {expected_interval};

                auto found = intervalSetFindIntervals(set, expected_interval.left);
                ASSERT_TRUE(found.size() == 1);
                ASSERT_TRUE(found == only_expected_interval);

                found = intervalSetFindIntervals(set, expected_interval.right);
                ASSERT_TRUE(found.size() == 1);
                ASSERT_TRUE(found == only_expected_interval);

                ASSERT_TRUE(set.has(expected_interval.left));
                ASSERT_TRUE(set.has(expected_interval.right));
            }
        }
    }
}
/// Degenerate intervals [i, i]: each point must find exactly its own interval.
TEST(IntervalTree, IntervalSetPoints)
{
    for (size_t intervals_size = 0; intervals_size < 120; ++intervals_size)
    {
        std::set<Int64Interval> expected;
        IntervalSet<Int64Interval> set;

        for (size_t i = 0; i < intervals_size; ++i)
        {
            auto interval = Int64Interval(i, i);
            expected.insert(interval);
            set.insert(interval);
        }

        ASSERT_TRUE(set.getIntervalsSize() == expected.size());
        ASSERT_TRUE(set.getIntervalsSize() == intervals_size);

        /// The same checks are done first for not built set and then for built set.
        for (const bool build_before_checks : {false, true})
        {
            if (build_before_checks)
                set.build();

            ASSERT_TRUE(intervalSetToSet(set) == expected);

            for (const auto & expected_interval : expected)
            {
                const std::set<Int64Interval> only_expected_interval = {expected_interval};

                auto found = intervalSetFindIntervals(set, expected_interval.left);
                ASSERT_TRUE(found.size() == 1);
                ASSERT_TRUE(found == only_expected_interval);

                found = intervalSetFindIntervals(set, expected_interval.right);
                ASSERT_TRUE(found.size() == 1);
                ASSERT_TRUE(found == only_expected_interval);

                ASSERT_TRUE(set.has(expected_interval.left));
                ASSERT_TRUE(set.has(expected_interval.right));
            }
        }
    }
}
/// Nested intervals [0, 2i + 1]: common left bound must find all of them.
TEST(IntervalTree, IntervalSetIntersectingIntervals)
{
    for (size_t intervals_size = 0; intervals_size < 120; ++intervals_size)
    {
        std::set<Int64Interval> expected;
        IntervalSet<Int64Interval> set;

        for (size_t i = 0; i < intervals_size; ++i)
        {
            auto interval = Int64Interval(0, i * 2 + 1);
            expected.insert(interval);
            set.insert(interval);
        }

        ASSERT_TRUE(set.getIntervalsSize() == expected.size());
        ASSERT_TRUE(set.getIntervalsSize() == intervals_size);

        /// The same checks are done first for not built set and then for built set.
        for (const bool build_before_checks : {false, true})
        {
            if (build_before_checks)
                set.build();

            ASSERT_TRUE(intervalSetToSet(set) == expected);

            for (const auto & expected_interval : expected)
            {
                auto found = intervalSetFindIntervals(set, expected_interval.left);
                ASSERT_TRUE(found.size() == expected.size());
                ASSERT_TRUE(found == expected);

                ASSERT_TRUE(set.has(expected_interval.left));
                ASSERT_TRUE(set.has(expected_interval.right));
            }
        }
    }
}
/// Checks begin/end equality, dereference, increment and decrement of interval set iterators.
TEST(IntervalTree, IntervalSetIterators)
{
    /// Empty set: begin is equal to end before and after build.
    {
        IntervalSet<Int64Interval> set;
        ASSERT_TRUE(set.begin() == set.end());
        ASSERT_TRUE(set.cbegin() == set.cend());
        set.build();
        ASSERT_TRUE(set.begin() == set.end());
        ASSERT_TRUE(set.cbegin() == set.cend());
    }

    /// Single interval in not built set: one step forward from begin reaches end, one step back from end reaches begin.
    {
        IntervalSet<Int64Interval> set;
        set.emplace(Int64Interval(0, 5));

        ASSERT_TRUE(set.begin() != set.end());
        ASSERT_TRUE((*set.begin()).left == 0);
        ASSERT_TRUE((*set.begin()).right == 5);
        ASSERT_TRUE(set.begin()->left == 0);
        ASSERT_TRUE(set.begin()->right == 5);

        auto begin = set.begin();
        ++begin;
        ASSERT_TRUE(begin == set.end());

        begin = set.begin();
        begin++;
        ASSERT_TRUE(begin == set.end());

        auto end = set.end();
        --end;
        ASSERT_TRUE(set.begin() == end);

        end = set.end();
        end--;
        ASSERT_TRUE(set.begin() == end);
    }

    /// Backward iteration over not built set must visit every inserted interval.
    {
        for (size_t intervals_size = 0; intervals_size < 120; ++intervals_size)
        {
            std::set<Int64Interval> expected;
            IntervalSet<Int64Interval> set;

            for (size_t interval_index = 0; interval_index < intervals_size; ++interval_index)
            {
                auto interval = Int64Interval(interval_index * 2, interval_index * 2 + 1);
                set.insert(interval);
                expected.insert(interval);
            }

            auto end = set.end();
            auto begin = set.begin();

            std::set<Int64Interval> actual;

            /// Loop finishes only when end == begin, and begin was already collected on the last step.
            while (end != begin)
            {
                --end;
                actual.insert(*end);
            }

            ASSERT_TRUE(actual == expected);
        }
    }
}
/// Disjoint intervals [2i, 2i + 1] with values: both bounds of each interval must find exactly that entry.
TEST(IntervalTree, IntervalMapBasic)
{
    for (size_t intervals_size = 0; intervals_size < 120; ++intervals_size)
    {
        std::map<Int64Interval, std::string> expected;
        IntervalMap<Int64Interval, std::string> map;

        for (size_t i = 0; i < intervals_size; ++i)
        {
            auto interval = Int64Interval(i * 2, i * 2 + 1);
            auto value = std::to_string(interval.left);
            expected.emplace(interval, value);
            map.emplace(interval, value);
        }

        ASSERT_TRUE(map.getIntervalsSize() == expected.size());
        ASSERT_TRUE(map.getIntervalsSize() == intervals_size);

        /// The same checks are done first for not built map and then for built map.
        for (const bool build_before_checks : {false, true})
        {
            if (build_before_checks)
                map.build();

            ASSERT_TRUE(intervalMapToMap(map) == expected);

            for (const auto & expected_entry : expected)
            {
                const auto & expected_interval = expected_entry.first;
                const std::map<Int64Interval, std::string> only_expected_entry
                    = {{expected_interval, std::to_string(expected_interval.left)}};

                auto found = intervalMapFindIntervals(map, expected_interval.left);
                ASSERT_TRUE(found.size() == 1);
                ASSERT_TRUE(found == only_expected_entry);

                found = intervalMapFindIntervals(map, expected_interval.right);
                ASSERT_TRUE(found.size() == 1);
                ASSERT_TRUE(found == only_expected_entry);

                ASSERT_TRUE(map.has(expected_interval.left));
                ASSERT_TRUE(map.has(expected_interval.right));
            }
        }
    }
}
/// Degenerate intervals [i, i] with values: each point must find exactly its own entry.
TEST(IntervalTree, IntervalMapPoints)
{
    for (size_t intervals_size = 0; intervals_size < 120; ++intervals_size)
    {
        std::map<Int64Interval, std::string> expected;
        IntervalMap<Int64Interval, std::string> map;

        for (size_t i = 0; i < intervals_size; ++i)
        {
            auto interval = Int64Interval(i, i);
            auto value = std::to_string(interval.left);
            expected.emplace(interval, value);
            map.emplace(interval, value);
        }

        ASSERT_TRUE(map.getIntervalsSize() == expected.size());
        ASSERT_TRUE(map.getIntervalsSize() == intervals_size);

        /// The same checks are done first for not built map and then for built map.
        for (const bool build_before_checks : {false, true})
        {
            if (build_before_checks)
                map.build();

            ASSERT_TRUE(intervalMapToMap(map) == expected);

            for (const auto & expected_entry : expected)
            {
                const auto & expected_interval = expected_entry.first;
                const std::map<Int64Interval, std::string> only_expected_entry
                    = {{expected_interval, std::to_string(expected_interval.left)}};

                auto found = intervalMapFindIntervals(map, expected_interval.left);
                ASSERT_TRUE(found.size() == 1);
                ASSERT_TRUE(found == only_expected_entry);

                found = intervalMapFindIntervals(map, expected_interval.right);
                ASSERT_TRUE(found.size() == 1);
                ASSERT_TRUE(found == only_expected_entry);

                ASSERT_TRUE(map.has(expected_interval.left));
                ASSERT_TRUE(map.has(expected_interval.right));
            }
        }
    }
}
/// IntervalMap filled with nested intervals [0, 2 * i + 1] that all share the left border.
/// A lookup at the left border must therefore return every stored interval,
/// both right after insertion and after the tree has been built.
TEST(IntervalTree, IntervalMapIntersectingIntervals)
{
    for (size_t intervals_size = 0; intervals_size < 120; ++intervals_size)
    {
        std::map<Int64Interval, std::string> expected;
        IntervalMap<Int64Interval, std::string> map;

        for (size_t index = 0; index != intervals_size; ++index)
        {
            Int64Interval interval(0, index * 2 + 1);
            std::string value = std::to_string(interval.left);

            expected.emplace(interval, value);
            map.emplace(interval, value);
        }

        ASSERT_TRUE(map.getIntervalsSize() == expected.size());
        ASSERT_TRUE(map.getIntervalsSize() == intervals_size);

        /// First pass checks the map as filled, second pass repeats the same checks after build().
        for (const bool build_before_check : {false, true})
        {
            if (build_before_check)
                map.build();

            ASSERT_TRUE(intervalMapToMap(map) == expected);

            for (const auto & entry : expected)
            {
                const auto & expected_interval = entry.first;

                auto actual_intervals = intervalMapFindIntervals(map, expected_interval.left);
                ASSERT_TRUE(actual_intervals.size() == expected.size());
                ASSERT_TRUE(actual_intervals == expected);

                ASSERT_TRUE(map.has(expected_interval.left));
                ASSERT_TRUE(map.has(expected_interval.right));
            }
        }
    }
}
/// Iterator behaviour of IntervalMap: empty map, single element
/// (dereference, arrow, pre/post increment and decrement) and reverse traversal.
TEST(IntervalTree, IntervalMapIterators)
{
    {
        /// Empty map: begin equals end, before and after build().
        IntervalMap<Int64Interval, std::string> map;
        ASSERT_TRUE(map.begin() == map.end());
        ASSERT_TRUE(map.cbegin() == map.cend());

        map.build();

        ASSERT_TRUE(map.begin() == map.end());
        ASSERT_TRUE(map.cbegin() == map.cend());
    }
    {
        /// Single element: check both access forms and all four step operators.
        IntervalMap<Int64Interval, std::string> map;
        map.emplace(Int64Interval(0, 5), "value");

        ASSERT_TRUE(map.begin() != map.end());

        ASSERT_TRUE((*map.begin()).first.left == 0);
        ASSERT_TRUE((*map.begin()).first.right == 5);
        ASSERT_TRUE((*map.begin()).second == "value");

        ASSERT_TRUE(map.begin()->first.left == 0);
        ASSERT_TRUE(map.begin()->first.right == 5);
        ASSERT_TRUE(map.begin()->second == "value");

        auto begin = map.begin();
        ++begin;
        ASSERT_TRUE(begin == map.end());

        /// Post-increment is exercised on purpose, it is a separate operator.
        begin = map.begin();
        begin++;
        ASSERT_TRUE(begin == map.end());

        auto end = map.end();
        --end;
        ASSERT_TRUE(map.begin() == end);

        /// Post-decrement is exercised on purpose, it is a separate operator.
        end = map.end();
        end--;
        ASSERT_TRUE(map.begin() == end);
    }
    {
        /// Walk backwards from end() to begin() and make sure every element is visited.
        for (size_t intervals_size = 0; intervals_size < 120; ++intervals_size)
        {
            std::map<Int64Interval, std::string> expected;
            IntervalMap<Int64Interval, std::string> map;

            for (size_t interval_index = 0; interval_index < intervals_size; ++interval_index)
            {
                auto interval = Int64Interval(interval_index * 2, interval_index * 2 + 1);
                auto value = std::to_string(interval.left);
                map.emplace(interval, value);
                expected.emplace(interval, value);
            }

            auto end = map.end();
            auto begin = map.begin();

            std::map<Int64Interval, std::string> actual;

            /// The loop stops only when end == begin, and the element at begin was already
            /// inserted by the last iteration, so no extra insert is needed afterwards.
            while (end != begin)
            {
                --end;
                actual.insert(*end);
            }

            ASSERT_TRUE(actual == expected);
        }
    }
}

View File

@ -22,6 +22,11 @@ using RangeStorageType = DB::RangeStorageType;
const RangeStorageType RANGE_MIN_NULL_VALUE = std::numeric_limits<RangeStorageType>::max();
const RangeStorageType RANGE_MAX_NULL_VALUE = std::numeric_limits<RangeStorageType>::lowest();
/// Returns true when `date` is strictly positive and does not exceed DATE_LUT_MAX_DAY_NUM.
bool isCorrectDate(const RangeStorageType & date)
{
    const bool is_positive = date > 0;
    const bool fits_into_lut = date <= DATE_LUT_MAX_DAY_NUM;
    return is_positive && fits_into_lut;
}
// Handle both kinds of null values: explicit nulls of NullableColumn and 'implicit' nulls of Date type.
RangeStorageType getColumnIntValueOrDefault(const DB::IColumn & column, size_t index, bool isDate, const RangeStorageType & default_value)
{
@ -29,7 +34,7 @@ RangeStorageType getColumnIntValueOrDefault(const DB::IColumn & column, size_t i
return default_value;
const RangeStorageType result = static_cast<RangeStorageType>(column.getInt(index));
if (isDate && !DB::Range::isCorrectDate(result))
if (isDate && !isCorrectDate(result))
return default_value;
return result;
@ -57,20 +62,6 @@ namespace ErrorCodes
extern const int UNSUPPORTED_METHOD;
}
bool Range::isCorrectDate(const RangeStorageType & date)
{
return 0 < date && date <= DATE_LUT_MAX_DAY_NUM;
}
bool Range::contains(const RangeStorageType & value) const
{
return left <= value && value <= right;
}
static bool operator<(const Range & left, const Range & right)
{
return std::tie(left.left, left.right) < std::tie(right.left, right.right);
}
template <DictionaryKeyType dictionary_key_type>
RangeHashedDictionary<dictionary_key_type>::RangeHashedDictionary(
@ -260,16 +251,8 @@ ColumnUInt8::Ptr RangeHashedDictionary<dictionary_key_type>::hasKeys(const Colum
if (it)
{
const auto date = dates[key_index];
const auto & ranges_and_values = it->getMapped();
const auto val_it = std::find_if(
std::begin(ranges_and_values),
std::end(ranges_and_values),
[date](const Value<ValueType> & v)
{
return v.range.contains(date);
});
out[key_index] = val_it != std::end(ranges_and_values);
const auto & interval_tree = it->getMapped();
out[key_index] = interval_tree.has(date);
keys_found += out[key_index];
}
else
@ -324,6 +307,8 @@ void RangeHashedDictionary<dictionary_key_type>::loadData()
updateData();
}
buildAttributeIntervalTrees();
if (require_nonempty && 0 == element_count)
throw Exception(ErrorCodes::DICTIONARY_IS_EMPTY,
"{}: dictionary source is empty and 'require_nonempty' property is set.");
@ -407,30 +392,40 @@ void RangeHashedDictionary<dictionary_key_type>::getItemsImpl(
if (it)
{
const auto date = dates[key_index];
const auto & ranges_and_values = it->getMapped();
const auto val_it = std::find_if(
std::begin(ranges_and_values),
std::end(ranges_and_values),
[date](const Value<AttributeType> & v)
{
return v.range.contains(date);
});
const auto & interval_tree = it->getMapped();
if (val_it != std::end(ranges_and_values))
/// Among all intervals that contain `date`, select the smallest one (ordered by left, then right)
/// together with its value.
/// The range and the value must be updated together and only when the candidate is smaller;
/// otherwise the result would depend on the order in which the tree visits intervals.
std::optional<AttributeType> min_value;
std::optional<RangeInterval> min_range;
bool has_interval = false;

interval_tree.find(date, [&](auto & interval, auto & value)
{
    has_interval = true;

    if (!min_range || interval < *min_range)
    {
        min_range = interval;
        min_value = value;
    }

    /// Returning true lets the search go on, so every matching interval is compared.
    return true;
});
if (has_interval)
{
++keys_found;
auto & value = val_it->value;
if constexpr (is_nullable)
{
if (value.has_value())
set_value(key_index, *value, false);
if (min_value.has_value())
set_value(key_index, *min_value, false);
else
set_value(key_index, default_value_extractor[key_index], true);
}
else
{
set_value(key_index, *value, false);
set_value(key_index, *min_value, false);
}
keys_extractor.rollbackCurrentKey();
@ -542,7 +537,7 @@ void RangeHashedDictionary<dictionary_key_type>::blockToAttributes(const Block &
if constexpr (std::is_same_v<KeyType, StringRef>)
key = copyStringInArena(string_arena, key);
setAttributeValue(attribute, key, Range{lower_bound, upper_bound}, attribute_column[key_index]);
setAttributeValue(attribute, key, RangeInterval{lower_bound, upper_bound}, attribute_column[key_index]);
keys_extractor.rollbackCurrentKey();
}
@ -550,18 +545,38 @@ void RangeHashedDictionary<dictionary_key_type>::blockToAttributes(const Block &
}
}
/// Calls build() on the interval tree stored for every key of every attribute.
/// The concrete collection type is selected by dispatching on the attribute type.
template <DictionaryKeyType dictionary_key_type>
void RangeHashedDictionary<dictionary_key_type>::buildAttributeIntervalTrees()
{
    for (auto & attribute : attributes)
    {
        auto build_trees_for_type = [&](const auto & dictionary_attribute_type)
        {
            using AttributeType = typename std::decay_t<decltype(dictionary_attribute_type)>::AttributeType;
            using ValueType = DictionaryValueType<AttributeType>;

            auto & key_to_interval_tree = std::get<CollectionType<ValueType>>(attribute.maps);

            for (auto & [_, interval_tree] : key_to_interval_tree)
                interval_tree.build();
        };

        callOnDictionaryAttributeType(attribute.type, build_trees_for_type);
    }
}
template <DictionaryKeyType dictionary_key_type>
template <typename T>
void RangeHashedDictionary<dictionary_key_type>::setAttributeValueImpl(Attribute & attribute, KeyType key, const Range & range, const Field & value)
void RangeHashedDictionary<dictionary_key_type>::setAttributeValueImpl(Attribute & attribute, KeyType key, const RangeInterval & interval, const Field & value)
{
using ValueType = std::conditional_t<std::is_same_v<T, String>, StringRef, T>;
auto & collection = std::get<CollectionType<ValueType>>(attribute.maps);
Value<ValueType> value_to_insert;
std::optional<ValueType> value_to_insert;
if (attribute.is_nullable && value.isNull())
{
value_to_insert = { range, {} };
value_to_insert = std::nullopt;
}
else
{
@ -569,11 +584,11 @@ void RangeHashedDictionary<dictionary_key_type>::setAttributeValueImpl(Attribute
{
const auto & string = value.get<String>();
StringRef string_ref = copyStringInArena(string_arena, string);
value_to_insert = Value<ValueType>{ range, { string_ref }};
value_to_insert = { string_ref };
}
else
{
value_to_insert = Value<ValueType>{ range, { value.get<ValueType>() }};
value_to_insert = { value.get<ValueType>() };
}
}
@ -582,33 +597,25 @@ void RangeHashedDictionary<dictionary_key_type>::setAttributeValueImpl(Attribute
if (it)
{
auto & values = it->getMapped();
const auto insert_it = std::lower_bound(
std::begin(values),
std::end(values),
range,
[](const Value<ValueType> & lhs, const Range & rhs_range)
{
return lhs.range < rhs_range;
});
values.insert(insert_it, std::move(value_to_insert));
values.emplace(interval, std::move(value_to_insert));
}
else
{
collection.insert({key, Values<ValueType>{std::move(value_to_insert)}});
Values<ValueType> values;
values.emplace(interval, value_to_insert);
collection.insert({key, std::move(values)});
}
}
template <DictionaryKeyType dictionary_key_type>
void RangeHashedDictionary<dictionary_key_type>::setAttributeValue(Attribute & attribute, KeyType key, const Range & range, const Field & value)
void RangeHashedDictionary<dictionary_key_type>::setAttributeValue(Attribute & attribute, KeyType key, const RangeInterval & interval, const Field & value)
{
auto type_call = [&](const auto &dictionary_attribute_type)
{
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
setAttributeValueImpl<AttributeType>(attribute, key, range, value);
setAttributeValueImpl<AttributeType>(attribute, key, interval, value);
};
callOnDictionaryAttributeType(attribute.type, type_call);
@ -650,14 +657,15 @@ void RangeHashedDictionary<dictionary_key_type>::getKeysAndDates(
end_dates.reserve(collection.size());
const bool is_date = isDate(dict_struct.range_min->type);
(void)(is_date);
for (const auto & key : collection)
{
for (const auto & value : key.getMapped())
for (const auto & [interval, _] : key.getMapped())
{
keys.push_back(key.getKey());
start_dates.push_back(value.range.left);
end_dates.push_back(value.range.right);
start_dates.push_back(interval.left);
end_dates.push_back(interval.right);
if constexpr (std::numeric_limits<RangeType>::max() > DATE_LUT_MAX_DAY_NUM) /// Avoid warning about tautological comparison in next line.
if (is_date && static_cast<UInt64>(end_dates.back()) > DATE_LUT_MAX_DAY_NUM)
@ -676,7 +684,7 @@ PaddedPODArray<Int64> RangeHashedDictionary<dictionary_key_type>::makeDateKeys(
for (size_t i = 0; i < keys.size(); ++i)
{
if (Range::isCorrectDate(block_start_dates[i]))
if (isCorrectDate(block_start_dates[i]))
keys[i] = block_start_dates[i]; // NOLINT
else
keys[i] = block_end_dates[i]; // NOLINT

View File

@ -8,26 +8,19 @@
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h>
#include <Common/HashTable/HashMap.h>
#include <Common/HashTable/HashSet.h>
#include <Common/IntervalTree.h>
#include <Dictionaries/DictionaryStructure.h>
#include <Dictionaries/IDictionary.h>
#include <Dictionaries/IDictionarySource.h>
#include <Dictionaries/DictionaryHelpers.h>
namespace DB
{
using RangeStorageType = Int64;
struct Range
{
RangeStorageType left;
RangeStorageType right;
static bool isCorrectDate(const RangeStorageType & date);
bool contains(const RangeStorageType & value) const;
};
template <DictionaryKeyType dictionary_key_type>
class RangeHashedDictionary final : public IDictionary
{
@ -94,15 +87,11 @@ public:
Pipe read(const Names & column_names, size_t max_block_size, size_t num_streams) const override;
private:
template <typename T>
struct Value final
{
Range range;
std::optional<T> value;
};
using RangeInterval = Interval<RangeStorageType>;
template <typename T>
using Values = std::vector<Value<T>>;
using Values = IntervalMap<RangeInterval, std::optional<T>>;
template <typename Value>
using CollectionType = std::conditional_t<
@ -160,10 +149,12 @@ private:
void blockToAttributes(const Block & block);
template <typename T>
void setAttributeValueImpl(Attribute & attribute, KeyType key, const Range & range, const Field & value);
void buildAttributeIntervalTrees();
void setAttributeValue(Attribute & attribute, KeyType key, const Range & range, const Field & value);
template <typename T>
void setAttributeValueImpl(Attribute & attribute, KeyType key, const RangeInterval & interval, const Field & value);
void setAttributeValue(Attribute & attribute, KeyType key, const RangeInterval & interval, const Field & value);
template <typename RangeType>
void getKeysAndDates(

View File

@ -0,0 +1,126 @@
<test>
    <!-- Source tables for the simple-key and the complex-key range_hashed dictionaries. -->
    <create_query>
        CREATE TABLE simple_key_range_hashed_dictionary_source_table
        (
            id UInt64,
            value UInt64,
            start UInt64,
            end UInt64
        ) ENGINE = Memory;
    </create_query>

    <create_query>
        CREATE TABLE complex_key_range_hashed_dictionary_source_table
        (
            id UInt64,
            id_key String,
            value UInt64,
            start UInt64,
            end UInt64
        ) ENGINE = Memory;
    </create_query>

    <create_query>
        CREATE DICTIONARY simple_key_range_hashed_dictionary
        (
            id UInt64,
            value UInt64,
            start UInt64,
            end UInt64
        )
        PRIMARY KEY id
        SOURCE(CLICKHOUSE(DB 'default' TABLE 'simple_key_range_hashed_dictionary_source_table'))
        LAYOUT(RANGE_HASHED())
        RANGE(MIN start MAX end)
        LIFETIME(MIN 0 MAX 1000);
    </create_query>

    <create_query>
        CREATE DICTIONARY complex_key_range_hashed_dictionary
        (
            id UInt64,
            id_key String,
            value UInt64,
            start UInt64,
            end UInt64
        )
        PRIMARY KEY id, id_key
        SOURCE(CLICKHOUSE(DB 'default' TABLE 'complex_key_range_hashed_dictionary_source_table'))
        LAYOUT(COMPLEX_KEY_RANGE_HASHED())
        RANGE(MIN start MAX end)
        LIFETIME(MIN 0 MAX 1000);
    </create_query>

    <!-- Every one of the 10000 keys gets 1000 non-overlapping ranges [2 * r, 2 * r + 1], r = 0..999. -->
    <fill_query>
        INSERT INTO simple_key_range_hashed_dictionary_source_table
        SELECT key, key, range_start * 2, range_start * 2 + 1 FROM
        (SELECT number as key FROM numbers(10000)) as keys,
        (SELECT number as range_start FROM numbers(1000)) as ranges;
    </fill_query>

    <fill_query>
        INSERT INTO complex_key_range_hashed_dictionary_source_table
        SELECT key, toString(key), key, range_start * 2, range_start * 2 + 1 FROM
        (SELECT number as key FROM numbers(10000)) as keys,
        (SELECT number as range_start FROM numbers(1000)) as ranges;
    </fill_query>

    <substitutions>
        <substitution>
            <name>elements_count</name>
            <values>
                <value>500000</value>
                <value>750000</value>
            </values>
        </substitution>
    </substitutions>

    <!-- The same random number in [0, 5000) is used as the dictionary key and as the range lookup point.
         Stored ranges end at 1999, so lookup points of 2000 and above fall outside every range. -->
    <query>
        WITH rand64() % 5000 as key
        SELECT dictGet('default.simple_key_range_hashed_dictionary', 'value', toUInt64(key), key)
        FROM system.numbers
        LIMIT {elements_count}
        FORMAT Null;
    </query>

    <query>
        WITH rand64() % 5000 as key
        SELECT dictHas('default.simple_key_range_hashed_dictionary', toUInt64(key), key)
        FROM system.numbers
        LIMIT {elements_count}
        FORMAT Null;
    </query>

    <query>
        SELECT * FROM simple_key_range_hashed_dictionary
        FORMAT Null;
    </query>

    <query>
        WITH (rand64() % toUInt64(5000) as key, toString(key) as key_id) as complex_key
        SELECT dictGet('default.complex_key_range_hashed_dictionary', 'value', complex_key, key)
        FROM system.numbers
        LIMIT {elements_count}
        FORMAT Null;
    </query>

    <query>
        WITH (rand64() % toUInt64(5000) as key, toString(key) as key_id) as complex_key
        SELECT dictHas('default.complex_key_range_hashed_dictionary', complex_key, key)
        FROM system.numbers
        LIMIT {elements_count}
        FORMAT Null;
    </query>

    <query>
        SELECT * FROM complex_key_range_hashed_dictionary
        FORMAT Null;
    </query>

    <!-- Dictionaries are dropped before the tables they read from, so no dictionary is left
         pointing at a missing source and the table drop is not blocked by a dependent object. -->
    <drop_query>DROP DICTIONARY IF EXISTS simple_key_range_hashed_dictionary;</drop_query>
    <drop_query>DROP DICTIONARY IF EXISTS complex_key_range_hashed_dictionary;</drop_query>

    <drop_query>DROP TABLE IF EXISTS simple_key_range_hashed_dictionary_source_table;</drop_query>
    <drop_query>DROP TABLE IF EXISTS complex_key_range_hashed_dictionary_source_table;</drop_query>
</test>

View File

@ -45,13 +45,13 @@ SELECT dictHas('database_for_range_dict.range_dictionary', toUInt64(2), toDate('
SELECT dictHas('database_for_range_dict.range_dictionary', toUInt64(2), toDate('2019-05-31'));
SELECT 'select columns from dictionary';
SELECT 'allColumns';
SELECT * FROM database_for_range_dict.range_dictionary;
SELECT * FROM database_for_range_dict.range_dictionary ORDER BY CountryID, StartDate, EndDate;
SELECT 'noColumns';
SELECT 1 FROM database_for_range_dict.range_dictionary;
SELECT 1 FROM database_for_range_dict.range_dictionary ORDER BY CountryID, StartDate, EndDate;
SELECT 'onlySpecificColumns';
SELECT CountryID, StartDate, Tax FROM database_for_range_dict.range_dictionary;
SELECT CountryID, StartDate, Tax FROM database_for_range_dict.range_dictionary ORDER BY CountryID, StartDate, EndDate;
SELECT 'onlySpecificColumn';
SELECT Tax FROM database_for_range_dict.range_dictionary;
SELECT Tax FROM database_for_range_dict.range_dictionary ORDER BY CountryID, StartDate, EndDate;
DROP DICTIONARY database_for_range_dict.range_dictionary;
DROP TABLE database_for_range_dict.date_table;
@ -97,13 +97,13 @@ SELECT dictHas('database_for_range_dict.range_dictionary_nullable', toUInt64(2),
SELECT dictHas('database_for_range_dict.range_dictionary_nullable', toUInt64(2), toDate('2019-05-31'));
SELECT 'select columns from dictionary';
SELECT 'allColumns';
SELECT * FROM database_for_range_dict.range_dictionary_nullable;
SELECT * FROM database_for_range_dict.range_dictionary_nullable ORDER BY CountryID, StartDate, EndDate;
SELECT 'noColumns';
SELECT 1 FROM database_for_range_dict.range_dictionary_nullable;
SELECT 1 FROM database_for_range_dict.range_dictionary_nullable ORDER BY CountryID, StartDate, EndDate;
SELECT 'onlySpecificColumns';
SELECT CountryID, StartDate, Tax FROM database_for_range_dict.range_dictionary_nullable;
SELECT CountryID, StartDate, Tax FROM database_for_range_dict.range_dictionary_nullable ORDER BY CountryID, StartDate, EndDate;
SELECT 'onlySpecificColumn';
SELECT Tax FROM database_for_range_dict.range_dictionary_nullable;
SELECT Tax FROM database_for_range_dict.range_dictionary_nullable ORDER BY CountryID, StartDate, EndDate;
DROP DICTIONARY database_for_range_dict.range_dictionary_nullable;
DROP TABLE database_for_range_dict.date_table;

View File

@ -45,13 +45,13 @@ SELECT dictHas('range_dictionary', (toUInt64(2), '2'), toDate('2019-05-29'));
SELECT dictHas('range_dictionary', (toUInt64(2), '2'), toDate('2019-05-31'));
SELECT 'select columns from dictionary';
SELECT 'allColumns';
SELECT * FROM range_dictionary;
SELECT * FROM range_dictionary ORDER BY CountryID, StartDate, EndDate;
SELECT 'noColumns';
SELECT 1 FROM range_dictionary;
SELECT 1 FROM range_dictionary ORDER BY CountryID, StartDate, EndDate;
SELECT 'onlySpecificColumns';
SELECT CountryID, StartDate, Tax FROM range_dictionary;
SELECT CountryID, StartDate, Tax FROM range_dictionary ORDER BY CountryID, StartDate, EndDate;
SELECT 'onlySpecificColumn';
SELECT Tax FROM range_dictionary;
SELECT Tax FROM range_dictionary ORDER BY CountryID, StartDate, EndDate;
DROP TABLE date_table;
DROP DICTIONARY range_dictionary;
@ -99,13 +99,13 @@ SELECT dictHas('range_dictionary_nullable', (toUInt64(2), '2'), toDate('2019-05-
SELECT dictHas('range_dictionary_nullable', (toUInt64(2), '2'), toDate('2019-05-31'));
SELECT 'select columns from dictionary';
SELECT 'allColumns';
SELECT * FROM range_dictionary_nullable;
SELECT * FROM range_dictionary_nullable ORDER BY CountryID, StartDate, EndDate;
SELECT 'noColumns';
SELECT 1 FROM range_dictionary_nullable;
SELECT 1 FROM range_dictionary_nullable ORDER BY CountryID, StartDate, EndDate;
SELECT 'onlySpecificColumns';
SELECT CountryID, StartDate, Tax FROM range_dictionary_nullable;
SELECT CountryID, StartDate, Tax FROM range_dictionary_nullable ORDER BY CountryID, StartDate, EndDate;
SELECT 'onlySpecificColumn';
SELECT Tax FROM range_dictionary_nullable;
SELECT Tax FROM range_dictionary_nullable ORDER BY CountryID, StartDate, EndDate;
DROP TABLE date_table;
DROP DICTIONARY range_dictionary_nullable;