diff --git a/src/Common/IntervalTree.h b/src/Common/IntervalTree.h new file mode 100644 index 00000000000..fd2fec528a4 --- /dev/null +++ b/src/Common/IntervalTree.h @@ -0,0 +1,683 @@ +#pragma once + +#include + +#include +#include + + +namespace DB +{ + +/** Structure that holds closed interval with left and right. + * Example: [1, 1] is valid interval, that contain point 1. + */ +template +struct Interval +{ + using IntervalStorageType = TIntervalStorageType; + IntervalStorageType left; + IntervalStorageType right; + + Interval(IntervalStorageType left_, IntervalStorageType right_) : left(left_), right(right_) { } + + inline bool contains(IntervalStorageType point) const { return left <= point && point <= right; } +}; + +template +bool operator<(const Interval & lhs, const Interval & rhs) +{ + return std::tie(lhs.left, lhs.right) < std::tie(rhs.left, rhs.right); +} + +template +bool operator<=(const Interval & lhs, const Interval & rhs) +{ + return std::tie(lhs.left, lhs.right) <= std::tie(rhs.left, rhs.right); +} + +template +bool operator==(const Interval & lhs, const Interval & rhs) +{ + return std::tie(lhs.left, lhs.right) == std::tie(rhs.left, rhs.right); +} + +template +bool operator!=(const Interval & lhs, const Interval & rhs) +{ + return std::tie(lhs.left, lhs.right) != std::tie(rhs.left, rhs.right); +} + +template +bool operator>(const Interval & lhs, const Interval & rhs) +{ + return std::tie(lhs.left, lhs.right) > std::tie(rhs.left, rhs.right); +} + +template +bool operator>=(const Interval & lhs, const Interval & rhs) +{ + return std::tie(lhs.left, lhs.right) >= std::tie(rhs.left, rhs.right); +} + +struct IntervalTreeVoidValue +{ +}; + +/** Tree structure that allow to efficiently retrieve all intervals that intersect specific point. + * https://en.wikipedia.org/wiki/Interval_tree + * + * Search for all intervals intersecting point has complexity O(log(n) + k), k is count of intervals that intersect point. 
+ * If we only need to check whether some interval intersects the point, the operation has complexity O(log(n)). + * + * Explanation: + * + * IntervalTree structure is a balanced tree. Each node contains: + * 1. Point + * 2. Intervals sorted by left ascending that intersect that point. + * 3. Intervals sorted by right descending that intersect that point. + * + * Build: + * + * To keep the tree relatively balanced we use the median of all interval end points. + * On each step we build a tree node from a set of intervals. For the root node the input is all intervals. + * First split the intervals into 4 groups: + * 1. Intervals that lie entirely to the left of the median point: interval right is less than the median point. + * 2. Intervals that lie entirely to the right of the median point: interval left is greater than the median point. + * 3. Intervals that intersect the median point, sorted by left ascending. + * 4. Intervals that intersect the median point, sorted by right descending. + * + * If group 1 is not empty, continue building the left child recursively with the intervals from group 1. + * If group 2 is not empty, continue building the right child recursively with the intervals from group 2. + * + * Search: + * + * Search for intervals intersecting a point starts from the root node. + * If the search point is less than the point in the node, then we check intervals sorted by left ascending + * until left is greater than the search point. + * If there is a left child, continue the search recursively in the left child. + * + * If the search point is greater than the point in the node, then we check intervals sorted by right descending + * until right is less than the search point. + * If there is a right child, continue the search recursively in the right child. + * + * If the search point is equal to the point in the node, then we can emit all intervals that intersect the current tree node + * and stop searching. + * + * Additional details: + * 1. To improve cache locality the tree is stored implicitly in an array; after the build method is called + * no more intervals can be added to the tree. + * 2. 
Additionally to improve cache locality in tree node we store sorted intervals for all nodes in separate + * array. In node we store only start of its sorted intervals, and also size of intersecting intervals. + * If we need to retrieve intervals sorted by left ascending they will be stored in indexes + * [sorted_intervals_start_index, sorted_intervals_start_index + intersecting_intervals_size). + * If we need to retrieve intervals sorted by right descending they will be store in indexes + * [sorted_intervals_start_index + intersecting_intervals_size, sorted_intervals_start_index + intersecting_intervals_size * 2). + */ +template +class IntervalTree +{ +public: + using IntervalStorageType = typename Interval::IntervalStorageType; + + static constexpr bool is_empty_value = std::is_same_v; + + IntervalTree() { nodes.resize(1); } + + template , bool> = true> + void emplace(Interval interval) + { + assert(!tree_is_built); + sorted_intervals.emplace_back(interval); + increaseIntervalsSize(); + } + + template , bool> = true, typename... Args> + void emplace(Interval interval, Args &&... args) + { + assert(!tree_is_built); + sorted_intervals.emplace_back( + std::piecewise_construct, std::forward_as_tuple(interval), std::forward_as_tuple(std::forward(args)...)); + increaseIntervalsSize(); + } + + template , bool> = true> + void insert(Interval interval) + { + assert(!tree_is_built); + sorted_intervals.emplace_back(interval); + increaseIntervalsSize(); + } + + template , bool> = true> + void insert(Interval interval, const Value & value) + { + assert(!tree_is_built); + sorted_intervals.emplace_back(interval, value); + increaseIntervalsSize(); + } + + template , bool> = true> + void insert(Interval interval, Value && value) + { + assert(!tree_is_built); + sorted_intervals.emplace_back(interval, std::move(value)); + increaseIntervalsSize(); + } + + /// Build tree, after that intervals cannot be inserted, and only search or iteration can be performed. 
+ void build() + { + assert(!tree_is_built); + nodes.clear(); + nodes.reserve(sorted_intervals.size()); + buildTree(); + tree_is_built = true; + } + + /** Find all intervals intersecting point. + * + * Callback interface for IntervalSet: + * + * template + * struct IntervalSetCallback + * { + * bool operator()(const IntervalType & interval) + * { + * bool should_continue_interval_iteration = false; + * return should_continue_interval_iteration; + * } + * }; + * + * Callback interface for IntervalMap: + * + * template + * struct IntervalMapCallback + * { + * bool operator()(const IntervalType & interval, const Value & value) + * { + * bool should_continue_interval_iteration = false; + * return should_continue_interval_iteration; + * } + * }; + */ + + template + void find(IntervalStorageType point, IntervalCallback && callback) const + { + if (unlikely(!tree_is_built)) + { + findIntervalsNonConstructedImpl(point, callback); + return; + } + + findIntervalsImpl(point, callback); + } + + /// Check if there is an interval intersecting point + bool has(IntervalStorageType point) const + { + bool has_intervals = false; + + if constexpr (is_empty_value) + { + find(point, [&](auto &) + { + has_intervals = true; + return false; + }); + } + else + { + find(point, [&](auto &, auto &) + { + has_intervals = true; + return false; + }); + } + + return has_intervals; + } + + class Iterator; + using iterator = Iterator; + using const_iterator = Iterator; + + iterator begin() + { + size_t start_index = findFirstIteratorNodeIndex(); + return Iterator(start_index, 0, this); + } + + iterator end() + { + size_t end_index = findLastIteratorNodeIndex(); + size_t last_interval_index = 0; + + if (likely(end_index < nodes.size())) + last_interval_index = nodes[end_index].sorted_intervals_range_size; + + return Iterator(end_index, last_interval_index, this); + } + + const_iterator begin() const + { + size_t start_index = findFirstIteratorNodeIndex(); + return Iterator(start_index, 0, this); + } 
+ + const_iterator end() const + { + size_t end_index = findLastIteratorNodeIndex(); + size_t last_interval_index = 0; + + if (likely(end_index < nodes.size())) + last_interval_index = nodes[end_index].sorted_intervals_range_size; + + return Iterator(end_index, last_interval_index, this); + } + + const_iterator cbegin() const { return begin(); } + + const_iterator cend() const { return end(); } + + size_t getIntervalsSize() const { return intervals_size; } + +private: + struct Node + { + size_t sorted_intervals_range_start_index; + size_t sorted_intervals_range_size; + + IntervalStorageType middle_element; + + inline bool hasValue() const { return sorted_intervals_range_size != 0; } + }; + + using IntervalWithEmptyValue = Interval; + using IntervalWithNonEmptyValue = std::pair; + + using IntervalWithValue = std::conditional_t; + +public: + class Iterator + { + public: + bool operator==(const Iterator & rhs) const + { + return node_index == rhs.node_index && current_interval_index == rhs.current_interval_index && tree == rhs.tree; + } + + bool operator!=(const Iterator & rhs) const { return !(*this == rhs); } + + const IntervalWithValue & operator*() { return getCurrentValue(); } + + const IntervalWithValue & operator*() const { return getCurrentValue(); } + + const IntervalWithValue * operator->() { return &getCurrentValue(); } + + const IntervalWithValue * operator->() const { return &getCurrentValue(); } + + Iterator & operator++() + { + iterateToNext(); + return *this; + } + + Iterator operator++(int) // NOLINT + { + Iterator copy(*this); + iterateToNext(); + return copy; + } + + Iterator & operator--() + { + iterateToPrevious(); + return *this; + } + + Iterator operator--(int) // NOLINT + { + Iterator copy(*this); + iterateToPrevious(); + return copy; + } + + private: + friend class IntervalTree; + + Iterator(size_t node_index_, size_t current_interval_index_, const IntervalTree * tree_) + : node_index(node_index_), 
current_interval_index(current_interval_index_), tree(tree_) + { + } + + size_t node_index; + size_t current_interval_index; + const IntervalTree * tree; + + /// Advance to the next interval of the current node. When the node is exhausted, jump to the first interval + /// of the next node that has intervals. If there is no such node, node_index is left unchanged and + /// current_interval_index stays one past the last interval of the current node. + void iterateToNext() + { + size_t nodes_size = tree->nodes.size(); + auto & current_node = tree->nodes[node_index]; + + ++current_interval_index; + + if (current_interval_index < current_node.sorted_intervals_range_size) + return; + + size_t node_index_copy = node_index + 1; + for (; node_index_copy < nodes_size; ++node_index_copy) + { + auto & node = tree->nodes[node_index_copy]; + + if (node.hasValue()) + { + node_index = node_index_copy; + current_interval_index = 0; + break; + } + } + } + + /// Step back to the previous interval of the current node. When already at the first interval of the node, + /// move to the last interval of the closest preceding node that has intervals. + void iterateToPrevious() + { + if (current_interval_index > 0) + { + --current_interval_index; + return; + } + + while (node_index > 0) + { + /// node_index must move together with current_interval_index, otherwise the interval index + /// taken from the preceding node would be applied to the node we are leaving. + --node_index; + + auto & node = tree->nodes[node_index]; + if (node.hasValue()) + { + current_interval_index = node.sorted_intervals_range_size - 1; + break; + } + } + } + + /// Interval (together with its value, if any) at the current iterator position. + const IntervalWithValue & getCurrentValue() const + { + auto & current_node = tree->nodes[node_index]; + size_t interval_index = current_node.sorted_intervals_range_start_index + current_interval_index; + return tree->sorted_intervals[interval_index]; + } + }; + +private: + void buildTree() + { + std::vector temporary_points_storage; + temporary_points_storage.reserve(sorted_intervals.size() * 2); + + std::vector left_intervals; + std::vector right_intervals; + std::vector intervals_sorted_by_left_asc; + std::vector intervals_sorted_by_right_desc; + + struct StackFrame + { + size_t index; + std::vector intervals; + }; + + std::vector stack; + stack.emplace_back(StackFrame{0, std::move(sorted_intervals)}); + sorted_intervals.clear(); + + while (!stack.empty()) + { + auto frame = std::move(stack.back()); + stack.pop_back(); + + size_t current_index = frame.index; + auto & current_intervals = frame.intervals; + + if (current_intervals.empty()) + continue; + + if (current_index >= nodes.size()) + 
nodes.resize(current_index + 1); + + temporary_points_storage.clear(); + intervalsToPoints(current_intervals, temporary_points_storage); + auto median = pointsMedian(temporary_points_storage); + + left_intervals.clear(); + right_intervals.clear(); + intervals_sorted_by_left_asc.clear(); + intervals_sorted_by_right_desc.clear(); + + for (const auto & interval_with_value : current_intervals) + { + auto & interval = getInterval(interval_with_value); + + if (interval.right < median) + { + left_intervals.emplace_back(interval_with_value); + } + else if (interval.left > median) + { + right_intervals.emplace_back(interval_with_value); + } + else + { + intervals_sorted_by_left_asc.emplace_back(interval_with_value); + intervals_sorted_by_right_desc.emplace_back(interval_with_value); + } + } + + std::sort(intervals_sorted_by_left_asc.begin(), intervals_sorted_by_left_asc.end(), [](auto & lhs, auto & rhs) + { + auto & lhs_interval = getInterval(lhs); + auto & rhs_interval = getInterval(rhs); + return lhs_interval.left < rhs_interval.left; + }); + + std::sort(intervals_sorted_by_right_desc.begin(), intervals_sorted_by_right_desc.end(), [](auto & lhs, auto & rhs) + { + auto & lhs_interval = getInterval(lhs); + auto & rhs_interval = getInterval(rhs); + return lhs_interval.right > rhs_interval.right; + }); + + size_t sorted_intervals_range_start_index = sorted_intervals.size(); + + for (auto && interval_sorted_by_left_asc : intervals_sorted_by_left_asc) + sorted_intervals.emplace_back(std::move(interval_sorted_by_left_asc)); + + for (auto && interval_sorted_by_right_desc : intervals_sorted_by_right_desc) + sorted_intervals.emplace_back(std::move(interval_sorted_by_right_desc)); + + auto & node = nodes[current_index]; + node.middle_element = median; + node.sorted_intervals_range_start_index = sorted_intervals_range_start_index; + node.sorted_intervals_range_size = intervals_sorted_by_left_asc.size(); + + size_t left_child_index = current_index * 2 + 1; + 
stack.emplace_back(StackFrame{left_child_index, std::move(left_intervals)}); + + size_t right_child_index = current_index * 2 + 2; + stack.emplace_back(StackFrame{right_child_index, std::move(right_intervals)}); + } + } + + template + void findIntervalsImpl(IntervalStorageType point, IntervalCallback && callback) const + { + size_t current_index = 0; + + while (true) + { + if (current_index >= nodes.size()) + break; + + auto & node = nodes[current_index]; + if (!node.hasValue()) + break; + + auto middle_element = node.middle_element; + + if (point < middle_element) + { + size_t start = node.sorted_intervals_range_start_index; + size_t end = start + node.sorted_intervals_range_size; + + for (; start != end; ++start) + { + auto & interval_with_value_left_sorted_asc = sorted_intervals[start]; + auto & interval_left_sorted_asc = getInterval(interval_with_value_left_sorted_asc); + if (interval_left_sorted_asc.left > point) + break; + + bool should_continue = callCallback(interval_with_value_left_sorted_asc, callback); + if (unlikely(!should_continue)) + return; + } + + size_t left_child_index = current_index * 2 + 1; + current_index = left_child_index; + } + else + { + size_t start = node.sorted_intervals_range_start_index + node.sorted_intervals_range_size; + size_t end = start + node.sorted_intervals_range_size; + + for (; start != end; ++start) + { + auto & interval_with_value_right_sorted_desc = sorted_intervals[start]; + auto & interval_right_sorted_desc = getInterval(interval_with_value_right_sorted_desc); + if (interval_right_sorted_desc.right < point) + break; + + bool should_continue = callCallback(interval_with_value_right_sorted_desc, callback); + if (unlikely(!should_continue)) + return; + } + + if (likely(point > middle_element)) + { + size_t right_child_index = current_index * 2 + 2; + current_index = right_child_index; + } + else + { + /// This is case when point == middle_element. 
+ break; + } + } + } + } + + /// Linear search used before the tree is built: checks every stored interval against point. + /// Stops as soon as the callback returns false, the same way the search on a built tree does. + template + void findIntervalsNonConstructedImpl(IntervalStorageType point, IntervalCallback && callback) const + { + for (auto & interval_with_value : sorted_intervals) + { + auto & interval = getInterval(interval_with_value); + + if (interval.contains(point) && !callCallback(interval_with_value, callback)) + return; + } + } + + /// Index of the first node that has intervals, or 0 if no node has any. + inline size_t findFirstIteratorNodeIndex() const + { + size_t nodes_size = nodes.size(); + size_t result_index = 0; + + for (; result_index < nodes_size; ++result_index) + { + if (nodes[result_index].hasValue()) + break; + } + + if (unlikely(result_index == nodes_size)) + result_index = 0; + + return result_index; + } + + /// Index of the last node that has intervals. Returns 0 when nodes is empty or when no node after the first one has intervals. + inline size_t findLastIteratorNodeIndex() const + { + if (unlikely(nodes.empty())) + return 0; + + size_t nodes_size = nodes.size(); + size_t result_index = nodes_size - 1; + for (; result_index != 0; --result_index) + { + if (nodes[result_index].hasValue()) + break; + } + + return result_index; + } + + inline void increaseIntervalsSize() + { + /// Before the tree is built we store the total intervals count in the first node to allow tree iteration. 
+ ++intervals_size; + nodes[0].sorted_intervals_range_size = intervals_size; + } + + std::vector nodes; + std::vector sorted_intervals; + size_t intervals_size = 0; + bool tree_is_built = false; + + static inline const Interval & getInterval(const IntervalWithValue & interval_with_value) + { + if constexpr (is_empty_value) + return interval_with_value; + else + return interval_with_value.first; + } + + template + static inline bool callCallback(const IntervalWithValue & interval, IntervalCallback && callback) + { + if constexpr (is_empty_value) + return callback(interval); + else + return callback(interval.first, interval.second); + } + + static inline void + intervalsToPoints(const std::vector & intervals, std::vector & temporary_points_storage) + { + for (const auto & interval_with_value : intervals) + { + auto & interval = getInterval(interval_with_value); + temporary_points_storage.emplace_back(interval.left); + temporary_points_storage.emplace_back(interval.right); + } + } + + static inline IntervalStorageType pointsMedian(std::vector & points) + { + size_t size = points.size(); + size_t middle_element_index = size / 2; + + std::nth_element(points.begin(), points.begin() + middle_element_index, points.end()); + + /** We should not get median as average of middle_element_index and middle_element_index - 1 + * because we want point in node to intersect some interval. + * Example: Intervals [1, 1], [3, 3]. If we choose 2 as average point, it does not intersect any interval. 
+ */ + return points[middle_element_index]; + } +}; + +template +using IntervalSet = IntervalTree; + +template +using IntervalMap = IntervalTree; + +} diff --git a/src/Common/examples/CMakeLists.txt b/src/Common/examples/CMakeLists.txt index be91101ef40..7b21591f83e 100644 --- a/src/Common/examples/CMakeLists.txt +++ b/src/Common/examples/CMakeLists.txt @@ -83,3 +83,6 @@ target_link_libraries (executable_udf PRIVATE dbms) add_executable(hive_metastore_client hive_metastore_client.cpp) target_link_libraries (hive_metastore_client PUBLIC hivemetastore ${THRIFT_LIBRARY}) + +add_executable (interval_tree interval_tree.cpp) +target_link_libraries (interval_tree PRIVATE dbms) diff --git a/src/Common/examples/interval_tree.cpp b/src/Common/examples/interval_tree.cpp new file mode 100644 index 00000000000..086fab37bbe --- /dev/null +++ b/src/Common/examples/interval_tree.cpp @@ -0,0 +1,95 @@ +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +using namespace DB; +using Int64Interval = Interval; + +int main(int, char **) +{ + { + IntervalSet tree; + + tree.emplace(Int64Interval(0, 5)); + tree.emplace(Int64Interval(10, 15)); + + tree.build(); + + for (const auto & interval : tree) + { + std::cout << "Interval left " << interval.left << " right " << interval.right << std::endl; + } + } + { + IntervalMap tree; + + tree.emplace(Int64Interval(0, 5), "value1"); + tree.emplace(Int64Interval(10, 15), "value2"); + + tree.build(); + + for (const auto & [interval, value] : tree) + { + std::cout << "Interval left " << interval.left << " right " << interval.right; + std::cout << " value " << value << std::endl; + } + } + { + IntervalSet tree; + for (size_t i = 0; i < 5; ++i) + { + tree.emplace(Int64Interval(0, i)); + } + + tree.build(); + + for (const auto & interval : tree) + { + std::cout << "Interval left " << interval.left << " right " << interval.right << std::endl; + } + + for (Int64 i = 0; i < 5; ++i) + { + tree.find(i, [](auto & 
interval) + { + std::cout << "Interval left " << interval.left << " right " << interval.right << std::endl; + return true; + }); + } + } + { + IntervalMap tree; + for (size_t i = 0; i < 5; ++i) + { + tree.emplace(Int64Interval(0, i), "Value " + std::to_string(i)); + } + + tree.build(); + + for (const auto & [interval, value] : tree) + { + std::cout << "Interval left " << interval.left << " right " << interval.right; + std::cout << " value " << value << std::endl; + } + + for (Int64 i = 0; i < 5; ++i) + { + tree.find(i, [](auto & interval, auto & value) + { + std::cout << "Interval left " << interval.left << " right " << interval.right; + std::cout << " value " << value << std::endl; + + return true; + }); + } + } + + return 0; +} diff --git a/src/Common/tests/gtest_interval_tree.cpp b/src/Common/tests/gtest_interval_tree.cpp new file mode 100644 index 00000000000..d9f19841b66 --- /dev/null +++ b/src/Common/tests/gtest_interval_tree.cpp @@ -0,0 +1,540 @@ +#include + +#include +#include + +#include +#include + + +using namespace DB; +using Int64Interval = Interval; + +template +std::set intervalSetToSet(const IntervalSet & interval_set) +{ + std::set result; + + for (const auto & interval : interval_set) + result.insert(interval); + + return result; +} + +template +std::map intervalMapToMap(const IntervalMap & interval_map) +{ + std::map result; + + for (const auto & [interval, value] : interval_map) + result.emplace(interval, value); + + return result; +} + +template +struct CollectIntervalsSetCallback +{ + explicit CollectIntervalsSetCallback(std::set & result_intervals_) + : result_intervals(result_intervals_) + { + } + + bool operator()(IntervalType interval) + { + result_intervals.insert(interval); + return true; + } + + std::set & result_intervals; +}; + +using CollectIntervalsSetInt64Callback = CollectIntervalsSetCallback; + +template +std::set intervalSetFindIntervals(const IntervalSet & interval_set, typename IntervalType::IntervalStorageType point) +{ + 
std::set result; + CollectIntervalsSetCallback callback(result); + + interval_set.find(point, callback); + + return result; +} + +template +struct CollectIntervalsMapCallback +{ + explicit CollectIntervalsMapCallback(std::map & result_intervals_) + : result_intervals(result_intervals_) + { + } + + bool operator()(IntervalType interval, const Value & value) + { + result_intervals.emplace(interval, value); + return true; + } + + std::map & result_intervals; +}; + + +template +std::map intervalMapFindIntervals(const IntervalMap & interval_set, typename IntervalType::IntervalStorageType point) +{ + std::map result; + CollectIntervalsMapCallback callback(result); + + interval_set.find(point, callback); + + return result; +} + +TEST(IntervalTree, IntervalSetBasic) +{ + for (size_t intervals_size = 0; intervals_size < 120; ++intervals_size) + { + std::set expected; + IntervalSet set; + + for (size_t interval_index = 0; interval_index < intervals_size; ++interval_index) + { + auto interval = Int64Interval(interval_index * 2, interval_index * 2 + 1); + expected.insert(interval); + set.insert(interval); + } + + ASSERT_TRUE(set.getIntervalsSize() == expected.size()); + ASSERT_TRUE(set.getIntervalsSize() == intervals_size); + ASSERT_TRUE(intervalSetToSet(set) == expected); + + for (const auto & expected_interval : expected) + { + std::set expected_intervals = {{expected_interval}}; + + auto actual_intervals = intervalSetFindIntervals(set, expected_interval.left); + ASSERT_TRUE(actual_intervals.size() == 1); + ASSERT_TRUE(actual_intervals == expected_intervals); + + actual_intervals = intervalSetFindIntervals(set, expected_interval.right); + ASSERT_TRUE(actual_intervals.size() == 1); + ASSERT_TRUE(actual_intervals == expected_intervals); + + ASSERT_TRUE(set.has(expected_interval.left)); + ASSERT_TRUE(set.has(expected_interval.right)); + } + + set.build(); + + ASSERT_TRUE(intervalSetToSet(set) == expected); + + for (const auto & expected_interval : expected) + { + auto 
actual_interval = intervalSetFindIntervals(set, expected_interval.left); + ASSERT_TRUE(actual_interval.size() == 1); + ASSERT_TRUE(actual_interval == std::set{expected_interval}); + + actual_interval = intervalSetFindIntervals(set, expected_interval.right); + ASSERT_TRUE(actual_interval.size() == 1); + ASSERT_TRUE(actual_interval == std::set{expected_interval}); + + ASSERT_TRUE(set.has(expected_interval.left)); + ASSERT_TRUE(set.has(expected_interval.right)); + } + } +} + +TEST(IntervalTree, IntervalSetPoints) +{ + for (size_t intervals_size = 0; intervals_size < 120; ++intervals_size) + { + std::set expected; + IntervalSet set; + + for (size_t interval_index = 0; interval_index < intervals_size; ++interval_index) + { + auto interval = Int64Interval(interval_index, interval_index); + expected.insert(interval); + set.insert(interval); + } + + ASSERT_TRUE(set.getIntervalsSize() == expected.size()); + ASSERT_TRUE(set.getIntervalsSize() == intervals_size); + ASSERT_TRUE(intervalSetToSet(set) == expected); + + for (const auto & expected_interval : expected) + { + std::set expected_intervals = {{expected_interval}}; + + auto actual_intervals = intervalSetFindIntervals(set, expected_interval.left); + ASSERT_TRUE(actual_intervals.size() == 1); + ASSERT_TRUE(actual_intervals == expected_intervals); + + actual_intervals = intervalSetFindIntervals(set, expected_interval.right); + ASSERT_TRUE(actual_intervals.size() == 1); + ASSERT_TRUE(actual_intervals == expected_intervals); + + ASSERT_TRUE(set.has(expected_interval.left)); + ASSERT_TRUE(set.has(expected_interval.right)); + } + + set.build(); + + ASSERT_TRUE(intervalSetToSet(set) == expected); + + for (const auto & expected_interval : expected) + { + auto actual_interval = intervalSetFindIntervals(set, expected_interval.left); + ASSERT_TRUE(actual_interval.size() == 1); + ASSERT_TRUE(actual_interval == std::set{expected_interval}); + + actual_interval = intervalSetFindIntervals(set, expected_interval.right); + 
ASSERT_TRUE(actual_interval.size() == 1); + ASSERT_TRUE(actual_interval == std::set{expected_interval}); + + ASSERT_TRUE(set.has(expected_interval.left)); + ASSERT_TRUE(set.has(expected_interval.right)); + } + } +} + +TEST(IntervalTree, IntervalSetIntersectingIntervals) +{ + for (size_t intervals_size = 0; intervals_size < 120; ++intervals_size) + { + std::set expected; + IntervalSet set; + + for (size_t interval_index = 0; interval_index < intervals_size; ++interval_index) + { + auto interval = Int64Interval(0, interval_index * 2 + 1); + expected.insert(interval); + set.insert(interval); + } + + ASSERT_TRUE(set.getIntervalsSize() == expected.size()); + ASSERT_TRUE(set.getIntervalsSize() == intervals_size); + ASSERT_TRUE(intervalSetToSet(set) == expected); + + for (const auto & expected_interval : expected) + { + auto actual_intervals = intervalSetFindIntervals(set, expected_interval.left); + ASSERT_TRUE(actual_intervals.size() == expected.size()); + ASSERT_TRUE(actual_intervals == expected); + + ASSERT_TRUE(set.has(expected_interval.left)); + ASSERT_TRUE(set.has(expected_interval.right)); + } + + set.build(); + + ASSERT_TRUE(intervalSetToSet(set) == expected); + + for (const auto & expected_interval : expected) + { + auto actual_intervals = intervalSetFindIntervals(set, expected_interval.left); + ASSERT_TRUE(actual_intervals.size() == expected.size()); + ASSERT_TRUE(actual_intervals == expected); + + ASSERT_TRUE(set.has(expected_interval.left)); + ASSERT_TRUE(set.has(expected_interval.right)); + } + } +} + +TEST(IntervalTree, IntervalSetIterators) +{ + { + IntervalSet set; + ASSERT_TRUE(set.begin() == set.end()); + ASSERT_TRUE(set.cbegin() == set.cend()); + set.build(); + ASSERT_TRUE(set.begin() == set.end()); + ASSERT_TRUE(set.cbegin() == set.cend()); + } + { + IntervalSet set; + set.emplace(Int64Interval(0, 5)); + ASSERT_TRUE(set.begin() != set.end()); + ASSERT_TRUE((*set.begin()).left == 0); + ASSERT_TRUE((*set.begin()).right == 5); + 
ASSERT_TRUE(set.begin()->left == 0); + ASSERT_TRUE(set.begin()->right == 5); + auto begin = set.begin(); + ++begin; + ASSERT_TRUE(begin == set.end()); + + begin = set.begin(); + begin++; + ASSERT_TRUE(begin == set.end()); + + auto end = set.end(); + --end; + ASSERT_TRUE(set.begin() == end); + + end = set.end(); + end--; + ASSERT_TRUE(set.begin() == end); + } + { + for (size_t intervals_size = 0; intervals_size < 120; ++intervals_size) + { + std::set expected; + IntervalSet set; + + for (size_t interval_index = 0; interval_index < intervals_size; ++interval_index) + { + auto interval = Int64Interval(interval_index * 2, interval_index * 2 + 1); + set.insert(interval); + expected.insert(interval); + } + + auto end = set.end(); + auto begin = set.begin(); + + std::set actual; + + while (end != begin) + { + --end; + actual.insert(*end); + } + + if (end != begin) + actual.insert(*end); + + ASSERT_TRUE(actual == expected); + } + } +} + +TEST(IntervalTree, IntervalMapBasic) +{ + for (size_t intervals_size = 0; intervals_size < 120; ++intervals_size) + { + std::map expected; + IntervalMap map; + + for (size_t interval_index = 0; interval_index < intervals_size; ++interval_index) + { + auto interval = Int64Interval(interval_index * 2, interval_index * 2 + 1); + auto value = std::to_string(interval.left); + expected.emplace(interval, value); + map.emplace(interval, value); + } + + ASSERT_TRUE(map.getIntervalsSize() == expected.size()); + ASSERT_TRUE(map.getIntervalsSize() == intervals_size); + ASSERT_TRUE(intervalMapToMap(map) == expected); + + for (const auto & [expected_interval, value] : expected) + { + std::map expected_intervals = {{expected_interval, std::to_string(expected_interval.left)}}; + + auto actual_intervals = intervalMapFindIntervals(map, expected_interval.left); + ASSERT_TRUE(actual_intervals.size() == 1); + ASSERT_TRUE(actual_intervals == expected_intervals); + + actual_intervals = intervalMapFindIntervals(map, expected_interval.right); + 
ASSERT_TRUE(actual_intervals.size() == 1); + ASSERT_TRUE(actual_intervals == expected_intervals); + + ASSERT_TRUE(map.has(expected_interval.left)); + ASSERT_TRUE(map.has(expected_interval.right)); + } + + map.build(); + + ASSERT_TRUE(intervalMapToMap(map) == expected); + + for (const auto & [expected_interval, value] : expected) + { + std::map expected_intervals = {{expected_interval, std::to_string(expected_interval.left)}}; + + auto actual_intervals = intervalMapFindIntervals(map, expected_interval.left); + ASSERT_TRUE(actual_intervals.size() == 1); + ASSERT_TRUE(actual_intervals == expected_intervals); + + actual_intervals = intervalMapFindIntervals(map, expected_interval.right); + ASSERT_TRUE(actual_intervals.size() == 1); + ASSERT_TRUE(actual_intervals == expected_intervals); + + ASSERT_TRUE(map.has(expected_interval.left)); + ASSERT_TRUE(map.has(expected_interval.right)); + } + } +} + +TEST(IntervalTree, IntervalMapPoints) +{ + for (size_t intervals_size = 0; intervals_size < 120; ++intervals_size) + { + std::map expected; + IntervalMap map; + + for (size_t interval_index = 0; interval_index < intervals_size; ++interval_index) + { + auto interval = Int64Interval(interval_index, interval_index); + auto value = std::to_string(interval.left); + expected.emplace(interval, value); + map.emplace(interval, value); + } + + ASSERT_TRUE(map.getIntervalsSize() == expected.size()); + ASSERT_TRUE(map.getIntervalsSize() == intervals_size); + ASSERT_TRUE(intervalMapToMap(map) == expected); + + for (const auto & [expected_interval, value] : expected) + { + std::map expected_intervals = {{expected_interval, std::to_string(expected_interval.left)}}; + + auto actual_intervals = intervalMapFindIntervals(map, expected_interval.left); + ASSERT_TRUE(actual_intervals.size() == 1); + ASSERT_TRUE(actual_intervals == expected_intervals); + + actual_intervals = intervalMapFindIntervals(map, expected_interval.right); + ASSERT_TRUE(actual_intervals.size() == 1); + 
ASSERT_TRUE(actual_intervals == expected_intervals); + + ASSERT_TRUE(map.has(expected_interval.left)); + ASSERT_TRUE(map.has(expected_interval.right)); + } + + map.build(); + + ASSERT_TRUE(intervalMapToMap(map) == expected); + + for (const auto & [expected_interval, value] : expected) + { + std::map expected_intervals = {{expected_interval, std::to_string(expected_interval.left)}}; + + auto actual_intervals = intervalMapFindIntervals(map, expected_interval.left); + ASSERT_TRUE(actual_intervals.size() == 1); + ASSERT_TRUE(actual_intervals == expected_intervals); + + actual_intervals = intervalMapFindIntervals(map, expected_interval.right); + ASSERT_TRUE(actual_intervals.size() == 1); + ASSERT_TRUE(actual_intervals == expected_intervals); + + ASSERT_TRUE(map.has(expected_interval.left)); + ASSERT_TRUE(map.has(expected_interval.right)); + } + } +} + +TEST(IntervalTree, IntervalMapIntersectingIntervals) +{ + for (size_t intervals_size = 0; intervals_size < 120; ++intervals_size) + { + std::map expected; + IntervalMap map; + + for (size_t interval_index = 0; interval_index < intervals_size; ++interval_index) + { + auto interval = Int64Interval(0, interval_index * 2 + 1); + auto value = std::to_string(interval.left); + expected.emplace(interval, value); + map.emplace(interval, value); + } + + ASSERT_TRUE(map.getIntervalsSize() == expected.size()); + ASSERT_TRUE(map.getIntervalsSize() == intervals_size); + ASSERT_TRUE(intervalMapToMap(map) == expected); + + for (const auto & [expected_interval, value] : expected) + { + auto actual_intervals = intervalMapFindIntervals(map, expected_interval.left); + + ASSERT_TRUE(actual_intervals.size() == expected.size()); + ASSERT_TRUE(actual_intervals == expected); + + ASSERT_TRUE(map.has(expected_interval.left)); + ASSERT_TRUE(map.has(expected_interval.right)); + } + + map.build(); + + ASSERT_TRUE(intervalMapToMap(map) == expected); + + for (const auto & [expected_interval, value] : expected) + { + auto actual_intervals = 
intervalMapFindIntervals(map, expected_interval.left); + + ASSERT_TRUE(actual_intervals.size() == expected.size()); + ASSERT_TRUE(actual_intervals == expected); + + ASSERT_TRUE(map.has(expected_interval.left)); + ASSERT_TRUE(map.has(expected_interval.right)); + } + } +} + +TEST(IntervalTree, IntervalMapIterators) +{ + { + IntervalMap map; + ASSERT_TRUE(map.begin() == map.end()); + ASSERT_TRUE(map.cbegin() == map.cend()); + map.build(); + ASSERT_TRUE(map.begin() == map.end()); + ASSERT_TRUE(map.cbegin() == map.cend()); + } + { + IntervalMap map; + map.emplace(Int64Interval(0, 5), "value"); + ASSERT_TRUE(map.begin() != map.end()); + ASSERT_TRUE((*map.begin()).first.left == 0); + ASSERT_TRUE((*map.begin()).first.right == 5); + ASSERT_TRUE((*map.begin()).second == "value"); + ASSERT_TRUE(map.begin()->first.left == 0); + ASSERT_TRUE(map.begin()->first.right == 5); + ASSERT_TRUE(map.begin()->second == "value"); + auto begin = map.begin(); + ++begin; + ASSERT_TRUE(begin == map.end()); + + begin = map.begin(); + begin++; + ASSERT_TRUE(begin == map.end()); + + auto end = map.end(); + --end; + ASSERT_TRUE(map.begin() == end); + + end = map.end(); + end--; + ASSERT_TRUE(map.begin() == end); + } + { + for (size_t intervals_size = 0; intervals_size < 120; ++intervals_size) + { + std::map expected; + IntervalMap map; + + for (size_t interval_index = 0; interval_index < intervals_size; ++interval_index) + { + auto interval = Int64Interval(interval_index * 2, interval_index * 2 + 1); + auto value = std::to_string(interval.left); + map.emplace(interval, value); + expected.emplace(interval, value); + } + + auto end = map.end(); + auto begin = map.begin(); + + std::map actual; + + while (end != begin) + { + --end; + actual.insert(*end); + } + + if (end != begin) + actual.insert(*end); + + ASSERT_TRUE(actual == expected); + } + } +} diff --git a/src/Dictionaries/RangeHashedDictionary.cpp b/src/Dictionaries/RangeHashedDictionary.cpp index 0d862573b65..2d98583d4a3 100644 --- 
a/src/Dictionaries/RangeHashedDictionary.cpp +++ b/src/Dictionaries/RangeHashedDictionary.cpp @@ -22,6 +22,11 @@ using RangeStorageType = DB::RangeStorageType; const RangeStorageType RANGE_MIN_NULL_VALUE = std::numeric_limits::max(); const RangeStorageType RANGE_MAX_NULL_VALUE = std::numeric_limits::lowest(); +bool isCorrectDate(const RangeStorageType & date) +{ + return 0 < date && date <= DATE_LUT_MAX_DAY_NUM; +} + // Handle both kinds of null values: explicit nulls of NullableColumn and 'implicit' nulls of Date type. RangeStorageType getColumnIntValueOrDefault(const DB::IColumn & column, size_t index, bool isDate, const RangeStorageType & default_value) { @@ -29,7 +34,7 @@ RangeStorageType getColumnIntValueOrDefault(const DB::IColumn & column, size_t i return default_value; const RangeStorageType result = static_cast(column.getInt(index)); - if (isDate && !DB::Range::isCorrectDate(result)) + if (isDate && !isCorrectDate(result)) return default_value; return result; @@ -57,20 +62,6 @@ namespace ErrorCodes extern const int UNSUPPORTED_METHOD; } -bool Range::isCorrectDate(const RangeStorageType & date) -{ - return 0 < date && date <= DATE_LUT_MAX_DAY_NUM; -} - -bool Range::contains(const RangeStorageType & value) const -{ - return left <= value && value <= right; -} - -static bool operator<(const Range & left, const Range & right) -{ - return std::tie(left.left, left.right) < std::tie(right.left, right.right); -} template RangeHashedDictionary::RangeHashedDictionary( @@ -260,16 +251,8 @@ ColumnUInt8::Ptr RangeHashedDictionary::hasKeys(const Colum if (it) { const auto date = dates[key_index]; - const auto & ranges_and_values = it->getMapped(); - const auto val_it = std::find_if( - std::begin(ranges_and_values), - std::end(ranges_and_values), - [date](const Value & v) - { - return v.range.contains(date); - }); - - out[key_index] = val_it != std::end(ranges_and_values); + const auto & interval_tree = it->getMapped(); + out[key_index] = interval_tree.has(date); 
keys_found += out[key_index]; } else @@ -324,6 +307,8 @@ void RangeHashedDictionary::loadData() updateData(); } + buildAttributeIntervalTrees(); + if (require_nonempty && 0 == element_count) throw Exception(ErrorCodes::DICTIONARY_IS_EMPTY, "{}: dictionary source is empty and 'require_nonempty' property is set."); @@ -407,30 +392,40 @@ void RangeHashedDictionary::getItemsImpl( if (it) { const auto date = dates[key_index]; - const auto & ranges_and_values = it->getMapped(); - const auto val_it = std::find_if( - std::begin(ranges_and_values), - std::end(ranges_and_values), - [date](const Value & v) - { - return v.range.contains(date); - }); + const auto & interval_tree = it->getMapped(); - if (val_it != std::end(ranges_and_values)) + std::optional min_value; + std::optional min_range; + bool has_interval = false; + + interval_tree.find(date, [&](auto & interval, auto & value) + { + has_interval = true; + + /** Several intervals may contain the date: keep the value of the smallest interval (ordered by left, then right). */ + if (!min_range || interval < *min_range) + { + min_range = interval; + min_value = value; + } + + return true; + }); + + if (has_interval) { ++keys_found; - auto & value = val_it->value; if constexpr (is_nullable) { - if (value.has_value()) - set_value(key_index, *value, false); + if (min_value.has_value()) + set_value(key_index, *min_value, false); else set_value(key_index, default_value_extractor[key_index], true); } else { - set_value(key_index, *value, false); + set_value(key_index, *min_value, false); } keys_extractor.rollbackCurrentKey(); @@ -542,7 +537,7 @@ void RangeHashedDictionary::blockToAttributes(const Block & if constexpr (std::is_same_v) key = copyStringInArena(string_arena, key); - setAttributeValue(attribute, key, Range{lower_bound, upper_bound}, attribute_column[key_index]); + setAttributeValue(attribute, key, RangeInterval{lower_bound, upper_bound}, attribute_column[key_index]); keys_extractor.rollbackCurrentKey(); } @@ -550,18 +545,38 @@ void RangeHashedDictionary::blockToAttributes(const Block & } } +template +void 
RangeHashedDictionary::buildAttributeIntervalTrees() +{ + for (auto & attribute : attributes) + { + auto type_call = [&](const auto & dictionary_attribute_type) + { + using Type = std::decay_t; + using AttributeType = typename Type::AttributeType; + using ValueType = DictionaryValueType; + + auto & collection = std::get>(attribute.maps); + for (auto & [_, ranges] : collection) + ranges.build(); + }; + + callOnDictionaryAttributeType(attribute.type, type_call); + } +} + template template -void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, KeyType key, const Range & range, const Field & value) +void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, KeyType key, const RangeInterval & interval, const Field & value) { using ValueType = std::conditional_t, StringRef, T>; auto & collection = std::get>(attribute.maps); - Value value_to_insert; + std::optional value_to_insert; if (attribute.is_nullable && value.isNull()) { - value_to_insert = { range, {} }; + value_to_insert = std::nullopt; } else { @@ -569,11 +584,11 @@ void RangeHashedDictionary::setAttributeValueImpl(Attribute { const auto & string = value.get(); StringRef string_ref = copyStringInArena(string_arena, string); - value_to_insert = Value{ range, { string_ref }}; + value_to_insert = { string_ref }; } else { - value_to_insert = Value{ range, { value.get() }}; + value_to_insert = { value.get() }; } } @@ -582,33 +597,25 @@ void RangeHashedDictionary::setAttributeValueImpl(Attribute if (it) { auto & values = it->getMapped(); - - const auto insert_it = std::lower_bound( - std::begin(values), - std::end(values), - range, - [](const Value & lhs, const Range & rhs_range) - { - return lhs.range < rhs_range; - }); - - values.insert(insert_it, std::move(value_to_insert)); + values.emplace(interval, std::move(value_to_insert)); } else { - collection.insert({key, Values{std::move(value_to_insert)}}); + Values values; + values.emplace(interval, value_to_insert); + 
collection.insert({key, std::move(values)}); } } template -void RangeHashedDictionary::setAttributeValue(Attribute & attribute, KeyType key, const Range & range, const Field & value) +void RangeHashedDictionary::setAttributeValue(Attribute & attribute, KeyType key, const RangeInterval & interval, const Field & value) { auto type_call = [&](const auto &dictionary_attribute_type) { using Type = std::decay_t; using AttributeType = typename Type::AttributeType; - setAttributeValueImpl(attribute, key, range, value); + setAttributeValueImpl(attribute, key, interval, value); }; callOnDictionaryAttributeType(attribute.type, type_call); @@ -650,14 +657,15 @@ void RangeHashedDictionary::getKeysAndDates( end_dates.reserve(collection.size()); const bool is_date = isDate(dict_struct.range_min->type); + (void)(is_date); for (const auto & key : collection) { - for (const auto & value : key.getMapped()) + for (const auto & [interval, _] : key.getMapped()) { keys.push_back(key.getKey()); - start_dates.push_back(value.range.left); - end_dates.push_back(value.range.right); + start_dates.push_back(interval.left); + end_dates.push_back(interval.right); if constexpr (std::numeric_limits::max() > DATE_LUT_MAX_DAY_NUM) /// Avoid warning about tautological comparison in next line. 
if (is_date && static_cast(end_dates.back()) > DATE_LUT_MAX_DAY_NUM) @@ -676,7 +684,7 @@ PaddedPODArray RangeHashedDictionary::makeDateKeys( for (size_t i = 0; i < keys.size(); ++i) { - if (Range::isCorrectDate(block_start_dates[i])) + if (isCorrectDate(block_start_dates[i])) keys[i] = block_start_dates[i]; // NOLINT else keys[i] = block_end_dates[i]; // NOLINT diff --git a/src/Dictionaries/RangeHashedDictionary.h b/src/Dictionaries/RangeHashedDictionary.h index a9b41a4c4d0..f31d6415dc8 100644 --- a/src/Dictionaries/RangeHashedDictionary.h +++ b/src/Dictionaries/RangeHashedDictionary.h @@ -8,26 +8,19 @@ #include #include #include -#include +#include + #include #include #include #include + namespace DB { using RangeStorageType = Int64; -struct Range -{ - RangeStorageType left; - RangeStorageType right; - - static bool isCorrectDate(const RangeStorageType & date); - bool contains(const RangeStorageType & value) const; -}; - template class RangeHashedDictionary final : public IDictionary { @@ -94,15 +87,11 @@ public: Pipe read(const Names & column_names, size_t max_block_size, size_t num_streams) const override; private: - template - struct Value final - { - Range range; - std::optional value; - }; + + using RangeInterval = Interval; template - using Values = std::vector>; + using Values = IntervalMap>; template using CollectionType = std::conditional_t< @@ -160,10 +149,12 @@ private: void blockToAttributes(const Block & block); - template - void setAttributeValueImpl(Attribute & attribute, KeyType key, const Range & range, const Field & value); + void buildAttributeIntervalTrees(); - void setAttributeValue(Attribute & attribute, KeyType key, const Range & range, const Field & value); + template + void setAttributeValueImpl(Attribute & attribute, KeyType key, const RangeInterval & interval, const Field & value); + + void setAttributeValue(Attribute & attribute, KeyType key, const RangeInterval & interval, const Field & value); template void getKeysAndDates( diff --git 
a/tests/performance/range_hashed_dictionary.xml b/tests/performance/range_hashed_dictionary.xml new file mode 100644 index 00000000000..bdf949cd1ff --- /dev/null +++ b/tests/performance/range_hashed_dictionary.xml @@ -0,0 +1,126 @@ + + + CREATE TABLE simple_key_range_hashed_dictionary_source_table + ( + id UInt64, + value UInt64, + start UInt64, + end UInt64 + ) ENGINE = Memory; + + + + CREATE TABLE complex_key_range_hashed_dictionary_source_table + ( + id UInt64, + id_key String, + value UInt64, + start UInt64, + end UInt64 + ) ENGINE = Memory; + + + + CREATE DICTIONARY simple_key_range_hashed_dictionary + ( + id UInt64, + value UInt64, + start UInt64, + end UInt64 + ) + PRIMARY KEY id + SOURCE(CLICKHOUSE(DB 'default' TABLE 'simple_key_range_hashed_dictionary_source_table')) + LAYOUT(RANGE_HASHED()) + RANGE(MIN start MAX end) + LIFETIME(MIN 0 MAX 1000); + + + + CREATE DICTIONARY complex_key_range_hashed_dictionary + ( + id UInt64, + id_key String, + value UInt64, + start UInt64, + end UInt64 + ) + PRIMARY KEY id, id_key + SOURCE(CLICKHOUSE(DB 'default' TABLE 'complex_key_range_hashed_dictionary_source_table')) + LAYOUT(COMPLEX_KEY_RANGE_HASHED()) + RANGE(MIN start MAX end) + LIFETIME(MIN 0 MAX 1000); + + + + INSERT INTO simple_key_range_hashed_dictionary_source_table + SELECT key, key, range_start * 2, range_start * 2 + 1 FROM + (SELECT number as key FROM numbers(10000)) as keys, + (SELECT number as range_start FROM numbers(1000)) as ranges; + + + + INSERT INTO complex_key_range_hashed_dictionary_source_table + SELECT key, toString(key), key, range_start * 2, range_start * 2 + 1 FROM + (SELECT number as key FROM numbers(10000)) as keys, + (SELECT number as range_start FROM numbers(1000)) as ranges; + + + + + elements_count + + 500000 + 750000 + + + + + + WITH rand64() % 5000 as key + SELECT dictGet('default.simple_key_range_hashed_dictionary', 'value', toUInt64(key), key) + FROM system.numbers + LIMIT {elements_count} + FORMAT Null; + + + + WITH rand64() % 5000 as 
key + SELECT dictHas('default.simple_key_range_hashed_dictionary', toUInt64(key), key) + FROM system.numbers + LIMIT {elements_count} + FORMAT Null; + + + + SELECT * FROM simple_key_range_hashed_dictionary + FORMAT Null; + + + + WITH (rand64() % toUInt64(5000) as key, toString(key) as key_id) as complex_key + SELECT dictGet('default.complex_key_range_hashed_dictionary', 'value', complex_key, key) + FROM system.numbers + LIMIT {elements_count} + FORMAT Null; + + + + WITH (rand64() % toUInt64(5000) as key, toString(key) as key_id) as complex_key + SELECT dictHas('default.complex_key_range_hashed_dictionary', complex_key, key) + FROM system.numbers + LIMIT {elements_count} + FORMAT Null; + + + + SELECT * FROM complex_key_range_hashed_dictionary + FORMAT Null; + + + DROP TABLE IF EXISTS simple_key_range_hashed_dictionary_source_table; + DROP TABLE IF EXISTS complex_key_range_hashed_dictionary_source_table; + + DROP DICTIONARY IF EXISTS simple_key_range_hashed_dictionary; + DROP DICTIONARY IF EXISTS complex_key_range_hashed_dictionary; + + diff --git a/tests/queries/0_stateless/01676_range_hashed_dictionary.sql b/tests/queries/0_stateless/01676_range_hashed_dictionary.sql index ff69d61b26b..7d1fc60e90d 100644 --- a/tests/queries/0_stateless/01676_range_hashed_dictionary.sql +++ b/tests/queries/0_stateless/01676_range_hashed_dictionary.sql @@ -45,13 +45,13 @@ SELECT dictHas('database_for_range_dict.range_dictionary', toUInt64(2), toDate(' SELECT dictHas('database_for_range_dict.range_dictionary', toUInt64(2), toDate('2019-05-31')); SELECT 'select columns from dictionary'; SELECT 'allColumns'; -SELECT * FROM database_for_range_dict.range_dictionary; +SELECT * FROM database_for_range_dict.range_dictionary ORDER BY CountryID, StartDate, EndDate; SELECT 'noColumns'; -SELECT 1 FROM database_for_range_dict.range_dictionary; +SELECT 1 FROM database_for_range_dict.range_dictionary ORDER BY CountryID, StartDate, EndDate; SELECT 'onlySpecificColumns'; -SELECT CountryID, StartDate, 
Tax FROM database_for_range_dict.range_dictionary; +SELECT CountryID, StartDate, Tax FROM database_for_range_dict.range_dictionary ORDER BY CountryID, StartDate, EndDate; SELECT 'onlySpecificColumn'; -SELECT Tax FROM database_for_range_dict.range_dictionary; +SELECT Tax FROM database_for_range_dict.range_dictionary ORDER BY CountryID, StartDate, EndDate; DROP DICTIONARY database_for_range_dict.range_dictionary; DROP TABLE database_for_range_dict.date_table; @@ -97,13 +97,13 @@ SELECT dictHas('database_for_range_dict.range_dictionary_nullable', toUInt64(2), SELECT dictHas('database_for_range_dict.range_dictionary_nullable', toUInt64(2), toDate('2019-05-31')); SELECT 'select columns from dictionary'; SELECT 'allColumns'; -SELECT * FROM database_for_range_dict.range_dictionary_nullable; +SELECT * FROM database_for_range_dict.range_dictionary_nullable ORDER BY CountryID, StartDate, EndDate; SELECT 'noColumns'; -SELECT 1 FROM database_for_range_dict.range_dictionary_nullable; +SELECT 1 FROM database_for_range_dict.range_dictionary_nullable ORDER BY CountryID, StartDate, EndDate; SELECT 'onlySpecificColumns'; -SELECT CountryID, StartDate, Tax FROM database_for_range_dict.range_dictionary_nullable; +SELECT CountryID, StartDate, Tax FROM database_for_range_dict.range_dictionary_nullable ORDER BY CountryID, StartDate, EndDate; SELECT 'onlySpecificColumn'; -SELECT Tax FROM database_for_range_dict.range_dictionary_nullable; +SELECT Tax FROM database_for_range_dict.range_dictionary_nullable ORDER BY CountryID, StartDate, EndDate; DROP DICTIONARY database_for_range_dict.range_dictionary_nullable; DROP TABLE database_for_range_dict.date_table; diff --git a/tests/queries/0_stateless/02008_complex_key_range_hashed_dictionary.sql b/tests/queries/0_stateless/02008_complex_key_range_hashed_dictionary.sql index 677879b1ebd..72cac481376 100644 --- a/tests/queries/0_stateless/02008_complex_key_range_hashed_dictionary.sql +++ 
b/tests/queries/0_stateless/02008_complex_key_range_hashed_dictionary.sql @@ -45,13 +45,13 @@ SELECT dictHas('range_dictionary', (toUInt64(2), '2'), toDate('2019-05-29')); SELECT dictHas('range_dictionary', (toUInt64(2), '2'), toDate('2019-05-31')); SELECT 'select columns from dictionary'; SELECT 'allColumns'; -SELECT * FROM range_dictionary; +SELECT * FROM range_dictionary ORDER BY CountryID, StartDate, EndDate; SELECT 'noColumns'; -SELECT 1 FROM range_dictionary; +SELECT 1 FROM range_dictionary ORDER BY CountryID, StartDate, EndDate; SELECT 'onlySpecificColumns'; -SELECT CountryID, StartDate, Tax FROM range_dictionary; +SELECT CountryID, StartDate, Tax FROM range_dictionary ORDER BY CountryID, StartDate, EndDate; SELECT 'onlySpecificColumn'; -SELECT Tax FROM range_dictionary; +SELECT Tax FROM range_dictionary ORDER BY CountryID, StartDate, EndDate; DROP TABLE date_table; DROP DICTIONARY range_dictionary; @@ -99,13 +99,13 @@ SELECT dictHas('range_dictionary_nullable', (toUInt64(2), '2'), toDate('2019-05- SELECT dictHas('range_dictionary_nullable', (toUInt64(2), '2'), toDate('2019-05-31')); SELECT 'select columns from dictionary'; SELECT 'allColumns'; -SELECT * FROM range_dictionary_nullable; +SELECT * FROM range_dictionary_nullable ORDER BY CountryID, StartDate, EndDate; SELECT 'noColumns'; -SELECT 1 FROM range_dictionary_nullable; +SELECT 1 FROM range_dictionary_nullable ORDER BY CountryID, StartDate, EndDate; SELECT 'onlySpecificColumns'; -SELECT CountryID, StartDate, Tax FROM range_dictionary_nullable; +SELECT CountryID, StartDate, Tax FROM range_dictionary_nullable ORDER BY CountryID, StartDate, EndDate; SELECT 'onlySpecificColumn'; -SELECT Tax FROM range_dictionary_nullable; +SELECT Tax FROM range_dictionary_nullable ORDER BY CountryID, StartDate, EndDate; DROP TABLE date_table; DROP DICTIONARY range_dictionary_nullable;