mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 01:25:21 +00:00
Merge pull request #55775 from kitaisreal/decimal-column-improve-get-permutation-revert-revert
Improve ColumnDecimal, ColumnVector getPermutation performance using pdqsort with RadixSort fix
This commit is contained in:
commit
0794f1faa7
@ -131,3 +131,29 @@ void sort(RandomIt first, RandomIt last)
|
||||
using comparator = std::less<value_type>;
|
||||
::sort(first, last, comparator());
|
||||
}
|
||||
|
||||
/** Try to fast sort elements for common sorting patterns:
|
||||
* 1. If elements are already sorted.
|
||||
* 2. If elements are already almost sorted.
|
||||
* 3. If elements are already sorted in reverse order.
|
||||
*
|
||||
* Returns true if fast sort was performed or elements were already sorted, false otherwise.
|
||||
*/
|
||||
template <typename RandomIt, typename Compare>
|
||||
bool trySort(RandomIt first, RandomIt last, Compare compare)
|
||||
{
|
||||
#ifndef NDEBUG
|
||||
::shuffle(first, last);
|
||||
#endif
|
||||
|
||||
ComparatorWrapper<Compare> compare_wrapper = compare;
|
||||
return ::pdqsort_try_sort(first, last, compare_wrapper);
|
||||
}
|
||||
|
||||
/// Overload of trySort that orders elements with std::less over the iterator's value type.
template <typename RandomIt>
bool trySort(RandomIt first, RandomIt last)
{
    return ::trySort(first, last, std::less<typename std::iterator_traits<RandomIt>::value_type>());
}
|
||||
|
@ -54,8 +54,10 @@ namespace pdqsort_detail {
|
||||
block_size = 64,
|
||||
|
||||
// Cacheline size, assumes power of two.
|
||||
cacheline_size = 64
|
||||
cacheline_size = 64,
|
||||
|
||||
/// Try sort allowed iterations
|
||||
try_sort_iterations = 3,
|
||||
};
|
||||
|
||||
#if __cplusplus >= 201103L
|
||||
@ -501,6 +503,167 @@ namespace pdqsort_detail {
|
||||
leftmost = false;
|
||||
}
|
||||
}
|
||||
|
||||
template<class Iter, class Compare, bool Branchless>
|
||||
inline bool pdqsort_try_sort_loop(Iter begin,
|
||||
Iter end,
|
||||
Compare comp,
|
||||
size_t bad_allowed,
|
||||
size_t iterations_allowed,
|
||||
bool force_sort = false,
|
||||
bool leftmost = true) {
|
||||
typedef typename std::iterator_traits<Iter>::difference_type diff_t;
|
||||
|
||||
// Use a while loop for tail recursion elimination.
|
||||
while (true) {
|
||||
if (!force_sort && iterations_allowed == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
diff_t size = end - begin;
|
||||
|
||||
// Insertion sort is faster for small arrays.
|
||||
if (size < insertion_sort_threshold) {
|
||||
if (leftmost) insertion_sort(begin, end, comp);
|
||||
else unguarded_insertion_sort(begin, end, comp);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Choose pivot as median of 3 or pseudomedian of 9.
|
||||
diff_t s2 = size / 2;
|
||||
if (size > ninther_threshold) {
|
||||
sort3(begin, begin + s2, end - 1, comp);
|
||||
sort3(begin + 1, begin + (s2 - 1), end - 2, comp);
|
||||
sort3(begin + 2, begin + (s2 + 1), end - 3, comp);
|
||||
sort3(begin + (s2 - 1), begin + s2, begin + (s2 + 1), comp);
|
||||
std::iter_swap(begin, begin + s2);
|
||||
} else sort3(begin + s2, begin, end - 1, comp);
|
||||
|
||||
// If *(begin - 1) is the end of the right partition of a previous partition operation
|
||||
// there is no element in [begin, end) that is smaller than *(begin - 1). Then if our
|
||||
// pivot compares equal to *(begin - 1) we change strategy, putting equal elements in
|
||||
// the left partition, greater elements in the right partition. We do not have to
|
||||
// recurse on the left partition, since it's sorted (all equal).
|
||||
if (!leftmost && !comp(*(begin - 1), *begin)) {
|
||||
begin = partition_left(begin, end, comp) + 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Partition and get results.
|
||||
std::pair<Iter, bool> part_result =
|
||||
Branchless ? partition_right_branchless(begin, end, comp)
|
||||
: partition_right(begin, end, comp);
|
||||
Iter pivot_pos = part_result.first;
|
||||
bool already_partitioned = part_result.second;
|
||||
|
||||
// Check for a highly unbalanced partition.
|
||||
diff_t l_size = pivot_pos - begin;
|
||||
diff_t r_size = end - (pivot_pos + 1);
|
||||
bool highly_unbalanced = l_size < size / 8 || r_size < size / 8;
|
||||
|
||||
// If we got a highly unbalanced partition we shuffle elements to break many patterns.
|
||||
if (highly_unbalanced) {
|
||||
if (!force_sort) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// If we had too many bad partitions, switch to heapsort to guarantee O(n log n).
|
||||
if (--bad_allowed == 0) {
|
||||
std::make_heap(begin, end, comp);
|
||||
std::sort_heap(begin, end, comp);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (l_size >= insertion_sort_threshold) {
|
||||
std::iter_swap(begin, begin + l_size / 4);
|
||||
std::iter_swap(pivot_pos - 1, pivot_pos - l_size / 4);
|
||||
|
||||
if (l_size > ninther_threshold) {
|
||||
std::iter_swap(begin + 1, begin + (l_size / 4 + 1));
|
||||
std::iter_swap(begin + 2, begin + (l_size / 4 + 2));
|
||||
std::iter_swap(pivot_pos - 2, pivot_pos - (l_size / 4 + 1));
|
||||
std::iter_swap(pivot_pos - 3, pivot_pos - (l_size / 4 + 2));
|
||||
}
|
||||
}
|
||||
|
||||
if (r_size >= insertion_sort_threshold) {
|
||||
std::iter_swap(pivot_pos + 1, pivot_pos + (1 + r_size / 4));
|
||||
std::iter_swap(end - 1, end - r_size / 4);
|
||||
|
||||
if (r_size > ninther_threshold) {
|
||||
std::iter_swap(pivot_pos + 2, pivot_pos + (2 + r_size / 4));
|
||||
std::iter_swap(pivot_pos + 3, pivot_pos + (3 + r_size / 4));
|
||||
std::iter_swap(end - 2, end - (1 + r_size / 4));
|
||||
std::iter_swap(end - 3, end - (2 + r_size / 4));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// If we were decently balanced and we tried to sort an already partitioned
|
||||
// sequence try to use insertion sort.
|
||||
if (already_partitioned && partial_insertion_sort(begin, pivot_pos, comp)
|
||||
&& partial_insertion_sort(pivot_pos + 1, end, comp)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Sort the left partition first using recursion and do tail recursion elimination for
|
||||
// the right-hand partition.
|
||||
if (pdqsort_try_sort_loop<Iter, Compare, Branchless>(begin,
|
||||
pivot_pos,
|
||||
comp,
|
||||
bad_allowed,
|
||||
iterations_allowed - 1,
|
||||
force_sort,
|
||||
leftmost)) {
|
||||
force_sort = true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
--iterations_allowed;
|
||||
begin = pivot_pos + 1;
|
||||
leftmost = false;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
template<class Iter, class Compare, bool Branchless>
|
||||
inline bool pdqsort_try_sort_impl(Iter begin, Iter end, Compare comp, size_t bad_allowed)
|
||||
{
|
||||
typedef typename std::iterator_traits<Iter>::difference_type diff_t;
|
||||
|
||||
static constexpr size_t iterations_allowed = pdqsort_detail::try_sort_iterations;
|
||||
static constexpr size_t num_to_try = 16;
|
||||
|
||||
diff_t size = end - begin;
|
||||
|
||||
if (size > num_to_try * 10)
|
||||
{
|
||||
size_t out_of_order_elements = 0;
|
||||
|
||||
for (size_t i = 1; i < num_to_try; ++i)
|
||||
{
|
||||
diff_t offset = size / num_to_try;
|
||||
|
||||
diff_t prev_position = offset * (i - 1);
|
||||
diff_t curr_position = offset * i;
|
||||
diff_t next_position = offset * (i + 1) - 1;
|
||||
|
||||
bool prev_less_than_curr = comp(*(begin + prev_position), *(begin + curr_position));
|
||||
bool curr_less_than_next = comp(*(begin + curr_position), *(begin + next_position));
|
||||
if ((prev_less_than_curr && curr_less_than_next) || (!prev_less_than_curr && !curr_less_than_next))
|
||||
continue;
|
||||
|
||||
++out_of_order_elements;
|
||||
if (out_of_order_elements > iterations_allowed)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return pdqsort_try_sort_loop<Iter, Compare, Branchless>(begin, end, comp, bad_allowed, iterations_allowed);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -538,6 +701,41 @@ inline void pdqsort_branchless(Iter begin, Iter end) {
|
||||
pdqsort_branchless(begin, end, std::less<T>());
|
||||
}
|
||||
|
||||
template<class Iter, class Compare>
|
||||
inline bool pdqsort_try_sort(Iter begin, Iter end, Compare comp) {
|
||||
if (begin == end) return true;
|
||||
|
||||
#if __cplusplus >= 201103L
|
||||
return pdqsort_detail::pdqsort_try_sort_impl<Iter, Compare,
|
||||
pdqsort_detail::is_default_compare<typename std::decay<Compare>::type>::value &&
|
||||
std::is_arithmetic<typename std::iterator_traits<Iter>::value_type>::value>(
|
||||
begin, end, comp, pdqsort_detail::log2(end - begin));
|
||||
#else
|
||||
return pdqsort_detail::pdqsort_try_sort_impl<Iter, Compare, false>(
|
||||
begin, end, comp, pdqsort_detail::log2(end - begin));
|
||||
#endif
|
||||
}
|
||||
|
||||
// Overload of pdqsort_try_sort that compares with std::less over the iterator's value type.
template<class Iter>
inline bool pdqsort_try_sort(Iter begin, Iter end) {
    return pdqsort_try_sort(begin, end, std::less<typename std::iterator_traits<Iter>::value_type>());
}
|
||||
|
||||
template<class Iter, class Compare>
|
||||
inline bool pdqsort_try_sort_branchless(Iter begin, Iter end, Compare comp) {
|
||||
if (begin == end) return true;
|
||||
|
||||
return pdqsort_detail::pdqsort_try_sort_impl<Iter, Compare, true>(
|
||||
begin, end, comp, pdqsort_detail::log2(end - begin));
|
||||
}
|
||||
|
||||
// Overload of pdqsort_try_sort_branchless that compares with std::less over the
// iterator's value type.
template<class Iter>
inline bool pdqsort_try_sort_branchless(Iter begin, Iter end) {
    return pdqsort_try_sort_branchless(begin, end, std::less<typename std::iterator_traits<Iter>::value_type>());
}
|
||||
|
||||
|
||||
#undef PDQSORT_PREFER_MOVE
|
||||
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Common/WeakHash.h>
|
||||
#include <Common/HashTable/Hash.h>
|
||||
#include <Common/RadixSort.h>
|
||||
|
||||
#include <base/unaligned.h>
|
||||
#include <base/sort.h>
|
||||
@ -15,6 +16,7 @@
|
||||
#include <Columns/ColumnDecimal.h>
|
||||
#include <Columns/ColumnCompressed.h>
|
||||
#include <Columns/MaskOperations.h>
|
||||
#include <Columns/RadixSortHelper.h>
|
||||
#include <Processors/Transforms/ColumnGathererTransform.h>
|
||||
|
||||
|
||||
@ -159,6 +161,59 @@ void ColumnDecimal<T>::getPermutation(IColumn::PermutationSortDirection directio
|
||||
return data[lhs] > data[rhs];
|
||||
};
|
||||
|
||||
size_t data_size = data.size();
|
||||
res.resize(data_size);
|
||||
|
||||
if (limit >= data_size)
|
||||
limit = 0;
|
||||
|
||||
for (size_t i = 0; i < data_size; ++i)
|
||||
res[i] = i;
|
||||
|
||||
if constexpr (is_arithmetic_v<NativeT> && !is_big_int_v<NativeT>)
|
||||
{
|
||||
if (!limit)
|
||||
{
|
||||
/// A case for radix sort
|
||||
/// LSD RadixSort is stable
|
||||
|
||||
bool reverse = direction == IColumn::PermutationSortDirection::Descending;
|
||||
bool ascending = direction == IColumn::PermutationSortDirection::Ascending;
|
||||
bool sort_is_stable = stability == IColumn::PermutationSortStability::Stable;
|
||||
|
||||
/// TODO: LSD RadixSort is currently not stable if direction is descending
|
||||
bool use_radix_sort = (sort_is_stable && ascending) || !sort_is_stable;
|
||||
|
||||
/// Thresholds on size. Lower threshold is arbitrary. Upper threshold is chosen by the type for histogram counters.
|
||||
if (data_size >= 256 && data_size <= std::numeric_limits<UInt32>::max() && use_radix_sort)
|
||||
{
|
||||
for (size_t i = 0; i < data_size; ++i)
|
||||
res[i] = i;
|
||||
|
||||
bool try_sort = false;
|
||||
|
||||
if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Unstable)
|
||||
try_sort = trySort(res.begin(), res.end(), comparator_ascending);
|
||||
else if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Stable)
|
||||
try_sort = trySort(res.begin(), res.end(), comparator_ascending_stable);
|
||||
else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Unstable)
|
||||
try_sort = trySort(res.begin(), res.end(), comparator_descending);
|
||||
else
|
||||
try_sort = trySort(res.begin(), res.end(), comparator_descending_stable);
|
||||
|
||||
if (try_sort)
|
||||
return;
|
||||
|
||||
PaddedPODArray<ValueWithIndex<NativeT>> pairs(data_size);
|
||||
for (UInt32 i = 0; i < static_cast<UInt32>(data_size); ++i)
|
||||
pairs[i] = {data[i].value, i};
|
||||
|
||||
RadixSort<RadixSortTraits<NativeT>>::executeLSD(pairs.data(), data_size, reverse, res.data());
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Unstable)
|
||||
this->getPermutationImpl(limit, res, comparator_ascending, DefaultSort(), DefaultPartialSort());
|
||||
else if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Stable)
|
||||
@ -191,7 +246,37 @@ void ColumnDecimal<T>::updatePermutation(IColumn::PermutationSortDirection direc
|
||||
return data[lhs] < data[rhs];
|
||||
};
|
||||
auto equals_comparator = [this](size_t lhs, size_t rhs) { return data[lhs] == data[rhs]; };
|
||||
auto sort = [](auto begin, auto end, auto pred) { ::sort(begin, end, pred); };
|
||||
auto sort = [&](auto begin, auto end, auto pred)
|
||||
{
|
||||
bool reverse = direction == IColumn::PermutationSortDirection::Descending;
|
||||
bool ascending = direction == IColumn::PermutationSortDirection::Ascending;
|
||||
bool sort_is_stable = stability == IColumn::PermutationSortStability::Stable;
|
||||
|
||||
/// TODO: LSD RadixSort is currently not stable if direction is descending
|
||||
bool use_radix_sort = (sort_is_stable && ascending) || !sort_is_stable;
|
||||
size_t size = end - begin;
|
||||
|
||||
if (size >= 256 && size <= std::numeric_limits<UInt32>::max() && use_radix_sort)
|
||||
{
|
||||
bool try_sort = trySort(begin, end, pred);
|
||||
if (try_sort)
|
||||
return;
|
||||
|
||||
PaddedPODArray<ValueWithIndex<NativeT>> pairs(size);
|
||||
size_t index = 0;
|
||||
|
||||
for (auto * it = begin; it != end; ++it)
|
||||
{
|
||||
pairs[index] = {data[*it].value, static_cast<UInt32>(*it)};
|
||||
++index;
|
||||
}
|
||||
|
||||
RadixSort<RadixSortTraits<NativeT>>::executeLSD(pairs.data(), size, reverse, begin);
|
||||
return;
|
||||
}
|
||||
|
||||
::sort(begin, end, pred);
|
||||
};
|
||||
auto partial_sort = [](auto begin, auto mid, auto end, auto pred) { ::partial_sort(begin, mid, end, pred); };
|
||||
|
||||
if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Unstable)
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <Columns/ColumnsCommon.h>
|
||||
#include <Columns/ColumnCompressed.h>
|
||||
#include <Columns/MaskOperations.h>
|
||||
#include <Columns/RadixSortHelper.h>
|
||||
#include <Processors/Transforms/ColumnGathererTransform.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Common/Arena.h>
|
||||
@ -192,26 +193,6 @@ struct ColumnVector<T>::equals
|
||||
bool operator()(size_t lhs, size_t rhs) const { return CompareHelper<T>::equals(parent.data[lhs], parent.data[rhs], nan_direction_hint); }
|
||||
};
|
||||
|
||||
namespace
|
||||
{
|
||||
template <typename T>
|
||||
struct ValueWithIndex
|
||||
{
|
||||
T value;
|
||||
UInt32 index;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct RadixSortTraits : RadixSortNumTraits<T>
|
||||
{
|
||||
using Element = ValueWithIndex<T>;
|
||||
using Result = size_t;
|
||||
|
||||
static T & extractKey(Element & elem) { return elem.value; }
|
||||
static size_t extractResult(Element & elem) { return elem.index; }
|
||||
};
|
||||
}
|
||||
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
|
||||
template <typename T>
|
||||
@ -254,35 +235,25 @@ template <typename T>
|
||||
void ColumnVector<T>::getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
|
||||
size_t limit, int nan_direction_hint, IColumn::Permutation & res) const
|
||||
{
|
||||
size_t s = data.size();
|
||||
res.resize(s);
|
||||
size_t data_size = data.size();
|
||||
res.resize(data_size);
|
||||
|
||||
if (s == 0)
|
||||
if (data_size == 0)
|
||||
return;
|
||||
|
||||
if (limit >= s)
|
||||
if (limit >= data_size)
|
||||
limit = 0;
|
||||
|
||||
if (limit)
|
||||
{
|
||||
for (size_t i = 0; i < s; ++i)
|
||||
res[i] = i;
|
||||
for (size_t i = 0; i < data_size; ++i)
|
||||
res[i] = i;
|
||||
|
||||
if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Unstable)
|
||||
::partial_sort(res.begin(), res.begin() + limit, res.end(), less(*this, nan_direction_hint));
|
||||
else if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Stable)
|
||||
::partial_sort(res.begin(), res.begin() + limit, res.end(), less_stable(*this, nan_direction_hint));
|
||||
else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Unstable)
|
||||
::partial_sort(res.begin(), res.begin() + limit, res.end(), greater(*this, nan_direction_hint));
|
||||
else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Stable)
|
||||
::partial_sort(res.begin(), res.begin() + limit, res.end(), greater_stable(*this, nan_direction_hint));
|
||||
}
|
||||
else
|
||||
if constexpr (is_arithmetic_v<T> && !is_big_int_v<T>)
|
||||
{
|
||||
/// A case for radix sort
|
||||
/// LSD RadixSort is stable
|
||||
if constexpr (is_arithmetic_v<T> && !is_big_int_v<T>)
|
||||
if (!limit)
|
||||
{
|
||||
/// A case for radix sort
|
||||
/// LSD RadixSort is stable
|
||||
|
||||
bool reverse = direction == IColumn::PermutationSortDirection::Descending;
|
||||
bool ascending = direction == IColumn::PermutationSortDirection::Ascending;
|
||||
bool sort_is_stable = stability == IColumn::PermutationSortStability::Stable;
|
||||
@ -291,13 +262,27 @@ void ColumnVector<T>::getPermutation(IColumn::PermutationSortDirection direction
|
||||
bool use_radix_sort = (sort_is_stable && ascending && !std::is_floating_point_v<T>) || !sort_is_stable;
|
||||
|
||||
/// Thresholds on size. Lower threshold is arbitrary. Upper threshold is chosen by the type for histogram counters.
|
||||
if (s >= 256 && s <= std::numeric_limits<UInt32>::max() && use_radix_sort)
|
||||
if (data_size >= 256 && data_size <= std::numeric_limits<UInt32>::max() && use_radix_sort)
|
||||
{
|
||||
PaddedPODArray<ValueWithIndex<T>> pairs(s);
|
||||
for (UInt32 i = 0; i < static_cast<UInt32>(s); ++i)
|
||||
bool try_sort = false;
|
||||
|
||||
if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Unstable)
|
||||
try_sort = trySort(res.begin(), res.end(), less(*this, nan_direction_hint));
|
||||
else if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Stable)
|
||||
try_sort = trySort(res.begin(), res.end(), less_stable(*this, nan_direction_hint));
|
||||
else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Unstable)
|
||||
try_sort = trySort(res.begin(), res.end(), greater(*this, nan_direction_hint));
|
||||
else
|
||||
try_sort = trySort(res.begin(), res.end(), greater_stable(*this, nan_direction_hint));
|
||||
|
||||
if (try_sort)
|
||||
return;
|
||||
|
||||
PaddedPODArray<ValueWithIndex<T>> pairs(data_size);
|
||||
for (UInt32 i = 0; i < static_cast<UInt32>(data_size); ++i)
|
||||
pairs[i] = {data[i], i};
|
||||
|
||||
RadixSort<RadixSortTraits<T>>::executeLSD(pairs.data(), s, reverse, res.data());
|
||||
RadixSort<RadixSortTraits<T>>::executeLSD(pairs.data(), data_size, reverse, res.data());
|
||||
|
||||
/// Radix sort treats all NaNs to be greater than all numbers.
|
||||
/// If the user needs the opposite, we must move them accordingly.
|
||||
@ -305,9 +290,9 @@ void ColumnVector<T>::getPermutation(IColumn::PermutationSortDirection direction
|
||||
{
|
||||
size_t nans_to_move = 0;
|
||||
|
||||
for (size_t i = 0; i < s; ++i)
|
||||
for (size_t i = 0; i < data_size; ++i)
|
||||
{
|
||||
if (isNaN(data[res[reverse ? i : s - 1 - i]]))
|
||||
if (isNaN(data[res[reverse ? i : data_size - 1 - i]]))
|
||||
++nans_to_move;
|
||||
else
|
||||
break;
|
||||
@ -315,38 +300,35 @@ void ColumnVector<T>::getPermutation(IColumn::PermutationSortDirection direction
|
||||
|
||||
if (nans_to_move)
|
||||
{
|
||||
std::rotate(std::begin(res), std::begin(res) + (reverse ? nans_to_move : s - nans_to_move), std::end(res));
|
||||
std::rotate(std::begin(res), std::begin(res) + (reverse ? nans_to_move : data_size - nans_to_move), std::end(res));
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/// Default sorting algorithm.
|
||||
for (size_t i = 0; i < s; ++i)
|
||||
res[i] = i;
|
||||
|
||||
if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Unstable)
|
||||
::sort(res.begin(), res.end(), less(*this, nan_direction_hint));
|
||||
else if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Stable)
|
||||
::sort(res.begin(), res.end(), less_stable(*this, nan_direction_hint));
|
||||
else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Unstable)
|
||||
::sort(res.begin(), res.end(), greater(*this, nan_direction_hint));
|
||||
else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Stable)
|
||||
::sort(res.begin(), res.end(), greater_stable(*this, nan_direction_hint));
|
||||
}
|
||||
|
||||
if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Unstable)
|
||||
this->getPermutationImpl(limit, res, less(*this, nan_direction_hint), DefaultSort(), DefaultPartialSort());
|
||||
else if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Stable)
|
||||
this->getPermutationImpl(limit, res, less_stable(*this, nan_direction_hint), DefaultSort(), DefaultPartialSort());
|
||||
else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Unstable)
|
||||
this->getPermutationImpl(limit, res, greater(*this, nan_direction_hint), DefaultSort(), DefaultPartialSort());
|
||||
else
|
||||
this->getPermutationImpl(limit, res, greater_stable(*this, nan_direction_hint), DefaultSort(), DefaultPartialSort());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void ColumnVector<T>::updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
|
||||
size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const
|
||||
{
|
||||
bool reverse = direction == IColumn::PermutationSortDirection::Descending;
|
||||
bool ascending = direction == IColumn::PermutationSortDirection::Ascending;
|
||||
bool sort_is_stable = stability == IColumn::PermutationSortStability::Stable;
|
||||
|
||||
auto sort = [&](auto begin, auto end, auto pred)
|
||||
{
|
||||
bool reverse = direction == IColumn::PermutationSortDirection::Descending;
|
||||
bool ascending = direction == IColumn::PermutationSortDirection::Ascending;
|
||||
bool sort_is_stable = stability == IColumn::PermutationSortStability::Stable;
|
||||
|
||||
/// A case for radix sort
|
||||
if constexpr (is_arithmetic_v<T> && !is_big_int_v<T>)
|
||||
{
|
||||
@ -357,6 +339,10 @@ void ColumnVector<T>::updatePermutation(IColumn::PermutationSortDirection direct
|
||||
/// Thresholds on size. Lower threshold is arbitrary. Upper threshold is chosen by the type for histogram counters.
|
||||
if (size >= 256 && size <= std::numeric_limits<UInt32>::max() && use_radix_sort)
|
||||
{
|
||||
bool try_sort = trySort(begin, end, pred);
|
||||
if (try_sort)
|
||||
return;
|
||||
|
||||
PaddedPODArray<ValueWithIndex<T>> pairs(size);
|
||||
size_t index = 0;
|
||||
|
||||
|
25
src/Columns/RadixSortHelper.h
Normal file
25
src/Columns/RadixSortHelper.h
Normal file
@ -0,0 +1,25 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/RadixSort.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
template <typename T>
|
||||
struct ValueWithIndex
|
||||
{
|
||||
T value;
|
||||
UInt32 index;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct RadixSortTraits : RadixSortNumTraits<T>
|
||||
{
|
||||
using Element = ValueWithIndex<T>;
|
||||
using Result = size_t;
|
||||
|
||||
static T & extractKey(Element & elem) { return elem.value; }
|
||||
static size_t extractResult(Element & elem) { return elem.index; }
|
||||
};
|
||||
|
||||
}
|
@ -18,15 +18,29 @@
|
||||
<value>merge_tree_insert_6</value>
|
||||
</values>
|
||||
</substitution>
|
||||
|
||||
<substitution>
|
||||
<name>decimal_primary_key_table_name</name>
|
||||
<values>
|
||||
<value>merge_tree_insert_7</value>
|
||||
<value>merge_tree_insert_8</value>
|
||||
<value>merge_tree_insert_9</value>
|
||||
</values>
|
||||
</substitution>
|
||||
</substitutions>
|
||||
|
||||
<create_query>CREATE TABLE merge_tree_insert_1 (value_1 UInt64, value_2 UInt64, value_3 UInt64) ENGINE = MergeTree ORDER BY (value_1)</create_query>
|
||||
<create_query>CREATE TABLE merge_tree_insert_2 (value_1 UInt64, value_2 UInt64, value_3 UInt64) ENGINE = MergeTree ORDER BY (value_1, value_2)</create_query>
|
||||
<create_query>CREATE TABLE merge_tree_insert_3 (value_1 UInt64, value_2 UInt64, value_3 UInt64) ENGINE = MergeTree ORDER BY (value_1, value_2, value_3)</create_query>
|
||||
|
||||
<create_query>CREATE TABLE merge_tree_insert_4 (value_1 String, value_2 String, value_3 String) ENGINE = MergeTree ORDER BY (value_1)</create_query>
|
||||
<create_query>CREATE TABLE merge_tree_insert_5 (value_1 String, value_2 String, value_3 String) ENGINE = MergeTree ORDER BY (value_1, value_2)</create_query>
|
||||
<create_query>CREATE TABLE merge_tree_insert_6 (value_1 String, value_2 String, value_3 String) ENGINE = MergeTree ORDER BY (value_1, value_2, value_3)</create_query>
|
||||
|
||||
<create_query>CREATE TABLE merge_tree_insert_7 (value_1 Decimal64(8), value_2 Decimal64(8), value_3 Decimal64(8)) ENGINE = MergeTree ORDER BY (value_1)</create_query>
|
||||
<create_query>CREATE TABLE merge_tree_insert_8 (value_1 Decimal64(8), value_2 Decimal64(8), value_3 Decimal64(8)) ENGINE = MergeTree ORDER BY (value_1, value_2)</create_query>
|
||||
<create_query>CREATE TABLE merge_tree_insert_9 (value_1 Decimal64(8), value_2 Decimal64(8), value_3 Decimal64(8)) ENGINE = MergeTree ORDER BY (value_1, value_2, value_3)</create_query>
|
||||
|
||||
<query>INSERT INTO {integer_primary_key_table_name} SELECT rand64(0), rand64(1), rand64(2) FROM system.numbers LIMIT 500000</query>
|
||||
<query>INSERT INTO {integer_primary_key_table_name} SELECT rand64(0), rand64(1), rand64(2) FROM system.numbers LIMIT 1000000</query>
|
||||
<query>INSERT INTO {integer_primary_key_table_name} SELECT rand64(0), rand64(1), rand64(2) FROM system.numbers LIMIT 1500000</query>
|
||||
@ -35,7 +49,12 @@
|
||||
<query>INSERT INTO {string_primary_key_table_name} SELECT toString(rand64(0)), toString(rand64(1)), toString(rand64(2)) FROM system.numbers LIMIT 1000000</query>
|
||||
<query>INSERT INTO {string_primary_key_table_name} SELECT toString(rand64(0)), toString(rand64(1)), toString(rand64(2)) FROM system.numbers LIMIT 1500000</query>
|
||||
|
||||
<query>INSERT INTO {decimal_primary_key_table_name} SELECT rand64(0) % 1000000, rand64(1) % 1500000, rand64(2) % 2000000 FROM system.numbers LIMIT 500000</query>
|
||||
<query>INSERT INTO {decimal_primary_key_table_name} SELECT rand64(0) % 1000000, rand64(1) % 1500000, rand64(2) % 2000000 FROM system.numbers LIMIT 1000000</query>
|
||||
<query>INSERT INTO {decimal_primary_key_table_name} SELECT rand64(0) % 1000000, rand64(1) % 1500000, rand64(2) % 2000000 FROM system.numbers LIMIT 1500000</query>
|
||||
|
||||
<drop_query>DROP TABLE IF EXISTS {integer_primary_key_table_name}</drop_query>
|
||||
<drop_query>DROP TABLE IF EXISTS {string_primary_key_table_name}</drop_query>
|
||||
<drop_query>DROP TABLE IF EXISTS {decimal_primary_key_table_name}</drop_query>
|
||||
|
||||
</test>
|
||||
|
28
tests/performance/sort_patterns.xml
Normal file
28
tests/performance/sort_patterns.xml
Normal file
@ -0,0 +1,28 @@
|
||||
<test>
|
||||
<substitutions>
|
||||
<substitution>
|
||||
<name>integer_type</name>
|
||||
<values>
|
||||
<value>UInt32</value>
|
||||
<value>UInt64</value>
|
||||
</values>
|
||||
</substitution>
|
||||
<substitution>
|
||||
<name>sort_expression</name>
|
||||
<values>
|
||||
<value>key</value>
|
||||
<value>key, value</value>
|
||||
<value>key DESC</value>
|
||||
<value>key DESC, value DESC</value>
|
||||
</values>
|
||||
</substitution>
|
||||
</substitutions>
|
||||
|
||||
<create_query>CREATE TABLE sequential_{integer_type} (key {integer_type}, value {integer_type}) Engine = Memory</create_query>
|
||||
|
||||
<fill_query>INSERT INTO sequential_{integer_type} SELECT number, number FROM numbers(500000000)</fill_query>
|
||||
|
||||
<query>SELECT key, value FROM sequential_{integer_type} ORDER BY {sort_expression} FORMAT Null</query>
|
||||
|
||||
<drop_query>DROP TABLE IF EXISTS sequential_{integer_type}</drop_query>
|
||||
</test>
|
Loading…
Reference in New Issue
Block a user