radix sort for asof join

This commit is contained in:
chertus 2019-04-12 15:48:00 +03:00
parent bf8641c560
commit 4192deb5ad
2 changed files with 56 additions and 19 deletions

View File

@ -64,15 +64,15 @@ struct RadixSortFloatTransform
};
template <typename Float>
template <typename _Element, typename _Key = _Element>
struct RadixSortFloatTraits
{
using Element = Float; /// The type of the element. It can be a structure with a key and some other payload. Or just a key.
using Key = Float; /// The key to sort.
using Element = _Element; /// The type of the element. It can be a structure with a key and some other payload. Or just a key.
using Key = _Key; /// The key to sort.
using CountType = uint32_t; /// Type for calculating histograms. In the case of a known small number of elements, it can be less than size_t.
/// The type to which the key is transformed to do bit operations. This UInt is the same size as the key.
using KeyBits = std::conditional_t<sizeof(Float) == 8, uint64_t, uint32_t>;
using KeyBits = std::conditional_t<sizeof(_Key) == 8, uint64_t, uint32_t>;
static constexpr size_t PART_SIZE_BITS = 8; /// With what pieces of the key, in bits, to do one pass - reshuffle of the array.
@ -85,7 +85,13 @@ struct RadixSortFloatTraits
using Allocator = RadixSortMallocAllocator;
/// The function to get the key from an array element.
static Key & extractKey(Element & elem) { return elem; }
/** Returns a reference to the sort key stored inside `elem`.
  * If Element and Key are the same type, the element itself is the key.
  * Otherwise the start of the element's storage is reinterpreted as a Key.
  * NOTE(review): that presumes the key is the first member of Element (offset 0) - confirm for every Element type used.
  */
static Key & extractKey(Element & elem)
{
    if constexpr (std::is_same_v<Element, Key>)
    {
        return elem;
    }
    else
    {
        /// A key wider than the element would make the cast below refer to memory outside the object.
        static_assert(sizeof(Key) <= sizeof(Element), "Key must fit inside Element");
        return *reinterpret_cast<Key *>(&elem);
    }
}
};
@ -109,13 +115,13 @@ struct RadixSortSignedTransform
};
template <typename UInt>
template <typename _Element, typename _Key = _Element>
struct RadixSortUIntTraits
{
using Element = UInt;
using Key = UInt;
using Element = _Element;
using Key = _Key;
using CountType = uint32_t;
using KeyBits = UInt;
using KeyBits = _Key;
static constexpr size_t PART_SIZE_BITS = 8;
@ -123,16 +129,22 @@ struct RadixSortUIntTraits
using Allocator = RadixSortMallocAllocator;
/// The function to get the key from an array element.
static Key & extractKey(Element & elem) { return elem; }
/** Returns a reference to the unsigned integer sort key stored inside `elem`.
  * If Element and Key are the same type, the element itself is the key.
  * Otherwise the start of the element's storage is reinterpreted as a Key.
  * NOTE(review): that presumes the key is the first member of Element (offset 0) - confirm for every Element type used.
  */
static Key & extractKey(Element & elem)
{
    if constexpr (std::is_same_v<Element, Key>)
    {
        return elem;
    }
    else
    {
        /// A key wider than the element would make the cast below refer to memory outside the object.
        static_assert(sizeof(Key) <= sizeof(Element), "Key must fit inside Element");
        return *reinterpret_cast<Key *>(&elem);
    }
}
};
template <typename Int>
template <typename _Element, typename _Key = _Element>
struct RadixSortIntTraits
{
using Element = Int;
using Key = Int;
using Element = _Element;
using Key = _Key;
using CountType = uint32_t;
using KeyBits = std::make_unsigned_t<Int>;
using KeyBits = std::make_unsigned_t<_Key>;
static constexpr size_t PART_SIZE_BITS = 8;
@ -140,7 +152,13 @@ struct RadixSortIntTraits
using Allocator = RadixSortMallocAllocator;
/// The function to get the key from an array element.
static Key & extractKey(Element & elem) { return elem; }
/** Returns a reference to the signed integer sort key stored inside `elem`.
  * If Element and Key are the same type, the element itself is the key.
  * Otherwise the start of the element's storage is reinterpreted as a Key.
  * NOTE(review): that presumes the key is the first member of Element (offset 0) - confirm for every Element type used.
  */
static Key & extractKey(Element & elem)
{
    if constexpr (std::is_same_v<Element, Key>)
    {
        return elem;
    }
    else
    {
        /// A key wider than the element would make the cast below refer to memory outside the object.
        static_assert(sizeof(Key) <= sizeof(Element), "Key must fit inside Element");
        return *reinterpret_cast<Key *>(&elem);
    }
}
};
@ -261,3 +279,16 @@ radixSort(T * arr, size_t size)
return RadixSort<RadixSortFloatTraits<T>>::execute(arr, size);
}
template <typename _Element, typename _Key>
std::enable_if_t<std::is_integral_v<_Key>, void>
radixSort(_Element * arr, size_t size)
{
return RadixSort<RadixSortUIntTraits<_Element, _Key>>::execute(arr, size);
}
template <typename _Element, typename _Key>
std::enable_if_t<std::is_floating_point_v<_Key>, void>
radixSort(_Element * arr, size_t size)
{
return RadixSort<RadixSortFloatTraits<_Element, _Key>>::execute(arr, size);
}

View File

@ -1,5 +1,6 @@
#pragma once
#include <Common/RadixSort.h>
#include <Columns/IColumn.h>
#include <optional>
@ -39,11 +40,11 @@ struct RowRefList : RowRef
* references that can be returned by the lookup methods
*/
template <typename T>
template <typename _Entry, typename _Key>
class SortedLookupVector
{
public:
using Base = std::vector<T>;
using Base = std::vector<_Entry>;
// First stage, insertions into the vector
template <typename U, typename ... TAllocatorParams>
@ -54,7 +55,7 @@ public:
}
// Transition into second stage, ensures that the vector is sorted
typename Base::const_iterator upper_bound(const T & k)
typename Base::const_iterator upper_bound(const _Entry & k)
{
sort();
return std::upper_bound(array.cbegin(), array.cend(), k);
@ -81,7 +82,12 @@ private:
std::lock_guard<std::mutex> l(lock);
/// Sort only if the array has not been marked as sorted yet.
if (!sorted.load(std::memory_order_relaxed))
{
/// The radix sort branch is compiled out for now; the std::sort branch below is the active one.
#if 0 /// TODO: Check correctness + perf test for 32/64 bits.
/// Guard so that &array[0] is never taken on an empty container.
if (!array.empty())
radixSort<_Entry, _Key>(&array[0], array.size());
#else
std::sort(array.begin(), array.end());
#endif
/// Mark the array as sorted (release store) so the sort is not repeated.
sorted.store(true, std::memory_order_release);
}
}
@ -94,7 +100,7 @@ public:
template <typename T>
struct Entry
{
using LookupType = SortedLookupVector<Entry<T>>;
using LookupType = SortedLookupVector<Entry<T>, T>;
using LookupPtr = std::unique_ptr<LookupType>;
T asof_value;
RowRef row_ref;