mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-25 17:12:03 +00:00
Fix loop unrolling
This commit is contained in:
parent
1dc284d9f1
commit
82f03fa8ce
@ -109,6 +109,7 @@ class QuantileTDigest
|
||||
using KeyBits = UInt32;
|
||||
|
||||
static constexpr size_t PART_SIZE_BITS = 8;
|
||||
static constexpr bool DIRECT_WRITE_TO_DESTINATION = false;
|
||||
|
||||
using Transform = RadixSortFloatTransform<KeyBits>;
|
||||
using Allocator = RadixSortMallocAllocator;
|
||||
|
@ -119,6 +119,8 @@ namespace
|
||||
{
|
||||
using Element = ValueWithIndex<T>;
|
||||
using Result = size_t;
|
||||
static constexpr bool DIRECT_WRITE_TO_DESTINATION = true;
|
||||
|
||||
static T & extractKey(Element & elem) { return elem.value; }
|
||||
static size_t extractResult(Element & elem) { return elem.index; }
|
||||
};
|
||||
|
@ -87,6 +87,7 @@ struct RadixSortFloatTraits
|
||||
using KeyBits = std::conditional_t<sizeof(Key) == 8, uint64_t, uint32_t>;
|
||||
|
||||
static constexpr size_t PART_SIZE_BITS = 8; /// With what pieces of the key, in bits, to do one pass - reshuffle of the array.
|
||||
static constexpr bool DIRECT_WRITE_TO_DESTINATION = false;
|
||||
|
||||
/// Converting a key into KeyBits is such that the order relation over the key corresponds to the order relation over KeyBits.
|
||||
using Transform = RadixSortFloatTransform<KeyBits>;
|
||||
@ -131,6 +132,7 @@ struct RadixSortUIntTraits
|
||||
using KeyBits = Key;
|
||||
|
||||
static constexpr size_t PART_SIZE_BITS = 8;
|
||||
static constexpr bool DIRECT_WRITE_TO_DESTINATION = false;
|
||||
|
||||
using Transform = RadixSortIdentityTransform<KeyBits>;
|
||||
using Allocator = RadixSortMallocAllocator;
|
||||
@ -165,6 +167,7 @@ struct RadixSortIntTraits
|
||||
using KeyBits = std::make_unsigned_t<Key>;
|
||||
|
||||
static constexpr size_t PART_SIZE_BITS = 8;
|
||||
static constexpr bool DIRECT_WRITE_TO_DESTINATION = false;
|
||||
|
||||
using Transform = RadixSortSignedTransform<KeyBits>;
|
||||
using Allocator = RadixSortMallocAllocator;
|
||||
@ -328,7 +331,7 @@ public:
|
||||
* In this case it will fill only Result parts of the Element into destination.
|
||||
* It is handy to avoid unnecessary data movements.
|
||||
*/
|
||||
static void executeLSD(Element * arr, size_t size, bool reverse = false, Result * destination = nullptr)
|
||||
static NO_INLINE void executeLSD(Element * arr, size_t size, bool reverse = false, Result * destination = nullptr)
|
||||
{
|
||||
/// If the array is smaller than 256, then it is better to use another algorithm.
|
||||
|
||||
@ -368,10 +371,8 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
bool direct_copy_to_destination = (destination);
|
||||
|
||||
/// Move the elements in the order starting from the least bit piece, and then do a few passes on the number of pieces.
|
||||
for (size_t pass = 0; pass < NUM_PASSES - direct_copy_to_destination; ++pass)
|
||||
for (size_t pass = 0; pass < NUM_PASSES - Traits::DIRECT_WRITE_TO_DESTINATION; ++pass)
|
||||
{
|
||||
Element * writer = pass % 2 ? arr : swap_buffer;
|
||||
Element * reader = pass % 2 ? swap_buffer : arr;
|
||||
@ -390,9 +391,9 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
if (direct_copy_to_destination)
|
||||
if (Traits::DIRECT_WRITE_TO_DESTINATION)
|
||||
{
|
||||
size_t pass = NUM_PASSES - 1;
|
||||
constexpr size_t pass = NUM_PASSES - 1;
|
||||
Result * writer = destination;
|
||||
Element * reader = pass % 2 ? swap_buffer : arr;
|
||||
|
||||
@ -401,8 +402,6 @@ public:
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
size_t pos = getPart(pass, keyToBits(Traits::extractKey(reader[i])));
|
||||
|
||||
/// Place the element on the next free position.
|
||||
writer[size - 1 - (++histograms[pass * HISTOGRAM_SIZE + pos])] = Traits::extractResult(reader[i]);
|
||||
}
|
||||
}
|
||||
@ -411,21 +410,19 @@ public:
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
size_t pos = getPart(pass, keyToBits(Traits::extractKey(reader[i])));
|
||||
|
||||
/// Place the element on the next free position.
|
||||
writer[++histograms[pass * HISTOGRAM_SIZE + pos]] = Traits::extractResult(reader[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (NUM_PASSES % 2)
|
||||
else
|
||||
{
|
||||
/// If the number of passes is odd, the result array is in a temporary buffer. Copy it to the place of the original array.
|
||||
/// NOTE Sometimes it will be more optimal to provide non-destructive interface, that will not modify original array.
|
||||
memcpy(arr, swap_buffer, size * sizeof(Element));
|
||||
}
|
||||
else if (reverse)
|
||||
{
|
||||
std::reverse(arr, arr + size);
|
||||
if (NUM_PASSES % 2)
|
||||
memcpy(arr, swap_buffer, size * sizeof(Element));
|
||||
|
||||
/// This is suboptimal, we can embed it to the last pass.
|
||||
if (reverse)
|
||||
std::reverse(arr, arr + size);
|
||||
}
|
||||
|
||||
allocator.deallocate(swap_buffer, size * sizeof(Element));
|
||||
|
Loading…
Reference in New Issue
Block a user