mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-25 17:12:03 +00:00
Merge pull request #5787 from yandex/aku/podarray-bytes
Fix initial size of some inline PODArray's.
This commit is contained in:
commit
a0d3db8d9b
@ -47,8 +47,7 @@ struct AggregateFunctionSequenceMatchData final
|
||||
using Comparator = ComparePairFirst<std::less>;
|
||||
|
||||
bool sorted = true;
|
||||
static constexpr size_t bytes_in_arena = 64;
|
||||
PODArray<TimestampEvents, bytes_in_arena, AllocatorWithStackMemory<Allocator<false>, bytes_in_arena>> events_list;
|
||||
PODArrayWithStackMemory<TimestampEvents, 64> events_list;
|
||||
|
||||
void add(const Timestamp timestamp, const Events & events)
|
||||
{
|
||||
@ -203,8 +202,7 @@ private:
|
||||
PatternAction(const PatternActionType type, const std::uint64_t extra = 0) : type{type}, extra{extra} {}
|
||||
};
|
||||
|
||||
static constexpr size_t bytes_on_stack = 64;
|
||||
using PatternActions = PODArray<PatternAction, bytes_on_stack, AllocatorWithStackMemory<Allocator<false>, bytes_on_stack>>;
|
||||
using PatternActions = PODArrayWithStackMemory<PatternAction, 64>;
|
||||
|
||||
Derived & derived() { return static_cast<Derived &>(*this); }
|
||||
|
||||
|
@ -68,9 +68,8 @@ struct AggregateFunctionTimeSeriesGroupSumData
|
||||
}
|
||||
};
|
||||
|
||||
static constexpr size_t bytes_on_stack = 128;
|
||||
typedef std::map<UInt64, Points> Series;
|
||||
typedef PODArray<DataPoint, bytes_on_stack, AllocatorWithStackMemory<Allocator<false>, bytes_on_stack>> AggSeries;
|
||||
typedef PODArrayWithStackMemory<DataPoint, 128> AggSeries;
|
||||
Series ss;
|
||||
AggSeries result;
|
||||
|
||||
|
@ -35,10 +35,7 @@ template <typename T>
|
||||
struct AggregateFunctionWindowFunnelData
|
||||
{
|
||||
using TimestampEvent = std::pair<T, UInt8>;
|
||||
|
||||
static constexpr size_t bytes_on_stack = 64;
|
||||
using TimestampEvents = PODArray<TimestampEvent, bytes_on_stack, AllocatorWithStackMemory<Allocator<false>, bytes_on_stack>>;
|
||||
|
||||
using TimestampEvents = PODArray<TimestampEvent, 64>;
|
||||
using Comparator = ComparePairFirst;
|
||||
|
||||
bool sorted = true;
|
||||
|
@ -27,8 +27,7 @@ struct QuantileExact
|
||||
{
|
||||
/// The memory will be allocated to several elements at once, so that the state occupies 64 bytes.
|
||||
static constexpr size_t bytes_in_arena = 64 - sizeof(PODArray<Value>);
|
||||
|
||||
using Array = PODArray<Value, bytes_in_arena, AllocatorWithStackMemory<Allocator<false>, bytes_in_arena>>;
|
||||
using Array = PODArrayWithStackMemory<Value, bytes_in_arena>;
|
||||
Array array;
|
||||
|
||||
void add(const Value & x)
|
||||
|
@ -86,8 +86,7 @@ class QuantileTDigest
|
||||
|
||||
/// The memory will be allocated to several elements at once, so that the state occupies 128 bytes.
|
||||
static constexpr size_t bytes_in_arena = 128 - sizeof(PODArray<Centroid>) - sizeof(Count) - sizeof(UInt32);
|
||||
|
||||
using Summary = PODArray<Centroid, bytes_in_arena / sizeof(Centroid), AllocatorWithStackMemory<Allocator<false>, bytes_in_arena>>;
|
||||
using Summary = PODArrayWithStackMemory<Centroid, bytes_in_arena>;
|
||||
|
||||
Summary summary;
|
||||
Count count = 0;
|
||||
|
@ -194,8 +194,7 @@ private:
|
||||
friend void rs_perf_test();
|
||||
|
||||
/// We allocate a little memory on the stack - to avoid allocations when there are many objects with a small number of elements.
|
||||
static constexpr size_t bytes_on_stack = 64;
|
||||
using Array = DB::PODArray<T, bytes_on_stack / sizeof(T), AllocatorWithStackMemory<Allocator<false>, bytes_on_stack>>;
|
||||
using Array = DB::PODArrayWithStackMemory<T, 64>;
|
||||
|
||||
size_t sample_count;
|
||||
size_t total_values = 0;
|
||||
|
@ -164,9 +164,8 @@ public:
|
||||
|
||||
private:
|
||||
/// We allocate some memory on the stack to avoid allocations when there are many objects with a small number of elements.
|
||||
static constexpr size_t bytes_on_stack = 64;
|
||||
using Element = std::pair<T, UInt32>;
|
||||
using Array = DB::PODArray<Element, bytes_on_stack / sizeof(Element), AllocatorWithStackMemory<Allocator<false>, bytes_on_stack>>;
|
||||
using Array = DB::PODArray<Element, 64>;
|
||||
|
||||
size_t sample_count;
|
||||
size_t total_values{};
|
||||
|
@ -45,7 +45,7 @@ inline constexpr size_t integerRoundUp(size_t value, size_t dividend)
|
||||
* Only part of the std::vector interface is supported.
|
||||
*
|
||||
* The default constructor creates an empty object that does not allocate memory.
|
||||
* Then the memory is allocated at least INITIAL_SIZE bytes.
|
||||
* Then the memory is allocated at least initial_bytes bytes.
|
||||
*
|
||||
* If you insert elements with push_back, without making a `reserve`, then PODArray is about 2.5 times faster than std::vector.
|
||||
*
|
||||
@ -74,7 +74,7 @@ extern const char EmptyPODArray[EmptyPODArraySize];
|
||||
/** Base class that depends only on size of element, not on element itself.
|
||||
* You can static_cast to this class if you want to insert some data regardless of the actual type T.
|
||||
*/
|
||||
template <size_t ELEMENT_SIZE, size_t INITIAL_SIZE, typename TAllocator, size_t pad_right_, size_t pad_left_>
|
||||
template <size_t ELEMENT_SIZE, size_t initial_bytes, typename TAllocator, size_t pad_right_, size_t pad_left_>
|
||||
class PODArrayBase : private boost::noncopyable, private TAllocator /// empty base optimization
|
||||
{
|
||||
protected:
|
||||
@ -161,7 +161,8 @@ protected:
|
||||
{
|
||||
// The allocated memory should be a multiple of ELEMENT_SIZE to hold the element, otherwise,
|
||||
// memory issues such as corruption could appear in edge cases.
|
||||
realloc(std::max(((INITIAL_SIZE - 1) / ELEMENT_SIZE + 1) * ELEMENT_SIZE, minimum_memory_for_elements(1)),
|
||||
realloc(std::max(integerRoundUp(initial_bytes, ELEMENT_SIZE),
|
||||
minimum_memory_for_elements(1)),
|
||||
std::forward<TAllocatorParams>(allocator_params)...);
|
||||
}
|
||||
else
|
||||
@ -257,11 +258,11 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, size_t INITIAL_SIZE = 4096, typename TAllocator = Allocator<false>, size_t pad_right_ = 0, size_t pad_left_ = 0>
|
||||
class PODArray : public PODArrayBase<sizeof(T), INITIAL_SIZE, TAllocator, pad_right_, pad_left_>
|
||||
template <typename T, size_t initial_bytes = 4096, typename TAllocator = Allocator<false>, size_t pad_right_ = 0, size_t pad_left_ = 0>
|
||||
class PODArray : public PODArrayBase<sizeof(T), initial_bytes, TAllocator, pad_right_, pad_left_>
|
||||
{
|
||||
protected:
|
||||
using Base = PODArrayBase<sizeof(T), INITIAL_SIZE, TAllocator, pad_right_, pad_left_>;
|
||||
using Base = PODArrayBase<sizeof(T), initial_bytes, TAllocator, pad_right_, pad_left_>;
|
||||
|
||||
T * t_start() { return reinterpret_cast<T *>(this->c_start); }
|
||||
T * t_end() { return reinterpret_cast<T *>(this->c_end); }
|
||||
@ -618,17 +619,23 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, size_t INITIAL_SIZE, typename TAllocator, size_t pad_right_>
|
||||
void swap(PODArray<T, INITIAL_SIZE, TAllocator, pad_right_> & lhs, PODArray<T, INITIAL_SIZE, TAllocator, pad_right_> & rhs)
|
||||
template <typename T, size_t initial_bytes, typename TAllocator, size_t pad_right_>
|
||||
void swap(PODArray<T, initial_bytes, TAllocator, pad_right_> & lhs, PODArray<T, initial_bytes, TAllocator, pad_right_> & rhs)
|
||||
{
|
||||
lhs.swap(rhs);
|
||||
}
|
||||
|
||||
/** For columns. Padding is enough to read and write xmm-register at the address of the last element. */
|
||||
template <typename T, size_t INITIAL_SIZE = 4096, typename TAllocator = Allocator<false>>
|
||||
using PaddedPODArray = PODArray<T, INITIAL_SIZE, TAllocator, 15, 16>;
|
||||
template <typename T, size_t initial_bytes = 4096, typename TAllocator = Allocator<false>>
|
||||
using PaddedPODArray = PODArray<T, initial_bytes, TAllocator, 15, 16>;
|
||||
|
||||
template <typename T, size_t stack_size_in_bytes>
|
||||
using PODArrayWithStackMemory = PODArray<T, 0, AllocatorWithStackMemory<Allocator<false>, integerRoundUp(stack_size_in_bytes, sizeof(T))>>;
|
||||
/** A helper for declaring PODArray that uses inline memory.
|
||||
* The initial size is set to use all the inline bytes, since using less would
|
||||
* only add some extra allocation calls.
|
||||
*/
|
||||
template <typename T, size_t inline_bytes,
|
||||
size_t rounded_bytes = integerRoundUp(inline_bytes, sizeof(T))>
|
||||
using PODArrayWithStackMemory = PODArray<T, rounded_bytes,
|
||||
AllocatorWithStackMemory<Allocator<false>, rounded_bytes>>;
|
||||
|
||||
}
|
||||
|
@ -91,8 +91,7 @@ struct ExtractBool
|
||||
|
||||
struct ExtractRaw
|
||||
{
|
||||
static constexpr size_t bytes_on_stack = 64;
|
||||
using ExpectChars = PODArray<char, bytes_on_stack, AllocatorWithStackMemory<Allocator<false>, bytes_on_stack>>;
|
||||
using ExpectChars = PODArrayWithStackMemory<char, 64>;
|
||||
|
||||
static void extract(const UInt8 * pos, const UInt8 * end, ColumnString::Chars & res_data)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user