Merge pull request #5787 from yandex/aku/podarray-bytes

Fix initial size of some inline PODArray's.
This commit is contained in:
alexey-milovidov 2019-06-29 15:34:00 +03:00 committed by GitHub
commit a0d3db8d9b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 28 additions and 32 deletions

View File

@ -47,8 +47,7 @@ struct AggregateFunctionSequenceMatchData final
using Comparator = ComparePairFirst<std::less>; using Comparator = ComparePairFirst<std::less>;
bool sorted = true; bool sorted = true;
static constexpr size_t bytes_in_arena = 64; PODArrayWithStackMemory<TimestampEvents, 64> events_list;
PODArray<TimestampEvents, bytes_in_arena, AllocatorWithStackMemory<Allocator<false>, bytes_in_arena>> events_list;
void add(const Timestamp timestamp, const Events & events) void add(const Timestamp timestamp, const Events & events)
{ {
@ -203,8 +202,7 @@ private:
PatternAction(const PatternActionType type, const std::uint64_t extra = 0) : type{type}, extra{extra} {} PatternAction(const PatternActionType type, const std::uint64_t extra = 0) : type{type}, extra{extra} {}
}; };
static constexpr size_t bytes_on_stack = 64; using PatternActions = PODArrayWithStackMemory<PatternAction, 64>;
using PatternActions = PODArray<PatternAction, bytes_on_stack, AllocatorWithStackMemory<Allocator<false>, bytes_on_stack>>;
Derived & derived() { return static_cast<Derived &>(*this); } Derived & derived() { return static_cast<Derived &>(*this); }

View File

@ -68,9 +68,8 @@ struct AggregateFunctionTimeSeriesGroupSumData
} }
}; };
static constexpr size_t bytes_on_stack = 128;
typedef std::map<UInt64, Points> Series; typedef std::map<UInt64, Points> Series;
typedef PODArray<DataPoint, bytes_on_stack, AllocatorWithStackMemory<Allocator<false>, bytes_on_stack>> AggSeries; typedef PODArrayWithStackMemory<DataPoint, 128> AggSeries;
Series ss; Series ss;
AggSeries result; AggSeries result;

View File

@ -35,10 +35,7 @@ template <typename T>
struct AggregateFunctionWindowFunnelData struct AggregateFunctionWindowFunnelData
{ {
using TimestampEvent = std::pair<T, UInt8>; using TimestampEvent = std::pair<T, UInt8>;
using TimestampEvents = PODArray<TimestampEvent, 64>;
static constexpr size_t bytes_on_stack = 64;
using TimestampEvents = PODArray<TimestampEvent, bytes_on_stack, AllocatorWithStackMemory<Allocator<false>, bytes_on_stack>>;
using Comparator = ComparePairFirst; using Comparator = ComparePairFirst;
bool sorted = true; bool sorted = true;

View File

@ -27,8 +27,7 @@ struct QuantileExact
{ {
/// The memory will be allocated to several elements at once, so that the state occupies 64 bytes. /// The memory will be allocated to several elements at once, so that the state occupies 64 bytes.
static constexpr size_t bytes_in_arena = 64 - sizeof(PODArray<Value>); static constexpr size_t bytes_in_arena = 64 - sizeof(PODArray<Value>);
using Array = PODArrayWithStackMemory<Value, bytes_in_arena>;
using Array = PODArray<Value, bytes_in_arena, AllocatorWithStackMemory<Allocator<false>, bytes_in_arena>>;
Array array; Array array;
void add(const Value & x) void add(const Value & x)

View File

@ -86,8 +86,7 @@ class QuantileTDigest
/// The memory will be allocated to several elements at once, so that the state occupies 64 bytes. /// The memory will be allocated to several elements at once, so that the state occupies 64 bytes.
static constexpr size_t bytes_in_arena = 128 - sizeof(PODArray<Centroid>) - sizeof(Count) - sizeof(UInt32); static constexpr size_t bytes_in_arena = 128 - sizeof(PODArray<Centroid>) - sizeof(Count) - sizeof(UInt32);
using Summary = PODArrayWithStackMemory<Centroid, bytes_in_arena>;
using Summary = PODArray<Centroid, bytes_in_arena / sizeof(Centroid), AllocatorWithStackMemory<Allocator<false>, bytes_in_arena>>;
Summary summary; Summary summary;
Count count = 0; Count count = 0;

View File

@ -194,8 +194,7 @@ private:
friend void rs_perf_test(); friend void rs_perf_test();
/// We allocate a little memory on the stack - to avoid allocations when there are many objects with a small number of elements. /// We allocate a little memory on the stack - to avoid allocations when there are many objects with a small number of elements.
static constexpr size_t bytes_on_stack = 64; using Array = DB::PODArrayWithStackMemory<T, 64>;
using Array = DB::PODArray<T, bytes_on_stack / sizeof(T), AllocatorWithStackMemory<Allocator<false>, bytes_on_stack>>;
size_t sample_count; size_t sample_count;
size_t total_values = 0; size_t total_values = 0;

View File

@ -164,9 +164,8 @@ public:
private: private:
/// We allocate some memory on the stack to avoid allocations when there are many objects with a small number of elements. /// We allocate some memory on the stack to avoid allocations when there are many objects with a small number of elements.
static constexpr size_t bytes_on_stack = 64;
using Element = std::pair<T, UInt32>; using Element = std::pair<T, UInt32>;
using Array = DB::PODArray<Element, bytes_on_stack / sizeof(Element), AllocatorWithStackMemory<Allocator<false>, bytes_on_stack>>; using Array = DB::PODArray<Element, 64>;
size_t sample_count; size_t sample_count;
size_t total_values{}; size_t total_values{};

View File

@ -45,7 +45,7 @@ inline constexpr size_t integerRoundUp(size_t value, size_t dividend)
* Only part of the std::vector interface is supported. * Only part of the std::vector interface is supported.
* *
* The default constructor creates an empty object that does not allocate memory. * The default constructor creates an empty object that does not allocate memory.
* Then the memory is allocated at least INITIAL_SIZE bytes. * Then the memory is allocated at least initial_bytes bytes.
* *
* If you insert elements with push_back, without making a `reserve`, then PODArray is about 2.5 times faster than std::vector. * If you insert elements with push_back, without making a `reserve`, then PODArray is about 2.5 times faster than std::vector.
* *
@ -74,7 +74,7 @@ extern const char EmptyPODArray[EmptyPODArraySize];
/** Base class that depend only on size of element, not on element itself. /** Base class that depend only on size of element, not on element itself.
* You can static_cast to this class if you want to insert some data regardless to the actual type T. * You can static_cast to this class if you want to insert some data regardless to the actual type T.
*/ */
template <size_t ELEMENT_SIZE, size_t INITIAL_SIZE, typename TAllocator, size_t pad_right_, size_t pad_left_> template <size_t ELEMENT_SIZE, size_t initial_bytes, typename TAllocator, size_t pad_right_, size_t pad_left_>
class PODArrayBase : private boost::noncopyable, private TAllocator /// empty base optimization class PODArrayBase : private boost::noncopyable, private TAllocator /// empty base optimization
{ {
protected: protected:
@ -161,7 +161,8 @@ protected:
{ {
// The allocated memory should be multiplication of ELEMENT_SIZE to hold the element, otherwise, // The allocated memory should be multiplication of ELEMENT_SIZE to hold the element, otherwise,
// memory issue such as corruption could appear in edge case. // memory issue such as corruption could appear in edge case.
realloc(std::max(((INITIAL_SIZE - 1) / ELEMENT_SIZE + 1) * ELEMENT_SIZE, minimum_memory_for_elements(1)), realloc(std::max(integerRoundUp(initial_bytes, ELEMENT_SIZE),
minimum_memory_for_elements(1)),
std::forward<TAllocatorParams>(allocator_params)...); std::forward<TAllocatorParams>(allocator_params)...);
} }
else else
@ -257,11 +258,11 @@ public:
} }
}; };
template <typename T, size_t INITIAL_SIZE = 4096, typename TAllocator = Allocator<false>, size_t pad_right_ = 0, size_t pad_left_ = 0> template <typename T, size_t initial_bytes = 4096, typename TAllocator = Allocator<false>, size_t pad_right_ = 0, size_t pad_left_ = 0>
class PODArray : public PODArrayBase<sizeof(T), INITIAL_SIZE, TAllocator, pad_right_, pad_left_> class PODArray : public PODArrayBase<sizeof(T), initial_bytes, TAllocator, pad_right_, pad_left_>
{ {
protected: protected:
using Base = PODArrayBase<sizeof(T), INITIAL_SIZE, TAllocator, pad_right_, pad_left_>; using Base = PODArrayBase<sizeof(T), initial_bytes, TAllocator, pad_right_, pad_left_>;
T * t_start() { return reinterpret_cast<T *>(this->c_start); } T * t_start() { return reinterpret_cast<T *>(this->c_start); }
T * t_end() { return reinterpret_cast<T *>(this->c_end); } T * t_end() { return reinterpret_cast<T *>(this->c_end); }
@ -618,17 +619,23 @@ public:
} }
}; };
template <typename T, size_t INITIAL_SIZE, typename TAllocator, size_t pad_right_> template <typename T, size_t initial_bytes, typename TAllocator, size_t pad_right_>
void swap(PODArray<T, INITIAL_SIZE, TAllocator, pad_right_> & lhs, PODArray<T, INITIAL_SIZE, TAllocator, pad_right_> & rhs) void swap(PODArray<T, initial_bytes, TAllocator, pad_right_> & lhs, PODArray<T, initial_bytes, TAllocator, pad_right_> & rhs)
{ {
lhs.swap(rhs); lhs.swap(rhs);
} }
/** For columns. Padding is enough to read and write xmm-register at the address of the last element. */ /** For columns. Padding is enough to read and write xmm-register at the address of the last element. */
template <typename T, size_t INITIAL_SIZE = 4096, typename TAllocator = Allocator<false>> template <typename T, size_t initial_bytes = 4096, typename TAllocator = Allocator<false>>
using PaddedPODArray = PODArray<T, INITIAL_SIZE, TAllocator, 15, 16>; using PaddedPODArray = PODArray<T, initial_bytes, TAllocator, 15, 16>;
template <typename T, size_t stack_size_in_bytes> /** A helper for declaring PODArray that uses inline memory.
using PODArrayWithStackMemory = PODArray<T, 0, AllocatorWithStackMemory<Allocator<false>, integerRoundUp(stack_size_in_bytes, sizeof(T))>>; * The initial size is set to use all the inline bytes, since using less would
* only add some extra allocation calls.
*/
template <typename T, size_t inline_bytes,
size_t rounded_bytes = integerRoundUp(inline_bytes, sizeof(T))>
using PODArrayWithStackMemory = PODArray<T, rounded_bytes,
AllocatorWithStackMemory<Allocator<false>, rounded_bytes>>;
} }

View File

@ -91,8 +91,7 @@ struct ExtractBool
struct ExtractRaw struct ExtractRaw
{ {
static constexpr size_t bytes_on_stack = 64; using ExpectChars = PODArrayWithStackMemory<char, 64>;
using ExpectChars = PODArray<char, bytes_on_stack, AllocatorWithStackMemory<Allocator<false>, bytes_on_stack>>;
static void extract(const UInt8 * pos, const UInt8 * end, ColumnString::Chars & res_data) static void extract(const UInt8 * pos, const UInt8 * end, ColumnString::Chars & res_data)
{ {