Merge pull request #5787 from yandex/aku/podarray-bytes

Fix initial size of some inline PODArray's.
This commit is contained in:
alexey-milovidov 2019-06-29 15:34:00 +03:00 committed by GitHub
commit a0d3db8d9b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 28 additions and 32 deletions

View File

@ -47,8 +47,7 @@ struct AggregateFunctionSequenceMatchData final
using Comparator = ComparePairFirst<std::less>;
bool sorted = true;
static constexpr size_t bytes_in_arena = 64;
PODArray<TimestampEvents, bytes_in_arena, AllocatorWithStackMemory<Allocator<false>, bytes_in_arena>> events_list;
PODArrayWithStackMemory<TimestampEvents, 64> events_list;
void add(const Timestamp timestamp, const Events & events)
{
@ -203,8 +202,7 @@ private:
PatternAction(const PatternActionType type, const std::uint64_t extra = 0) : type{type}, extra{extra} {}
};
static constexpr size_t bytes_on_stack = 64;
using PatternActions = PODArray<PatternAction, bytes_on_stack, AllocatorWithStackMemory<Allocator<false>, bytes_on_stack>>;
using PatternActions = PODArrayWithStackMemory<PatternAction, 64>;
Derived & derived() { return static_cast<Derived &>(*this); }

View File

@ -68,9 +68,8 @@ struct AggregateFunctionTimeSeriesGroupSumData
}
};
static constexpr size_t bytes_on_stack = 128;
typedef std::map<UInt64, Points> Series;
typedef PODArray<DataPoint, bytes_on_stack, AllocatorWithStackMemory<Allocator<false>, bytes_on_stack>> AggSeries;
typedef PODArrayWithStackMemory<DataPoint, 128> AggSeries;
Series ss;
AggSeries result;

View File

@ -35,10 +35,7 @@ template <typename T>
struct AggregateFunctionWindowFunnelData
{
using TimestampEvent = std::pair<T, UInt8>;
static constexpr size_t bytes_on_stack = 64;
using TimestampEvents = PODArray<TimestampEvent, bytes_on_stack, AllocatorWithStackMemory<Allocator<false>, bytes_on_stack>>;
using TimestampEvents = PODArray<TimestampEvent, 64>;
using Comparator = ComparePairFirst;
bool sorted = true;

View File

@ -27,8 +27,7 @@ struct QuantileExact
{
/// The memory will be allocated to several elements at once, so that the state occupies 64 bytes.
static constexpr size_t bytes_in_arena = 64 - sizeof(PODArray<Value>);
using Array = PODArray<Value, bytes_in_arena, AllocatorWithStackMemory<Allocator<false>, bytes_in_arena>>;
using Array = PODArrayWithStackMemory<Value, bytes_in_arena>;
Array array;
void add(const Value & x)

View File

@ -86,8 +86,7 @@ class QuantileTDigest
/// The memory will be allocated to several elements at once, so that the state occupies 64 bytes.
static constexpr size_t bytes_in_arena = 128 - sizeof(PODArray<Centroid>) - sizeof(Count) - sizeof(UInt32);
using Summary = PODArray<Centroid, bytes_in_arena / sizeof(Centroid), AllocatorWithStackMemory<Allocator<false>, bytes_in_arena>>;
using Summary = PODArrayWithStackMemory<Centroid, bytes_in_arena>;
Summary summary;
Count count = 0;

View File

@ -194,8 +194,7 @@ private:
friend void rs_perf_test();
/// We allocate a little memory on the stack - to avoid allocations when there are many objects with a small number of elements.
static constexpr size_t bytes_on_stack = 64;
using Array = DB::PODArray<T, bytes_on_stack / sizeof(T), AllocatorWithStackMemory<Allocator<false>, bytes_on_stack>>;
using Array = DB::PODArrayWithStackMemory<T, 64>;
size_t sample_count;
size_t total_values = 0;

View File

@ -164,9 +164,8 @@ public:
private:
/// We allocate some memory on the stack to avoid allocations when there are many objects with a small number of elements.
static constexpr size_t bytes_on_stack = 64;
using Element = std::pair<T, UInt32>;
using Array = DB::PODArray<Element, bytes_on_stack / sizeof(Element), AllocatorWithStackMemory<Allocator<false>, bytes_on_stack>>;
using Array = DB::PODArray<Element, 64>;
size_t sample_count;
size_t total_values{};

View File

@ -45,7 +45,7 @@ inline constexpr size_t integerRoundUp(size_t value, size_t dividend)
* Only part of the std::vector interface is supported.
*
* The default constructor creates an empty object that does not allocate memory.
* Then the memory is allocated at least INITIAL_SIZE bytes.
* Then the memory is allocated at least initial_bytes bytes.
*
* If you insert elements with push_back, without making a `reserve`, then PODArray is about 2.5 times faster than std::vector.
*
@ -74,7 +74,7 @@ extern const char EmptyPODArray[EmptyPODArraySize];
/** Base class that depend only on size of element, not on element itself.
* You can static_cast to this class if you want to insert some data regardless to the actual type T.
*/
template <size_t ELEMENT_SIZE, size_t INITIAL_SIZE, typename TAllocator, size_t pad_right_, size_t pad_left_>
template <size_t ELEMENT_SIZE, size_t initial_bytes, typename TAllocator, size_t pad_right_, size_t pad_left_>
class PODArrayBase : private boost::noncopyable, private TAllocator /// empty base optimization
{
protected:
@ -161,7 +161,8 @@ protected:
{
// The allocated memory should be multiplication of ELEMENT_SIZE to hold the element, otherwise,
// memory issue such as corruption could appear in edge case.
realloc(std::max(((INITIAL_SIZE - 1) / ELEMENT_SIZE + 1) * ELEMENT_SIZE, minimum_memory_for_elements(1)),
realloc(std::max(integerRoundUp(initial_bytes, ELEMENT_SIZE),
minimum_memory_for_elements(1)),
std::forward<TAllocatorParams>(allocator_params)...);
}
else
@ -257,11 +258,11 @@ public:
}
};
template <typename T, size_t INITIAL_SIZE = 4096, typename TAllocator = Allocator<false>, size_t pad_right_ = 0, size_t pad_left_ = 0>
class PODArray : public PODArrayBase<sizeof(T), INITIAL_SIZE, TAllocator, pad_right_, pad_left_>
template <typename T, size_t initial_bytes = 4096, typename TAllocator = Allocator<false>, size_t pad_right_ = 0, size_t pad_left_ = 0>
class PODArray : public PODArrayBase<sizeof(T), initial_bytes, TAllocator, pad_right_, pad_left_>
{
protected:
using Base = PODArrayBase<sizeof(T), INITIAL_SIZE, TAllocator, pad_right_, pad_left_>;
using Base = PODArrayBase<sizeof(T), initial_bytes, TAllocator, pad_right_, pad_left_>;
T * t_start() { return reinterpret_cast<T *>(this->c_start); }
T * t_end() { return reinterpret_cast<T *>(this->c_end); }
@ -618,17 +619,23 @@ public:
}
};
template <typename T, size_t INITIAL_SIZE, typename TAllocator, size_t pad_right_>
void swap(PODArray<T, INITIAL_SIZE, TAllocator, pad_right_> & lhs, PODArray<T, INITIAL_SIZE, TAllocator, pad_right_> & rhs)
template <typename T, size_t initial_bytes, typename TAllocator, size_t pad_right_>
void swap(PODArray<T, initial_bytes, TAllocator, pad_right_> & lhs, PODArray<T, initial_bytes, TAllocator, pad_right_> & rhs)
{
lhs.swap(rhs);
}
/** For columns. Padding is enough to read and write xmm-register at the address of the last element. */
template <typename T, size_t INITIAL_SIZE = 4096, typename TAllocator = Allocator<false>>
using PaddedPODArray = PODArray<T, INITIAL_SIZE, TAllocator, 15, 16>;
template <typename T, size_t initial_bytes = 4096, typename TAllocator = Allocator<false>>
using PaddedPODArray = PODArray<T, initial_bytes, TAllocator, 15, 16>;
template <typename T, size_t stack_size_in_bytes>
using PODArrayWithStackMemory = PODArray<T, 0, AllocatorWithStackMemory<Allocator<false>, integerRoundUp(stack_size_in_bytes, sizeof(T))>>;
/** A helper for declaring PODArray that uses inline memory.
* The initial size is set to use all the inline bytes, since using less would
* only add some extra allocation calls.
*/
template <typename T, size_t inline_bytes,
size_t rounded_bytes = integerRoundUp(inline_bytes, sizeof(T))>
using PODArrayWithStackMemory = PODArray<T, rounded_bytes,
AllocatorWithStackMemory<Allocator<false>, rounded_bytes>>;
}

View File

@ -91,8 +91,7 @@ struct ExtractBool
struct ExtractRaw
{
static constexpr size_t bytes_on_stack = 64;
using ExpectChars = PODArray<char, bytes_on_stack, AllocatorWithStackMemory<Allocator<false>, bytes_on_stack>>;
using ExpectChars = PODArrayWithStackMemory<char, 64>;
static void extract(const UInt8 * pos, const UInt8 * end, ColumnString::Chars & res_data)
{