2013-09-15 10:53:53 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <string.h>
|
|
|
|
#include <cstddef>
|
2019-03-08 12:59:06 +00:00
|
|
|
#include <cassert>
|
2013-09-15 10:53:53 +00:00
|
|
|
#include <algorithm>
|
|
|
|
#include <memory>
|
|
|
|
|
|
|
|
#include <boost/noncopyable.hpp>
|
|
|
|
|
2015-09-29 19:19:54 +00:00
|
|
|
#include <common/strong_typedef.h>
|
2013-09-15 10:53:53 +00:00
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Common/Allocator.h>
|
|
|
|
#include <Common/Exception.h>
|
|
|
|
#include <Common/BitHelpers.h>
|
Padding for IO buffers.
Testing data
```
select 'aaaaaaaa','bbbbbbbb','cccccccc','dddddddd','eeeeeeee','ffffffff','gggg','hhh' from numbers(3000000) into outfile '/tmp/test.tsv'
```
Testing command
```
echo "select count() from file('/tmp/test.tsv', CSV, 'a String, b String, c String, d String, e String, f String, g String, h String') where not ignore(e)" | clickhouse-benchmark
```
TSV parser has less overhead than CSV, using it would better unveil the benefits of memcpySmall.
Before
```
QPS: 1.662, RPS: 4985463.906, MiB/s: 603.823, result RPS: 1.662, result MiB/s: 0.000.
0.000% 0.559 sec.
10.000% 0.564 sec.
20.000% 0.568 sec.
30.000% 0.572 sec.
40.000% 0.575 sec.
50.000% 0.581 sec.
60.000% 0.592 sec.
70.000% 0.624 sec.
80.000% 0.639 sec.
90.000% 0.664 sec.
95.000% 0.686 sec.
99.000% 0.711 sec.
99.900% 0.715 sec.
99.990% 0.716 sec.
```
After
```
QPS: 1.861, RPS: 5582303.107, MiB/s: 676.110, result RPS: 1.861, result MiB/s: 0.000.
0.000% 0.510 sec.
10.000% 0.514 sec.
20.000% 0.517 sec.
30.000% 0.521 sec.
40.000% 0.523 sec.
50.000% 0.527 sec.
60.000% 0.530 sec.
70.000% 0.539 sec.
80.000% 0.558 sec.
90.000% 0.584 sec.
95.000% 0.589 sec.
99.000% 0.608 sec.
99.900% 0.655 sec.
99.990% 0.663 sec.
```
2018-08-27 19:14:15 +00:00
|
|
|
#include <Common/memcpySmall.h>
|
2013-09-15 10:53:53 +00:00
|
|
|
|
2019-03-10 03:16:51 +00:00
|
|
|
#ifndef NDEBUG
|
|
|
|
#include <sys/mman.h>
|
|
|
|
#endif
|
|
|
|
|
2019-10-07 18:56:03 +00:00
|
|
|
#include <Common/PODArray_fwd.h>
|
|
|
|
|
2020-08-06 13:34:23 +00:00
|
|
|
/** Whether we can use memcpy instead of a loop with assignment to T from U.
  * It is Ok if the types are the same, and also if both types are integral and of the same size
  * (example: char, signed char, unsigned char).
  * It is not Ok for int and float.
  * Don't forget to apply std::decay to the arguments when using this constant.
  */
template <typename T, typename U>
constexpr bool memcpy_can_be_used_for_assignment = std::is_same_v<T, U>
    || (std::is_integral_v<T> && std::is_integral_v<U> && sizeof(T) == sizeof(U));
|
2013-09-15 10:53:53 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2019-03-10 03:16:51 +00:00
|
|
|
/// Error code used in this file when mprotect fails; it is only declared here and must be defined elsewhere.
namespace ErrorCodes
{
    extern const int CANNOT_MPROTECT;
}
|
|
|
|
|
2017-05-07 20:25:26 +00:00
|
|
|
/** A dynamic array for POD types.
|
|
|
|
* Designed for a small number of large arrays (rather than a lot of small ones).
|
|
|
|
* To be more precise - for use in ColumnVector.
|
|
|
|
* It differs from std::vector in that it does not initialize the elements.
|
2013-12-08 02:29:40 +00:00
|
|
|
*
|
2020-08-08 00:47:03 +00:00
|
|
|
* Made noncopyable so that there are no accidental copies. You can copy the data using `assign` method.
|
2013-12-08 02:29:40 +00:00
|
|
|
*
|
2017-05-07 20:25:26 +00:00
|
|
|
* Only part of the std::vector interface is supported.
|
2013-09-15 10:53:53 +00:00
|
|
|
*
|
2017-05-07 20:25:26 +00:00
|
|
|
* The default constructor creates an empty object that does not allocate memory.
|
2019-06-28 12:51:01 +00:00
|
|
|
* Then the memory is allocated at least initial_bytes bytes.
|
2013-12-08 02:29:40 +00:00
|
|
|
*
|
2017-05-07 20:25:26 +00:00
|
|
|
* If you insert elements with push_back, without making a `reserve`, then PODArray is about 2.5 times faster than std::vector.
|
2016-04-14 21:26:06 +00:00
|
|
|
*
|
2017-05-07 20:25:26 +00:00
|
|
|
* The template parameter `pad_right` - always allocate that many unused bytes at the end of the array.
|
|
|
|
* Can be used to make optimistic reading, writing, copying with unaligned SIMD instructions.
|
2017-07-24 13:32:31 +00:00
|
|
|
*
|
2018-12-25 19:31:18 +00:00
|
|
|
* The template parameter `pad_left` - always allocate memory before 0th element of the array (rounded up to the whole number of elements)
|
|
|
|
* and zero initialize -1th element. It allows to use -1th element that will have value 0.
|
|
|
|
* This gives performance benefits when converting an array of offsets to array of sizes.
|
|
|
|
*
|
2017-07-24 13:32:31 +00:00
|
|
|
* Some methods using allocator have TAllocatorParams variadic arguments.
|
|
|
|
* These arguments will be passed to corresponding methods of TAllocator.
|
2017-07-25 19:07:07 +00:00
|
|
|
* Example: pointer to Arena, that is used for allocations.
|
|
|
|
*
|
|
|
|
* Why Allocator is not passed through constructor, as it is done in C++ standard library?
|
|
|
|
* Because sometimes we have many small objects, that share same allocator with same parameters,
|
|
|
|
* and we must avoid larger object size due to storing the same parameters in each object.
|
|
|
|
* This is required for states of aggregate functions.
|
2018-09-02 19:20:27 +00:00
|
|
|
*
|
|
|
|
* TODO Pass alignment to Allocator.
|
|
|
|
* TODO Allow greater alignment than alignof(T). Example: array of char aligned to page size.
|
2013-09-15 10:53:53 +00:00
|
|
|
*/
|
2020-05-25 22:06:02 +00:00
|
|
|
/// Size in bytes of the shared static buffer declared below.
static constexpr size_t empty_pod_array_size = 1024;
/// Static memory that empty arrays point to instead of a heap allocation.
/// Only declared here; the definition must be provided elsewhere.
extern const char empty_pod_array[empty_pod_array_size];
|
2018-12-25 18:49:09 +00:00
|
|
|
|
2018-12-26 02:47:16 +00:00
|
|
|
/** Base class that depends only on the size of an element, not on the element type itself.
|
|
|
|
* You can static_cast to this class if you want to insert some data regardless of the actual type T.
|
|
|
|
*/
|
2019-12-15 06:34:43 +00:00
|
|
|
#pragma GCC diagnostic push
|
|
|
|
#pragma GCC diagnostic ignored "-Wnull-dereference"
|
|
|
|
|
2019-06-28 12:51:01 +00:00
|
|
|
template <size_t ELEMENT_SIZE, size_t initial_bytes, typename TAllocator, size_t pad_right_, size_t pad_left_>
|
2018-12-26 02:47:16 +00:00
|
|
|
class PODArrayBase : private boost::noncopyable, private TAllocator /// empty base optimization
|
2013-09-15 10:53:53 +00:00
|
|
|
{
|
2017-06-26 12:16:29 +00:00
|
|
|
protected:
|
2017-05-10 04:00:19 +00:00
|
|
|
/// Round padding up to an whole number of elements to simplify arithmetic.
|
2018-12-26 02:47:16 +00:00
|
|
|
static constexpr size_t pad_right = integerRoundUp(pad_right_, ELEMENT_SIZE);
|
2018-12-25 19:31:18 +00:00
|
|
|
/// pad_left is also rounded up to 16 bytes to maintain alignment of allocated memory.
|
2018-12-26 02:47:16 +00:00
|
|
|
static constexpr size_t pad_left = integerRoundUp(integerRoundUp(pad_left_, ELEMENT_SIZE), 16);
|
2021-01-02 14:07:54 +00:00
|
|
|
/// Empty array will point to this static memory as padding and begin/end.
|
|
|
|
static constexpr char * null = const_cast<char *>(empty_pod_array) + pad_left;
|
2018-12-25 18:49:09 +00:00
|
|
|
|
2020-05-25 22:06:02 +00:00
|
|
|
static_assert(pad_left <= empty_pod_array_size && "Left Padding exceeds empty_pod_array_size. Is the element size too large?");
|
2016-04-14 21:26:06 +00:00
|
|
|
|
2020-05-15 16:23:31 +00:00
|
|
|
// If we are using allocator with inline memory, the minimal size of
|
|
|
|
// array must be in sync with the size of this memory.
|
|
|
|
static_assert(allocatorInitialBytes<TAllocator> == 0
|
|
|
|
|| allocatorInitialBytes<TAllocator> == initial_bytes);
|
|
|
|
|
2018-12-24 14:26:38 +00:00
|
|
|
char * c_start = null; /// Does not include pad_left.
|
|
|
|
char * c_end = null;
|
|
|
|
char * c_end_of_storage = null; /// Does not include pad_right.
|
2013-09-15 10:53:53 +00:00
|
|
|
|
2017-05-07 20:25:26 +00:00
|
|
|
/// The amount of memory occupied by the num_elements of the elements.
|
2018-12-26 02:47:16 +00:00
|
|
|
static size_t byte_size(size_t num_elements) { return num_elements * ELEMENT_SIZE; }
|
2016-04-14 21:26:06 +00:00
|
|
|
|
2017-05-07 20:25:26 +00:00
|
|
|
/// Minimum amount of memory to allocate for num_elements, including padding.
|
2018-12-24 14:26:38 +00:00
|
|
|
static size_t minimum_memory_for_elements(size_t num_elements) { return byte_size(num_elements) + pad_right + pad_left; }
|
2013-09-15 10:53:53 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
void alloc_for_num_elements(size_t num_elements)
|
|
|
|
{
|
2020-08-10 20:36:52 +00:00
|
|
|
alloc(minimum_memory_for_elements(num_elements));
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
2014-05-03 22:57:43 +00:00
|
|
|
|
2017-07-24 13:32:31 +00:00
|
|
|
template <typename ... TAllocatorParams>
|
2017-12-26 17:53:31 +00:00
|
|
|
void alloc(size_t bytes, TAllocatorParams &&... allocator_params)
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2018-12-24 14:26:38 +00:00
|
|
|
c_start = c_end = reinterpret_cast<char *>(TAllocator::alloc(bytes, std::forward<TAllocatorParams>(allocator_params)...)) + pad_left;
|
|
|
|
c_end_of_storage = c_start + bytes - pad_right - pad_left;
|
2018-12-26 02:47:16 +00:00
|
|
|
|
2018-12-24 14:26:38 +00:00
|
|
|
if (pad_left)
|
2018-12-26 02:47:16 +00:00
|
|
|
memset(c_start - ELEMENT_SIZE, 0, ELEMENT_SIZE);
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
2013-09-15 10:53:53 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
void dealloc()
|
|
|
|
{
|
2018-12-24 14:26:38 +00:00
|
|
|
if (c_start == null)
|
2017-04-01 07:20:54 +00:00
|
|
|
return;
|
2014-07-06 04:22:12 +00:00
|
|
|
|
2019-03-10 03:16:51 +00:00
|
|
|
unprotect();
|
|
|
|
|
2018-12-24 14:26:38 +00:00
|
|
|
TAllocator::free(c_start - pad_left, allocated_bytes());
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
2013-09-15 10:53:53 +00:00
|
|
|
|
2017-07-24 13:32:31 +00:00
|
|
|
template <typename ... TAllocatorParams>
|
2017-12-26 17:53:31 +00:00
|
|
|
void realloc(size_t bytes, TAllocatorParams &&... allocator_params)
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2018-12-24 14:26:38 +00:00
|
|
|
if (c_start == null)
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2017-07-24 13:32:31 +00:00
|
|
|
alloc(bytes, std::forward<TAllocatorParams>(allocator_params)...);
|
2017-04-01 07:20:54 +00:00
|
|
|
return;
|
|
|
|
}
|
2014-07-06 04:22:12 +00:00
|
|
|
|
2019-03-10 03:16:51 +00:00
|
|
|
unprotect();
|
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
ptrdiff_t end_diff = c_end - c_start;
|
2013-09-15 10:53:53 +00:00
|
|
|
|
2018-12-24 14:26:38 +00:00
|
|
|
c_start = reinterpret_cast<char *>(
|
2018-12-26 02:47:16 +00:00
|
|
|
TAllocator::realloc(c_start - pad_left, allocated_bytes(), bytes, std::forward<TAllocatorParams>(allocator_params)...))
|
2018-12-24 14:26:38 +00:00
|
|
|
+ pad_left;
|
2018-12-26 02:47:16 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
c_end = c_start + end_diff;
|
2018-12-24 14:26:38 +00:00
|
|
|
c_end_of_storage = c_start + bytes - pad_right - pad_left;
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
2013-09-15 10:53:53 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
bool isInitialized() const
|
|
|
|
{
|
2018-12-24 14:26:38 +00:00
|
|
|
return (c_start != null) && (c_end != null) && (c_end_of_storage != null);
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
2016-07-08 14:53:00 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
bool isAllocatedFromStack() const
|
|
|
|
{
|
2019-12-19 07:42:46 +00:00
|
|
|
static constexpr size_t stack_threshold = TAllocator::getStackThreshold();
|
2017-07-13 16:49:09 +00:00
|
|
|
return (stack_threshold > 0) && (allocated_bytes() <= stack_threshold);
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
2016-07-07 22:49:38 +00:00
|
|
|
|
2017-07-25 19:07:07 +00:00
|
|
|
template <typename ... TAllocatorParams>
|
2017-12-26 17:53:31 +00:00
|
|
|
void reserveForNextSize(TAllocatorParams &&... allocator_params)
|
2017-07-25 19:07:07 +00:00
|
|
|
{
|
2020-03-08 21:40:00 +00:00
|
|
|
if (empty())
|
2017-12-26 17:51:56 +00:00
|
|
|
{
|
2018-12-26 02:47:16 +00:00
|
|
|
// The allocated memory should be multiplication of ELEMENT_SIZE to hold the element, otherwise,
|
2017-12-26 10:40:11 +00:00
|
|
|
// memory issue such as corruption could appear in edge case.
|
2019-06-28 12:51:01 +00:00
|
|
|
realloc(std::max(integerRoundUp(initial_bytes, ELEMENT_SIZE),
|
|
|
|
minimum_memory_for_elements(1)),
|
2017-12-26 10:40:11 +00:00
|
|
|
std::forward<TAllocatorParams>(allocator_params)...);
|
2017-12-26 17:51:56 +00:00
|
|
|
}
|
2017-07-25 19:07:07 +00:00
|
|
|
else
|
|
|
|
realloc(allocated_bytes() * 2, std::forward<TAllocatorParams>(allocator_params)...);
|
|
|
|
}
|
|
|
|
|
2019-03-10 03:16:51 +00:00
|
|
|
#ifndef NDEBUG
|
|
|
|
/// Make memory region readonly with mprotect if it is large enough.
|
|
|
|
/// The operation is slow and performed only for debug builds.
|
|
|
|
void protectImpl(int prot)
|
|
|
|
{
|
2019-03-11 13:10:47 +00:00
|
|
|
static constexpr size_t PROTECT_PAGE_SIZE = 4096;
|
2019-03-10 03:16:51 +00:00
|
|
|
|
2019-03-11 13:10:47 +00:00
|
|
|
char * left_rounded_up = reinterpret_cast<char *>((reinterpret_cast<intptr_t>(c_start) - pad_left + PROTECT_PAGE_SIZE - 1) / PROTECT_PAGE_SIZE * PROTECT_PAGE_SIZE);
|
|
|
|
char * right_rounded_down = reinterpret_cast<char *>((reinterpret_cast<intptr_t>(c_end_of_storage) + pad_right) / PROTECT_PAGE_SIZE * PROTECT_PAGE_SIZE);
|
2019-03-10 03:16:51 +00:00
|
|
|
|
|
|
|
if (right_rounded_down > left_rounded_up)
|
|
|
|
{
|
|
|
|
size_t length = right_rounded_down - left_rounded_up;
|
|
|
|
if (0 != mprotect(left_rounded_up, length, prot))
|
|
|
|
throwFromErrno("Cannot mprotect memory region", ErrorCodes::CANNOT_MPROTECT);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Restore memory protection in destructor or realloc for further reuse by allocator.
|
|
|
|
bool mprotected = false;
|
|
|
|
#endif
|
|
|
|
|
2013-09-15 10:53:53 +00:00
|
|
|
public:
|
2018-12-26 02:47:16 +00:00
|
|
|
bool empty() const { return c_end == c_start; }
|
|
|
|
size_t size() const { return (c_end - c_start) / ELEMENT_SIZE; }
|
|
|
|
size_t capacity() const { return (c_end_of_storage - c_start) / ELEMENT_SIZE; }
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2019-03-08 03:32:43 +00:00
|
|
|
/// This method is safe to use only for information about memory usage.
|
2018-12-24 14:26:38 +00:00
|
|
|
size_t allocated_bytes() const { return c_end_of_storage - c_start + pad_right + pad_left; }
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2018-12-26 02:47:16 +00:00
|
|
|
void clear() { c_end = c_start; }
|
|
|
|
|
|
|
|
template <typename ... TAllocatorParams>
|
2020-09-17 10:28:54 +00:00
|
|
|
#if defined(__clang__)
|
|
|
|
ALWAYS_INLINE /// Better performance in clang build, worse performance in gcc build.
|
|
|
|
#endif
|
2020-08-05 14:12:41 +00:00
|
|
|
void reserve(size_t n, TAllocatorParams &&... allocator_params)
|
2018-12-26 02:47:16 +00:00
|
|
|
{
|
|
|
|
if (n > capacity())
|
|
|
|
realloc(roundUpToPowerOfTwoOrZero(minimum_memory_for_elements(n)), std::forward<TAllocatorParams>(allocator_params)...);
|
|
|
|
}
|
|
|
|
|
2020-08-07 15:18:32 +00:00
|
|
|
template <typename ... TAllocatorParams>
|
|
|
|
void reserve_exact(size_t n, TAllocatorParams &&... allocator_params)
|
|
|
|
{
|
|
|
|
if (n > capacity())
|
|
|
|
realloc(minimum_memory_for_elements(n), std::forward<TAllocatorParams>(allocator_params)...);
|
|
|
|
}
|
|
|
|
|
2018-12-26 02:47:16 +00:00
|
|
|
template <typename ... TAllocatorParams>
|
|
|
|
void resize(size_t n, TAllocatorParams &&... allocator_params)
|
|
|
|
{
|
|
|
|
reserve(n, std::forward<TAllocatorParams>(allocator_params)...);
|
|
|
|
resize_assume_reserved(n);
|
|
|
|
}
|
|
|
|
|
2020-08-07 15:18:32 +00:00
|
|
|
template <typename ... TAllocatorParams>
|
|
|
|
void resize_exact(size_t n, TAllocatorParams &&... allocator_params)
|
|
|
|
{
|
|
|
|
reserve_exact(n, std::forward<TAllocatorParams>(allocator_params)...);
|
|
|
|
resize_assume_reserved(n);
|
|
|
|
}
|
|
|
|
|
2018-12-26 02:47:16 +00:00
|
|
|
void resize_assume_reserved(const size_t n)
|
|
|
|
{
|
|
|
|
c_end = c_start + byte_size(n);
|
|
|
|
}
|
|
|
|
|
|
|
|
const char * raw_data() const
|
|
|
|
{
|
|
|
|
return c_start;
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename ... TAllocatorParams>
|
2020-05-08 14:59:58 +00:00
|
|
|
void push_back_raw(const void * ptr, TAllocatorParams &&... allocator_params)
|
|
|
|
{
|
|
|
|
push_back_raw_many(1, ptr, std::forward<TAllocatorParams>(allocator_params)...);
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename ... TAllocatorParams>
|
|
|
|
void push_back_raw_many(size_t number_of_items, const void * ptr, TAllocatorParams &&... allocator_params)
|
2018-12-26 02:47:16 +00:00
|
|
|
{
|
2020-06-20 05:39:52 +00:00
|
|
|
size_t required_capacity = size() + number_of_items;
|
|
|
|
if (unlikely(required_capacity > capacity()))
|
|
|
|
reserve(required_capacity, std::forward<TAllocatorParams>(allocator_params)...);
|
2018-12-26 02:47:16 +00:00
|
|
|
|
2020-06-20 07:45:51 +00:00
|
|
|
size_t items_byte_size = byte_size(number_of_items);
|
2021-01-02 14:07:54 +00:00
|
|
|
if (items_byte_size)
|
|
|
|
{
|
|
|
|
memcpy(c_end, ptr, items_byte_size);
|
|
|
|
c_end += items_byte_size;
|
|
|
|
}
|
2018-12-26 02:47:16 +00:00
|
|
|
}
|
|
|
|
|
2019-03-10 03:16:51 +00:00
|
|
|
void protect()
|
|
|
|
{
|
|
|
|
#ifndef NDEBUG
|
|
|
|
protectImpl(PROT_READ);
|
|
|
|
mprotected = true;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
void unprotect()
|
|
|
|
{
|
|
|
|
#ifndef NDEBUG
|
|
|
|
if (mprotected)
|
|
|
|
protectImpl(PROT_WRITE);
|
|
|
|
mprotected = false;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2021-01-02 14:07:54 +00:00
|
|
|
template <typename It1, typename It2>
|
|
|
|
inline void assertNotIntersects(It1 from_begin [[maybe_unused]], It2 from_end [[maybe_unused]])
|
|
|
|
{
|
|
|
|
#if !defined(NDEBUG)
|
|
|
|
const char * ptr_begin = reinterpret_cast<const char *>(&*from_begin);
|
|
|
|
const char * ptr_end = reinterpret_cast<const char *>(&*from_end);
|
|
|
|
|
|
|
|
/// Also it's safe if the range is empty.
|
2021-01-03 15:07:18 +00:00
|
|
|
assert(!((ptr_begin >= c_start && ptr_begin < c_end) || (ptr_end > c_start && ptr_end <= c_end)) || (ptr_begin == ptr_end));
|
2021-01-02 14:07:54 +00:00
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2018-12-26 02:47:16 +00:00
|
|
|
~PODArrayBase()
|
|
|
|
{
|
|
|
|
dealloc();
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2019-10-07 18:56:03 +00:00
|
|
|
template <typename T, size_t initial_bytes, typename TAllocator, size_t pad_right_, size_t pad_left_>
|
2019-06-28 12:51:01 +00:00
|
|
|
class PODArray : public PODArrayBase<sizeof(T), initial_bytes, TAllocator, pad_right_, pad_left_>
|
2018-12-26 02:47:16 +00:00
|
|
|
{
|
|
|
|
protected:
|
2019-06-28 12:51:01 +00:00
|
|
|
using Base = PODArrayBase<sizeof(T), initial_bytes, TAllocator, pad_right_, pad_left_>;
|
2018-12-26 02:47:16 +00:00
|
|
|
|
|
|
|
/// Typed views of the byte pointers c_start, c_end and c_end_of_storage kept by the base class.
T * t_start() { return reinterpret_cast<T *>(this->c_start); }
T * t_end() { return reinterpret_cast<T *>(this->c_end); }
T * t_end_of_storage() { return reinterpret_cast<T *>(this->c_end_of_storage); }

const T * t_start() const { return reinterpret_cast<const T *>(this->c_start); }
const T * t_end() const { return reinterpret_cast<const T *>(this->c_end); }
const T * t_end_of_storage() const { return reinterpret_cast<const T *>(this->c_end_of_storage); }
|
|
|
|
|
|
|
|
public:
|
|
|
|
using value_type = T;
|
|
|
|
|
2020-03-03 20:00:28 +00:00
|
|
|
/// We cannot use boost::iterator_adaptor, because it defeats loop vectorization,
|
|
|
|
/// see https://github.com/ClickHouse/ClickHouse/pull/9442
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2020-03-03 20:00:28 +00:00
|
|
|
using iterator = T *;
|
|
|
|
using const_iterator = const T *;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
|
|
|
|
/// Creates an empty array. Nothing is allocated here: the body is empty, so the base class members keep their default initializers.
PODArray() {}
|
|
|
|
|
|
|
|
PODArray(size_t n)
|
|
|
|
{
|
2018-12-26 02:47:16 +00:00
|
|
|
this->alloc_for_num_elements(n);
|
|
|
|
this->c_end += this->byte_size(n);
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
PODArray(size_t n, const T & x)
|
|
|
|
{
|
2018-12-26 02:47:16 +00:00
|
|
|
this->alloc_for_num_elements(n);
|
2017-04-01 07:20:54 +00:00
|
|
|
assign(n, x);
|
|
|
|
}
|
|
|
|
|
2020-02-28 19:14:13 +00:00
|
|
|
/// Creates an array from the range [from_begin, from_end): allocates memory for exactly
/// that many elements plus padding, then inserts the range.
PODArray(const_iterator from_begin, const_iterator from_end)
{
    this->alloc_for_num_elements(from_end - from_begin);
    insert(from_begin, from_end);
}
|
|
|
|
|
2020-08-03 17:15:08 +00:00
|
|
|
PODArray(std::initializer_list<T> il)
|
|
|
|
{
|
2020-08-03 17:22:59 +00:00
|
|
|
this->reserve(std::size(il));
|
2020-08-03 17:15:08 +00:00
|
|
|
|
|
|
|
for (const auto & x : il)
|
|
|
|
{
|
2020-08-03 17:22:59 +00:00
|
|
|
this->push_back(x);
|
2020-08-03 17:15:08 +00:00
|
|
|
}
|
|
|
|
}
|
2017-12-15 21:32:25 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
/// Move constructor: starts from the default (empty) state and swaps contents with `other`.
PODArray(PODArray && other)
{
    this->swap(other);
}
|
|
|
|
|
|
|
|
/// Move assignment is implemented as a swap, so `other` receives the previous contents of this array.
PODArray & operator=(PODArray && other)
{
    this->swap(other);
    return *this;
}
|
|
|
|
|
|
|
|
/// Pointer to the 0th element.
T * data() { return t_start(); }
const T * data() const { return t_start(); }
|
|
|
|
|
2018-12-26 05:34:25 +00:00
|
|
|
/// The index is signed to access -1th element without pointer overflow.
|
2019-03-07 20:04:59 +00:00
|
|
|
T & operator[] (ssize_t n)
|
|
|
|
{
|
|
|
|
/// <= size, because taking address of one element past memory range is Ok in C++ (expression like &arr[arr.size()] is perfectly valid).
|
|
|
|
assert((n >= (static_cast<ssize_t>(pad_left_) ? -1 : 0)) && (n <= static_cast<ssize_t>(this->size())));
|
|
|
|
return t_start()[n];
|
|
|
|
}
|
|
|
|
|
|
|
|
const T & operator[] (ssize_t n) const
|
|
|
|
{
|
|
|
|
assert((n >= (static_cast<ssize_t>(pad_left_) ? -1 : 0)) && (n <= static_cast<ssize_t>(this->size())));
|
|
|
|
return t_start()[n];
|
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
/// First and last element. No emptiness check is performed.
T & front() { return t_start()[0]; }
T & back() { return t_end()[-1]; }
const T & front() const { return t_start()[0]; }
const T & back() const { return t_end()[-1]; }
|
|
|
|
|
2017-05-10 04:00:19 +00:00
|
|
|
iterator begin() { return t_start(); }
|
|
|
|
iterator end() { return t_end(); }
|
|
|
|
const_iterator begin() const { return t_start(); }
|
|
|
|
const_iterator end() const { return t_end(); }
|
|
|
|
const_iterator cbegin() const { return t_start(); }
|
|
|
|
const_iterator cend() const { return t_end(); }
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-05-10 04:00:19 +00:00
|
|
|
/// Same as resize, but zeroes new elements.
|
2017-04-01 07:20:54 +00:00
|
|
|
void resize_fill(size_t n)
|
|
|
|
{
|
2018-12-26 02:47:16 +00:00
|
|
|
size_t old_size = this->size();
|
2017-04-01 07:20:54 +00:00
|
|
|
if (n > old_size)
|
|
|
|
{
|
2018-12-26 02:47:16 +00:00
|
|
|
this->reserve(n);
|
|
|
|
memset(this->c_end, 0, this->byte_size(n - old_size));
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
2018-12-26 02:47:16 +00:00
|
|
|
this->c_end = this->c_start + this->byte_size(n);
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void resize_fill(size_t n, const T & value)
|
|
|
|
{
|
2018-12-26 02:47:16 +00:00
|
|
|
size_t old_size = this->size();
|
2017-04-01 07:20:54 +00:00
|
|
|
if (n > old_size)
|
|
|
|
{
|
2018-12-26 02:47:16 +00:00
|
|
|
this->reserve(n);
|
2017-04-01 07:20:54 +00:00
|
|
|
std::fill(t_end(), t_end() + n - old_size, value);
|
|
|
|
}
|
2018-12-26 02:47:16 +00:00
|
|
|
this->c_end = this->c_start + this->byte_size(n);
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
|
2019-01-08 14:56:50 +00:00
|
|
|
template <typename U, typename ... TAllocatorParams>
|
|
|
|
void push_back(U && x, TAllocatorParams &&... allocator_params)
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2018-12-26 02:47:16 +00:00
|
|
|
if (unlikely(this->c_end == this->c_end_of_storage))
|
|
|
|
this->reserveForNextSize(std::forward<TAllocatorParams>(allocator_params)...);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2019-01-08 14:56:50 +00:00
|
|
|
new (t_end()) T(std::forward<U>(x));
|
2018-12-26 02:47:16 +00:00
|
|
|
this->c_end += this->byte_size(1);
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
|
2017-07-25 19:07:07 +00:00
|
|
|
/** This method doesn't allow to pass parameters for Allocator,
|
|
|
|
* and it couldn't be used if Allocator requires custom parameters.
|
|
|
|
*/
|
2017-04-01 07:20:54 +00:00
|
|
|
template <typename... Args>
|
|
|
|
void emplace_back(Args &&... args)
|
|
|
|
{
|
2018-12-26 02:47:16 +00:00
|
|
|
if (unlikely(this->c_end == this->c_end_of_storage))
|
|
|
|
this->reserveForNextSize();
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
new (t_end()) T(std::forward<Args>(args)...);
|
2018-12-26 02:47:16 +00:00
|
|
|
this->c_end += this->byte_size(1);
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void pop_back()
|
|
|
|
{
|
2018-12-26 02:47:16 +00:00
|
|
|
this->c_end -= this->byte_size(1);
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
|
2017-05-10 04:00:19 +00:00
|
|
|
/// Do not insert into the array a piece of itself. Because with the resize, the iterators on themselves can be invalidated.
|
2020-02-28 19:14:13 +00:00
|
|
|
template <typename It1, typename It2, typename ... TAllocatorParams>
|
Padding for IO buffers.
Testing data
```
select 'aaaaaaaa','bbbbbbbb','cccccccc','dddddddd','eeeeeeee','ffffffff','gggg','hhh' from numbers(3000000) into outfile '/tmp/test.tsv'
```
Testing command
```
echo "select count() from file('/tmp/test.tsv', CSV, 'a String, b String, c String, d String, e String, f String, g String, h String') where not ignore(e)" | clickhouse-benchmark
```
TSV parser has less overhead than CSV, using it would better unveil the benefits of memcpySmall.
Before
```
QPS: 1.662, RPS: 4985463.906, MiB/s: 603.823, result RPS: 1.662, result MiB/s: 0.000.
0.000% 0.559 sec.
10.000% 0.564 sec.
20.000% 0.568 sec.
30.000% 0.572 sec.
40.000% 0.575 sec.
50.000% 0.581 sec.
60.000% 0.592 sec.
70.000% 0.624 sec.
80.000% 0.639 sec.
90.000% 0.664 sec.
95.000% 0.686 sec.
99.000% 0.711 sec.
99.900% 0.715 sec.
99.990% 0.716 sec.
```
After
```
QPS: 1.861, RPS: 5582303.107, MiB/s: 676.110, result RPS: 1.861, result MiB/s: 0.000.
0.000% 0.510 sec.
10.000% 0.514 sec.
20.000% 0.517 sec.
30.000% 0.521 sec.
40.000% 0.523 sec.
50.000% 0.527 sec.
60.000% 0.530 sec.
70.000% 0.539 sec.
80.000% 0.558 sec.
90.000% 0.584 sec.
95.000% 0.589 sec.
99.000% 0.608 sec.
99.900% 0.655 sec.
99.990% 0.663 sec.
```
2018-08-27 19:14:15 +00:00
|
|
|
void insertPrepare(It1 from_begin, It2 from_end, TAllocatorParams &&... allocator_params)
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2021-01-02 14:07:54 +00:00
|
|
|
this->assertNotIntersects(from_begin, from_end);
|
2018-12-26 02:47:16 +00:00
|
|
|
size_t required_capacity = this->size() + (from_end - from_begin);
|
|
|
|
if (required_capacity > this->capacity())
|
|
|
|
this->reserve(roundUpToPowerOfTwoOrZero(required_capacity), std::forward<TAllocatorParams>(allocator_params)...);
|
Padding for IO buffers.
Testing data
```
select 'aaaaaaaa','bbbbbbbb','cccccccc','dddddddd','eeeeeeee','ffffffff','gggg','hhh' from numbers(3000000) into outfile '/tmp/test.tsv'
```
Testing command
```
echo "select count() from file('/tmp/test.tsv', CSV, 'a String, b String, c String, d String, e String, f String, g String, h String') where not ignore(e)" | clickhouse-benchmark
```
TSV parser has less overhead than CSV, using it would better unveil the benefits of memcpySmall.
Before
```
QPS: 1.662, RPS: 4985463.906, MiB/s: 603.823, result RPS: 1.662, result MiB/s: 0.000.
0.000% 0.559 sec.
10.000% 0.564 sec.
20.000% 0.568 sec.
30.000% 0.572 sec.
40.000% 0.575 sec.
50.000% 0.581 sec.
60.000% 0.592 sec.
70.000% 0.624 sec.
80.000% 0.639 sec.
90.000% 0.664 sec.
95.000% 0.686 sec.
99.000% 0.711 sec.
99.900% 0.715 sec.
99.990% 0.716 sec.
```
After
```
QPS: 1.861, RPS: 5582303.107, MiB/s: 676.110, result RPS: 1.861, result MiB/s: 0.000.
0.000% 0.510 sec.
10.000% 0.514 sec.
20.000% 0.517 sec.
30.000% 0.521 sec.
40.000% 0.523 sec.
50.000% 0.527 sec.
60.000% 0.530 sec.
70.000% 0.539 sec.
80.000% 0.558 sec.
90.000% 0.584 sec.
95.000% 0.589 sec.
99.000% 0.608 sec.
99.900% 0.655 sec.
99.990% 0.663 sec.
```
2018-08-27 19:14:15 +00:00
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
Padding for IO buffers.
Testing data
```
select 'aaaaaaaa','bbbbbbbb','cccccccc','dddddddd','eeeeeeee','ffffffff','gggg','hhh' from numbers(3000000) into outfile '/tmp/test.tsv'
```
Testing command
```
echo "select count() from file('/tmp/test.tsv', CSV, 'a String, b String, c String, d String, e String, f String, g String, h String') where not ignore(e)" | clickhouse-benchmark
```
TSV parser has less overhead than CSV, using it would better unveil the benefits of memcpySmall.
Before
```
QPS: 1.662, RPS: 4985463.906, MiB/s: 603.823, result RPS: 1.662, result MiB/s: 0.000.
0.000% 0.559 sec.
10.000% 0.564 sec.
20.000% 0.568 sec.
30.000% 0.572 sec.
40.000% 0.575 sec.
50.000% 0.581 sec.
60.000% 0.592 sec.
70.000% 0.624 sec.
80.000% 0.639 sec.
90.000% 0.664 sec.
95.000% 0.686 sec.
99.000% 0.711 sec.
99.900% 0.715 sec.
99.990% 0.716 sec.
```
After
```
QPS: 1.861, RPS: 5582303.107, MiB/s: 676.110, result RPS: 1.861, result MiB/s: 0.000.
0.000% 0.510 sec.
10.000% 0.514 sec.
20.000% 0.517 sec.
30.000% 0.521 sec.
40.000% 0.523 sec.
50.000% 0.527 sec.
60.000% 0.530 sec.
70.000% 0.539 sec.
80.000% 0.558 sec.
90.000% 0.584 sec.
95.000% 0.589 sec.
99.000% 0.608 sec.
99.900% 0.655 sec.
99.990% 0.663 sec.
```
2018-08-27 19:14:15 +00:00
|
|
|
/// Do not insert into the array a piece of itself. Because with the resize, the iterators on themselves can be invalidated.
/// Appends the range [from_begin, from_end): first makes room, then copies the elements.
template <typename It1, typename It2, typename ... TAllocatorParams>
void insert(It1 from_begin, It2 from_end, TAllocatorParams &&... allocator_params)
{
    insertPrepare(from_begin, from_end, std::forward<TAllocatorParams>(allocator_params)...);
    insert_assume_reserved(from_begin, from_end);
}
|
|
|
|
|
2021-01-02 14:07:54 +00:00
|
|
|
/// In contrast to 'insert' this method is Ok even for inserting from itself.
|
|
|
|
/// Because we obtain iterators after reserving memory.
|
|
|
|
template <typename Container, typename ... TAllocatorParams>
|
|
|
|
void insertByOffsets(Container && rhs, size_t from_begin, size_t from_end, TAllocatorParams &&... allocator_params)
|
|
|
|
{
|
|
|
|
static_assert(memcpy_can_be_used_for_assignment<std::decay_t<T>, std::decay_t<decltype(rhs.front())>>);
|
|
|
|
|
|
|
|
assert(from_end >= from_begin);
|
|
|
|
assert(from_end <= rhs.size());
|
|
|
|
|
|
|
|
size_t required_capacity = this->size() + (from_end - from_begin);
|
|
|
|
if (required_capacity > this->capacity())
|
|
|
|
this->reserve(roundUpToPowerOfTwoOrZero(required_capacity), std::forward<TAllocatorParams>(allocator_params)...);
|
|
|
|
|
|
|
|
size_t bytes_to_copy = this->byte_size(from_end - from_begin);
|
|
|
|
if (bytes_to_copy)
|
|
|
|
{
|
|
|
|
memcpy(this->c_end, reinterpret_cast<const void *>(rhs.begin() + from_begin), bytes_to_copy);
|
|
|
|
this->c_end += bytes_to_copy;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Padding for IO buffers.
Testing data
```
select 'aaaaaaaa','bbbbbbbb','cccccccc','dddddddd','eeeeeeee','ffffffff','gggg','hhh' from numbers(3000000) into outfile '/tmp/test.tsv'
```
Testing command
```
echo "select count() from file('/tmp/test.tsv', CSV, 'a String, b String, c String, d String, e String, f String, g String, h String') where not ignore(e)" | clickhouse-benchmark
```
TSV parser has less overhead than CSV, using it would better unveil the benefits of memcpySmall.
Before
```
QPS: 1.662, RPS: 4985463.906, MiB/s: 603.823, result RPS: 1.662, result MiB/s: 0.000.
0.000% 0.559 sec.
10.000% 0.564 sec.
20.000% 0.568 sec.
30.000% 0.572 sec.
40.000% 0.575 sec.
50.000% 0.581 sec.
60.000% 0.592 sec.
70.000% 0.624 sec.
80.000% 0.639 sec.
90.000% 0.664 sec.
95.000% 0.686 sec.
99.000% 0.711 sec.
99.900% 0.715 sec.
99.990% 0.716 sec.
```
After
```
QPS: 1.861, RPS: 5582303.107, MiB/s: 676.110, result RPS: 1.861, result MiB/s: 0.000.
0.000% 0.510 sec.
10.000% 0.514 sec.
20.000% 0.517 sec.
30.000% 0.521 sec.
40.000% 0.523 sec.
50.000% 0.527 sec.
60.000% 0.530 sec.
70.000% 0.539 sec.
80.000% 0.558 sec.
90.000% 0.584 sec.
95.000% 0.589 sec.
99.000% 0.608 sec.
99.900% 0.655 sec.
99.990% 0.663 sec.
```
2018-08-27 19:14:15 +00:00
|
|
|
/// Works under assumption, that it's possible to read up to 15 excessive bytes after `from_end` and this PODArray is padded.
|
2020-02-28 19:14:13 +00:00
|
|
|
template <typename It1, typename It2, typename ... TAllocatorParams>
|
Padding for IO buffers.
Testing data
```
select 'aaaaaaaa','bbbbbbbb','cccccccc','dddddddd','eeeeeeee','ffffffff','gggg','hhh' from numbers(3000000) into outfile '/tmp/test.tsv'
```
Testing command
```
echo "select count() from file('/tmp/test.tsv', CSV, 'a String, b String, c String, d String, e String, f String, g String, h String') where not ignore(e)" | clickhouse-benchmark
```
TSV parser has less overhead than CSV, using it would better unveil the benefits of memcpySmall.
Before
```
QPS: 1.662, RPS: 4985463.906, MiB/s: 603.823, result RPS: 1.662, result MiB/s: 0.000.
0.000% 0.559 sec.
10.000% 0.564 sec.
20.000% 0.568 sec.
30.000% 0.572 sec.
40.000% 0.575 sec.
50.000% 0.581 sec.
60.000% 0.592 sec.
70.000% 0.624 sec.
80.000% 0.639 sec.
90.000% 0.664 sec.
95.000% 0.686 sec.
99.000% 0.711 sec.
99.900% 0.715 sec.
99.990% 0.716 sec.
```
After
```
QPS: 1.861, RPS: 5582303.107, MiB/s: 676.110, result RPS: 1.861, result MiB/s: 0.000.
0.000% 0.510 sec.
10.000% 0.514 sec.
20.000% 0.517 sec.
30.000% 0.521 sec.
40.000% 0.523 sec.
50.000% 0.527 sec.
60.000% 0.530 sec.
70.000% 0.539 sec.
80.000% 0.558 sec.
90.000% 0.584 sec.
95.000% 0.589 sec.
99.000% 0.608 sec.
99.900% 0.655 sec.
99.990% 0.663 sec.
```
2018-08-27 19:14:15 +00:00
|
|
|
void insertSmallAllowReadWriteOverflow15(It1 from_begin, It2 from_end, TAllocatorParams &&... allocator_params)
|
|
|
|
{
|
|
|
|
static_assert(pad_right_ >= 15);
|
2020-07-08 01:21:39 +00:00
|
|
|
static_assert(sizeof(T) == sizeof(*from_begin));
|
Padding for IO buffers.
Testing data
```
select 'aaaaaaaa','bbbbbbbb','cccccccc','dddddddd','eeeeeeee','ffffffff','gggg','hhh' from numbers(3000000) into outfile '/tmp/test.tsv'
```
Testing command
```
echo "select count() from file('/tmp/test.tsv', CSV, 'a String, b String, c String, d String, e String, f String, g String, h String') where not ignore(e)" | clickhouse-benchmark
```
TSV parser has less overhead than CSV, using it would better unveil the benefits of memcpySmall.
Before
```
QPS: 1.662, RPS: 4985463.906, MiB/s: 603.823, result RPS: 1.662, result MiB/s: 0.000.
0.000% 0.559 sec.
10.000% 0.564 sec.
20.000% 0.568 sec.
30.000% 0.572 sec.
40.000% 0.575 sec.
50.000% 0.581 sec.
60.000% 0.592 sec.
70.000% 0.624 sec.
80.000% 0.639 sec.
90.000% 0.664 sec.
95.000% 0.686 sec.
99.000% 0.711 sec.
99.900% 0.715 sec.
99.990% 0.716 sec.
```
After
```
QPS: 1.861, RPS: 5582303.107, MiB/s: 676.110, result RPS: 1.861, result MiB/s: 0.000.
0.000% 0.510 sec.
10.000% 0.514 sec.
20.000% 0.517 sec.
30.000% 0.521 sec.
40.000% 0.523 sec.
50.000% 0.527 sec.
60.000% 0.530 sec.
70.000% 0.539 sec.
80.000% 0.558 sec.
90.000% 0.584 sec.
95.000% 0.589 sec.
99.000% 0.608 sec.
99.900% 0.655 sec.
99.990% 0.663 sec.
```
2018-08-27 19:14:15 +00:00
|
|
|
insertPrepare(from_begin, from_end, std::forward<TAllocatorParams>(allocator_params)...);
|
2020-02-28 19:14:13 +00:00
|
|
|
size_t bytes_to_copy = this->byte_size(from_end - from_begin);
|
2018-12-26 02:47:16 +00:00
|
|
|
memcpySmallAllowReadWriteOverflow15(this->c_end, reinterpret_cast<const void *>(&*from_begin), bytes_to_copy);
|
|
|
|
this->c_end += bytes_to_copy;
|
Padding for IO buffers.
Testing data
```
select 'aaaaaaaa','bbbbbbbb','cccccccc','dddddddd','eeeeeeee','ffffffff','gggg','hhh' from numbers(3000000) into outfile '/tmp/test.tsv'
```
Testing command
```
echo "select count() from file('/tmp/test.tsv', CSV, 'a String, b String, c String, d String, e String, f String, g String, h String') where not ignore(e)" | clickhouse-benchmark
```
TSV parser has less overhead than CSV, using it would better unveil the benefits of memcpySmall.
Before
```
QPS: 1.662, RPS: 4985463.906, MiB/s: 603.823, result RPS: 1.662, result MiB/s: 0.000.
0.000% 0.559 sec.
10.000% 0.564 sec.
20.000% 0.568 sec.
30.000% 0.572 sec.
40.000% 0.575 sec.
50.000% 0.581 sec.
60.000% 0.592 sec.
70.000% 0.624 sec.
80.000% 0.639 sec.
90.000% 0.664 sec.
95.000% 0.686 sec.
99.000% 0.711 sec.
99.900% 0.715 sec.
99.990% 0.716 sec.
```
After
```
QPS: 1.861, RPS: 5582303.107, MiB/s: 676.110, result RPS: 1.861, result MiB/s: 0.000.
0.000% 0.510 sec.
10.000% 0.514 sec.
20.000% 0.517 sec.
30.000% 0.521 sec.
40.000% 0.523 sec.
50.000% 0.527 sec.
60.000% 0.530 sec.
70.000% 0.539 sec.
80.000% 0.558 sec.
90.000% 0.584 sec.
95.000% 0.589 sec.
99.000% 0.608 sec.
99.900% 0.655 sec.
99.990% 0.663 sec.
```
2018-08-27 19:14:15 +00:00
|
|
|
}
|
|
|
|
|
2020-07-09 01:21:25 +00:00
|
|
|
/// Do not insert into the array a piece of itself. Because with the resize, the iterators on themselves can be invalidated.
|
2020-02-28 19:14:13 +00:00
|
|
|
template <typename It1, typename It2>
|
2017-04-01 07:20:54 +00:00
|
|
|
void insert(iterator it, It1 from_begin, It2 from_end)
|
|
|
|
{
|
2020-08-06 13:34:23 +00:00
|
|
|
static_assert(memcpy_can_be_used_for_assignment<std::decay_t<T>, std::decay_t<decltype(*from_begin)>>);
|
2020-08-03 17:15:08 +00:00
|
|
|
|
2020-02-28 19:14:13 +00:00
|
|
|
size_t bytes_to_copy = this->byte_size(from_end - from_begin);
|
2021-01-02 14:07:54 +00:00
|
|
|
if (!bytes_to_copy)
|
|
|
|
return;
|
|
|
|
|
2020-07-08 01:21:39 +00:00
|
|
|
size_t bytes_to_move = this->byte_size(end() - it);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2019-11-15 14:36:52 +00:00
|
|
|
insertPrepare(from_begin, from_end);
|
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
if (unlikely(bytes_to_move))
|
2018-12-26 02:47:16 +00:00
|
|
|
memcpy(this->c_end + bytes_to_copy - bytes_to_move, this->c_end - bytes_to_move, bytes_to_move);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2020-08-03 17:15:08 +00:00
|
|
|
memcpy(this->c_end - bytes_to_move, reinterpret_cast<const void *>(&*from_begin), bytes_to_copy);
|
2020-07-08 01:21:39 +00:00
|
|
|
|
2018-12-26 02:47:16 +00:00
|
|
|
this->c_end += bytes_to_copy;
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
|
2020-02-28 19:14:13 +00:00
|
|
|
template <typename It1, typename It2>
|
2017-04-01 07:20:54 +00:00
|
|
|
void insert_assume_reserved(It1 from_begin, It2 from_end)
|
|
|
|
{
|
2020-08-06 13:34:23 +00:00
|
|
|
static_assert(memcpy_can_be_used_for_assignment<std::decay_t<T>, std::decay_t<decltype(*from_begin)>>);
|
2021-01-02 14:07:54 +00:00
|
|
|
this->assertNotIntersects(from_begin, from_end);
|
2020-08-03 17:15:08 +00:00
|
|
|
|
|
|
|
size_t bytes_to_copy = this->byte_size(from_end - from_begin);
|
2021-01-02 14:07:54 +00:00
|
|
|
if (bytes_to_copy)
|
|
|
|
{
|
|
|
|
memcpy(this->c_end, reinterpret_cast<const void *>(&*from_begin), bytes_to_copy);
|
|
|
|
this->c_end += bytes_to_copy;
|
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
|
2019-12-19 07:42:46 +00:00
|
|
|
template <typename... TAllocatorParams>
|
|
|
|
void swap(PODArray & rhs, TAllocatorParams &&... allocator_params)
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2019-03-10 03:16:51 +00:00
|
|
|
#ifndef NDEBUG
|
|
|
|
this->unprotect();
|
|
|
|
rhs.unprotect();
|
|
|
|
#endif
|
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
/// Swap two PODArray objects, arr1 and arr2, that satisfy the following conditions:
|
|
|
|
/// - The elements of arr1 are stored on stack.
|
|
|
|
/// - The elements of arr2 are stored on heap.
|
2019-12-19 07:42:46 +00:00
|
|
|
auto swap_stack_heap = [&](PODArray & arr1, PODArray & arr2)
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
|
|
|
size_t stack_size = arr1.size();
|
2017-07-13 16:49:09 +00:00
|
|
|
size_t stack_allocated = arr1.allocated_bytes();
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
size_t heap_size = arr2.size();
|
2017-07-13 16:49:09 +00:00
|
|
|
size_t heap_allocated = arr2.allocated_bytes();
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
/// Keep track of the stack content we have to copy.
|
|
|
|
char * stack_c_start = arr1.c_start;
|
|
|
|
|
|
|
|
/// arr1 takes ownership of the heap memory of arr2.
|
|
|
|
arr1.c_start = arr2.c_start;
|
|
|
|
arr1.c_end_of_storage = arr1.c_start + heap_allocated - arr1.pad_right;
|
2018-12-26 02:47:16 +00:00
|
|
|
arr1.c_end = arr1.c_start + this->byte_size(heap_size);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
/// Allocate stack space for arr2.
|
2019-12-19 07:42:46 +00:00
|
|
|
arr2.alloc(stack_allocated, std::forward<TAllocatorParams>(allocator_params)...);
|
2017-04-01 07:20:54 +00:00
|
|
|
/// Copy the stack content.
|
2018-12-26 02:47:16 +00:00
|
|
|
memcpy(arr2.c_start, stack_c_start, this->byte_size(stack_size));
|
|
|
|
arr2.c_end = arr2.c_start + this->byte_size(stack_size);
|
2017-04-01 07:20:54 +00:00
|
|
|
};
|
|
|
|
|
2019-12-19 07:42:46 +00:00
|
|
|
auto do_move = [&](PODArray & src, PODArray & dest)
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
|
|
|
if (src.isAllocatedFromStack())
|
|
|
|
{
|
|
|
|
dest.dealloc();
|
2019-12-19 07:42:46 +00:00
|
|
|
dest.alloc(src.allocated_bytes(), std::forward<TAllocatorParams>(allocator_params)...);
|
2018-12-26 02:47:16 +00:00
|
|
|
memcpy(dest.c_start, src.c_start, this->byte_size(src.size()));
|
2017-04-01 07:20:54 +00:00
|
|
|
dest.c_end = dest.c_start + (src.c_end - src.c_start);
|
|
|
|
|
2018-12-26 02:47:16 +00:00
|
|
|
src.c_start = Base::null;
|
|
|
|
src.c_end = Base::null;
|
|
|
|
src.c_end_of_storage = Base::null;
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
std::swap(dest.c_start, src.c_start);
|
|
|
|
std::swap(dest.c_end, src.c_end);
|
|
|
|
std::swap(dest.c_end_of_storage, src.c_end_of_storage);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2018-12-26 02:47:16 +00:00
|
|
|
if (!this->isInitialized() && !rhs.isInitialized())
|
2019-03-10 03:16:51 +00:00
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
return;
|
2019-03-10 03:16:51 +00:00
|
|
|
}
|
2018-12-26 02:47:16 +00:00
|
|
|
else if (!this->isInitialized() && rhs.isInitialized())
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
|
|
|
do_move(rhs, *this);
|
|
|
|
return;
|
|
|
|
}
|
2018-12-26 02:47:16 +00:00
|
|
|
else if (this->isInitialized() && !rhs.isInitialized())
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
|
|
|
do_move(*this, rhs);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2018-12-26 02:47:16 +00:00
|
|
|
if (this->isAllocatedFromStack() && rhs.isAllocatedFromStack())
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2018-12-26 02:47:16 +00:00
|
|
|
size_t min_size = std::min(this->size(), rhs.size());
|
|
|
|
size_t max_size = std::max(this->size(), rhs.size());
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
for (size_t i = 0; i < min_size; ++i)
|
|
|
|
std::swap(this->operator[](i), rhs[i]);
|
|
|
|
|
2018-12-26 02:47:16 +00:00
|
|
|
if (this->size() == max_size)
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
|
|
|
for (size_t i = min_size; i < max_size; ++i)
|
|
|
|
rhs[i] = this->operator[](i);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
for (size_t i = min_size; i < max_size; ++i)
|
|
|
|
this->operator[](i) = rhs[i];
|
|
|
|
}
|
|
|
|
|
2018-12-26 02:47:16 +00:00
|
|
|
size_t lhs_size = this->size();
|
|
|
|
size_t lhs_allocated = this->allocated_bytes();
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
size_t rhs_size = rhs.size();
|
2017-07-13 16:49:09 +00:00
|
|
|
size_t rhs_allocated = rhs.allocated_bytes();
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2018-12-26 02:47:16 +00:00
|
|
|
this->c_end_of_storage = this->c_start + rhs_allocated - Base::pad_right;
|
|
|
|
rhs.c_end_of_storage = rhs.c_start + lhs_allocated - Base::pad_right;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2018-12-26 02:47:16 +00:00
|
|
|
this->c_end = this->c_start + this->byte_size(rhs_size);
|
|
|
|
rhs.c_end = rhs.c_start + this->byte_size(lhs_size);
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
2018-12-26 02:47:16 +00:00
|
|
|
else if (this->isAllocatedFromStack() && !rhs.isAllocatedFromStack())
|
2019-03-10 03:16:51 +00:00
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
swap_stack_heap(*this, rhs);
|
2019-03-10 03:16:51 +00:00
|
|
|
}
|
2018-12-26 02:47:16 +00:00
|
|
|
else if (!this->isAllocatedFromStack() && rhs.isAllocatedFromStack())
|
2019-03-10 03:16:51 +00:00
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
swap_stack_heap(rhs, *this);
|
2019-03-10 03:16:51 +00:00
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
else
|
|
|
|
{
|
2018-12-26 02:47:16 +00:00
|
|
|
std::swap(this->c_start, rhs.c_start);
|
|
|
|
std::swap(this->c_end, rhs.c_end);
|
|
|
|
std::swap(this->c_end_of_storage, rhs.c_end_of_storage);
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-12-19 07:42:46 +00:00
|
|
|
template <typename... TAllocatorParams>
|
|
|
|
void assign(size_t n, const T & x, TAllocatorParams &&... allocator_params)
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2020-08-07 15:18:32 +00:00
|
|
|
this->resize_exact(n, std::forward<TAllocatorParams>(allocator_params)...);
|
2017-04-01 07:20:54 +00:00
|
|
|
std::fill(begin(), end(), x);
|
|
|
|
}
|
|
|
|
|
2020-02-28 19:14:13 +00:00
|
|
|
template <typename It1, typename It2, typename... TAllocatorParams>
|
2019-12-19 07:42:46 +00:00
|
|
|
void assign(It1 from_begin, It2 from_end, TAllocatorParams &&... allocator_params)
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2020-08-06 13:34:23 +00:00
|
|
|
static_assert(memcpy_can_be_used_for_assignment<std::decay_t<T>, std::decay_t<decltype(*from_begin)>>);
|
2021-01-02 14:07:54 +00:00
|
|
|
this->assertNotIntersects(from_begin, from_end);
|
2020-08-03 17:15:08 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
size_t required_capacity = from_end - from_begin;
|
2018-12-26 02:47:16 +00:00
|
|
|
if (required_capacity > this->capacity())
|
2020-08-07 15:18:32 +00:00
|
|
|
this->reserve_exact(required_capacity, std::forward<TAllocatorParams>(allocator_params)...);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2018-12-26 02:47:16 +00:00
|
|
|
size_t bytes_to_copy = this->byte_size(required_capacity);
|
2021-01-02 14:07:54 +00:00
|
|
|
if (bytes_to_copy)
|
|
|
|
{
|
|
|
|
memcpy(this->c_start, reinterpret_cast<const void *>(&*from_begin), bytes_to_copy);
|
|
|
|
this->c_end = this->c_start + bytes_to_copy;
|
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
|
2019-12-19 07:42:46 +00:00
|
|
|
// ISO C++ has strict ambiguity rules, thus we cannot apply TAllocatorParams here.
|
2017-04-01 07:20:54 +00:00
|
|
|
void assign(const PODArray & from)
|
|
|
|
{
|
|
|
|
assign(from.begin(), from.end());
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
bool operator== (const PODArray & other) const
|
|
|
|
{
|
2018-12-26 02:47:16 +00:00
|
|
|
if (this->size() != other.size())
|
2017-04-01 07:20:54 +00:00
|
|
|
return false;
|
|
|
|
|
|
|
|
const_iterator this_it = begin();
|
|
|
|
const_iterator that_it = other.begin();
|
|
|
|
|
|
|
|
while (this_it != end())
|
|
|
|
{
|
|
|
|
if (*this_it != *that_it)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
++this_it;
|
|
|
|
++that_it;
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool operator!= (const PODArray & other) const
|
|
|
|
{
|
|
|
|
return !operator==(other);
|
|
|
|
}
|
2013-09-15 10:53:53 +00:00
|
|
|
};
|
|
|
|
|
2019-06-28 12:51:01 +00:00
|
|
|
template <typename T, size_t initial_bytes, typename TAllocator, size_t pad_right_>
|
|
|
|
void swap(PODArray<T, initial_bytes, TAllocator, pad_right_> & lhs, PODArray<T, initial_bytes, TAllocator, pad_right_> & rhs)
|
2016-07-08 15:10:47 +00:00
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
lhs.swap(rhs);
|
2016-07-08 15:10:47 +00:00
|
|
|
}
|
2019-12-15 06:34:43 +00:00
|
|
|
#pragma GCC diagnostic pop
|
2017-02-15 11:23:38 +00:00
|
|
|
|
2021-02-11 21:54:50 +00:00
|
|
|
extern template class PODArray<UInt8, 4096, Allocator<false>, 15, 16>;
|
|
|
|
extern template class PODArray<UInt16, 4096, Allocator<false>, 15, 16>;
|
|
|
|
extern template class PODArray<UInt32, 4096, Allocator<false>, 15, 16>;
|
|
|
|
extern template class PODArray<UInt64, 4096, Allocator<false>, 15, 16>;
|
|
|
|
|
|
|
|
extern template class PODArray<Int8, 4096, Allocator<false>, 15, 16>;
|
|
|
|
extern template class PODArray<Int16, 4096, Allocator<false>, 15, 16>;
|
|
|
|
extern template class PODArray<Int32, 4096, Allocator<false>, 15, 16>;
|
|
|
|
extern template class PODArray<Int64, 4096, Allocator<false>, 15, 16>;
|
|
|
|
|
2013-09-15 10:53:53 +00:00
|
|
|
}
|