#include "ColumnVector.h"

#include <cstring>
#include <cmath>
#include <common/unaligned.h>
#include <Common/Exception.h>
#include <Common/Arena.h>
#include <Common/SipHash.h>
#include <Common/NaNUtils.h>
#include <Common/RadixSort.h>
#include <Common/assert_cast.h>
#include <Common/WeakHash.h>
#include <Common/HashTable/Hash.h>
#include <IO/WriteBuffer.h>
#include <IO/WriteHelpers.h>
#include <Columns/ColumnsCommon.h>
#include <DataStreams/ColumnGathererStream.h>
#include <ext/bit_cast.h>
#include <pdqsort.h>

#ifdef __SSE2__
    #include <emmintrin.h>
#endif

namespace DB
{

namespace ErrorCodes
{
    extern const int PARAMETER_OUT_OF_BOUND;
    extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
    extern const int LOGICAL_ERROR;
}

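/// Serialization of a numeric value into an Arena is a raw copy of its sizeof(T) bytes.
/// unalignedStore/unalignedLoad are used below because the position returned by
/// Arena::allocContinue carries no alignment guarantee for T.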
template <typename T>
StringRef ColumnVector<T>::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
{
    auto pos = arena.allocContinue(sizeof(T), begin);
    unalignedStore<T>(pos, data[n]);
    return StringRef(pos, sizeof(T));
}

template <typename T>
const char * ColumnVector<T>::deserializeAndInsertFromArena(const char * pos)
{
    data.push_back(unalignedLoad<T>(pos));
    return pos + sizeof(T);
}

template <typename T>
void ColumnVector<T>::updateHashWithValue(size_t n, SipHash & hash) const
{
    hash.update(data[n]);
}

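/// The "weak" 32-bit hash is updated in place: for every row, the current value is mixed
/// into the existing per-row hash with intHashCRC32, using the previous hash as the seed.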
template <typename T>
void ColumnVector<T>::updateWeakHash32(WeakHash32 & hash) const
{
    auto s = data.size();

    if (hash.getData().size() != s)
        throw Exception("Size of WeakHash32 does not match size of column: column size is " + std::to_string(s) +
            ", hash size is " + std::to_string(hash.getData().size()), ErrorCodes::LOGICAL_ERROR);

    const T * begin = data.data();
    const T * end = begin + s;
    UInt32 * hash_data = hash.getData().data();

    while (begin < end)
    {
        *hash_data = intHashCRC32(*begin, *hash_data);
        ++begin;
        ++hash_data;
    }
}

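/// Comparators used by getPermutation. CompareHelper<T> (see ColumnVector.h) applies
/// nan_direction_hint so that floating-point NaNs consistently compare as either the largest
/// or the smallest values, which keeps the ordering total.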
template <typename T>
struct ColumnVector<T>::less
{
    const Self & parent;
    int nan_direction_hint;
    less(const Self & parent_, int nan_direction_hint_) : parent(parent_), nan_direction_hint(nan_direction_hint_) {}
    bool operator()(size_t lhs, size_t rhs) const { return CompareHelper<T>::less(parent.data[lhs], parent.data[rhs], nan_direction_hint); }
};

template <typename T>
struct ColumnVector<T>::greater
{
    const Self & parent;
    int nan_direction_hint;
    greater(const Self & parent_, int nan_direction_hint_) : parent(parent_), nan_direction_hint(nan_direction_hint_) {}
    bool operator()(size_t lhs, size_t rhs) const { return CompareHelper<T>::greater(parent.data[lhs], parent.data[rhs], nan_direction_hint); }
};

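/// Helpers for the radix sort branch of getPermutation: each value is paired with its original
/// row number (stored as UInt32), and RadixSortTraits tells RadixSort to use the value as the key.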
namespace
{
    template <typename T>
    struct ValueWithIndex
    {
        T value;
        UInt32 index;
    };

    template <typename T>
    struct RadixSortTraits : RadixSortNumTraits<T>
    {
        using Element = ValueWithIndex<T>;
        static T & extractKey(Element & elem) { return elem.value; }
    };
}

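/// A hypothetical usage sketch (not part of this file): to obtain the ascending order of rows,
/// a caller could do
///     IColumn::Permutation perm;
///     column.getPermutation(/* reverse = */ false, /* limit = */ 0, /* nan_direction_hint = */ 1, perm);
/// after which perm[i] is the row number of the i-th smallest value. limit == 0 means a full sort;
/// a non-zero limit only guarantees the first `limit` positions (partial sort).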
template <typename T>
void ColumnVector<T>::getPermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res) const
{
    size_t s = data.size();
    res.resize(s);

    if (s == 0)
        return;

    if (limit >= s)
        limit = 0;

    if (limit)
    {
        for (size_t i = 0; i < s; ++i)
            res[i] = i;

        if (reverse)
            std::partial_sort(res.begin(), res.begin() + limit, res.end(), greater(*this, nan_direction_hint));
        else
            std::partial_sort(res.begin(), res.begin() + limit, res.end(), less(*this, nan_direction_hint));
    }
    else
    {
        /// A case for radix sort
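        /// For arithmetic types (except UInt128) the values are copied into (value, index) pairs,
        /// sorted with an LSD radix sort, and the indices are copied back into `res`.
        /// Afterwards, if nan_direction_hint < 0, the trailing block of NaNs is relocated so the
        /// result matches the requested NaN ordering.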
        if constexpr (is_arithmetic_v<T> && !std::is_same_v<T, UInt128>)
        {
            /// Thresholds on size. Lower threshold is arbitrary. Upper threshold is chosen by the type for histogram counters.
            if (s >= 256 && s <= std::numeric_limits<UInt32>::max())
            {
                PaddedPODArray<ValueWithIndex<T>> pairs(s);
                for (UInt32 i = 0; i < UInt32(s); ++i)
                    pairs[i] = {data[i], i};

                RadixSort<RadixSortTraits<T>>::executeLSD(pairs.data(), s);

                /// Radix sort treats all NaNs to be greater than all numbers.
                /// If the user needs the opposite, we must move them accordingly.
                size_t nans_to_move = 0;
                if (std::is_floating_point_v<T> && nan_direction_hint < 0)
                {
                    for (ssize_t i = s - 1; i >= 0; --i)
                    {
                        if (isNaN(pairs[i].value))
                            ++nans_to_move;
                        else
                            break;
                    }
                }

                if (reverse)
                {
                    if (nans_to_move)
                    {
                        for (size_t i = 0; i < s - nans_to_move; ++i)
                            res[i] = pairs[s - nans_to_move - 1 - i].index;
                        for (size_t i = s - nans_to_move; i < s; ++i)
                            res[i] = pairs[s - 1 - (i - (s - nans_to_move))].index;
                    }
                    else
                    {
                        for (size_t i = 0; i < s; ++i)
                            res[s - 1 - i] = pairs[i].index;
                    }
                }
                else
                {
                    if (nans_to_move)
                    {
                        for (size_t i = 0; i < nans_to_move; ++i)
                            res[i] = pairs[i + s - nans_to_move].index;
                        for (size_t i = nans_to_move; i < s; ++i)
                            res[i] = pairs[i - nans_to_move].index;
                    }
                    else
                    {
                        for (size_t i = 0; i < s; ++i)
                            res[i] = pairs[i].index;
                    }
                }

                return;
            }
        }

        /// Default sorting algorithm.
        for (size_t i = 0; i < s; ++i)
            res[i] = i;

        if (reverse)
            pdqsort(res.begin(), res.end(), greater(*this, nan_direction_hint));
        else
            pdqsort(res.begin(), res.end(), less(*this, nan_direction_hint));
    }
}

template <typename T>
const char * ColumnVector<T>::getFamilyName() const
{
    return TypeName<T>::get();
}

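/// cloneResized copies min(size(), size) values and zero-fills the remaining tail, so growing
/// a column produces default (zero) values for the new rows.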
template <typename T>
MutableColumnPtr ColumnVector<T>::cloneResized(size_t size) const
{
    auto res = this->create();

    if (size > 0)
    {
        auto & new_col = static_cast<Self &>(*res);
        new_col.data.resize(size);

        size_t count = std::min(this->size(), size);
        memcpy(new_col.data.data(), data.data(), count * sizeof(data[0]));

        if (size > count)
            memset(static_cast<void *>(&new_col.data[count]), static_cast<int>(ValueType()), (size - count) * sizeof(ValueType));
    }

    return res;
}

template <typename T>
UInt64 ColumnVector<T>::get64(size_t n) const
{
    return ext::bit_cast<UInt64>(data[n]);
}

template <typename T>
inline Float64 ColumnVector<T>::getFloat64(size_t n) const
{
    return static_cast<Float64>(data[n]);
}

template <typename T>
Float32 ColumnVector<T>::getFloat32(size_t n) const
{
    return static_cast<Float32>(data[n]);
}

template <typename T>
void ColumnVector<T>::insertRangeFrom(const IColumn & src, size_t start, size_t length)
{
    const ColumnVector & src_vec = assert_cast<const ColumnVector &>(src);

    if (start + length > src_vec.data.size())
        throw Exception("Parameters start = "
            + toString(start) + ", length = "
            + toString(length) + " are out of bound in ColumnVector<T>::insertRangeFrom method"
            " (data.size() = " + toString(src_vec.data.size()) + ").",
            ErrorCodes::PARAMETER_OUT_OF_BOUND);

    size_t old_size = data.size();
    data.resize(old_size + length);
    memcpy(data.data() + old_size, &src_vec.data[start], length * sizeof(data[0]));
}

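/// A hypothetical usage sketch (not part of this file): keep only the rows whose filter byte is non-zero.
///     IColumn::Filter filt(column.size(), 0);
///     filt[42] = 1;                                   /// keep row 42 only
///     ColumnPtr filtered = column.filter(filt, -1);   /// negative hint: reserve for the worst case
/// result_size_hint == 0 skips the reserve; a positive hint reserves exactly that many rows.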
template <typename T>
ColumnPtr ColumnVector<T>::filter(const IColumn::Filter & filt, ssize_t result_size_hint) const
{
    size_t size = data.size();
    if (size != filt.size())
        throw Exception("Size of filter doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);

    auto res = this->create();
    Container & res_data = res->getData();

    if (result_size_hint)
        res_data.reserve(result_size_hint > 0 ? result_size_hint : size);

    const UInt8 * filt_pos = filt.data();
    const UInt8 * filt_end = filt_pos + size;
    const T * data_pos = data.data();

#ifdef __SSE2__
    /** A slightly more optimized version.
      * Based on the assumption that often pieces of consecutive values
      *  completely pass or do not pass the filter.
      * Therefore, we will optimistically check the parts of `SIMD_BYTES` values.
      */

    static constexpr size_t SIMD_BYTES = 16;
    const __m128i zero16 = _mm_setzero_si128();
    const UInt8 * filt_end_sse = filt_pos + size / SIMD_BYTES * SIMD_BYTES;

    while (filt_pos < filt_end_sse)
    {
        int mask = _mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));

        if (0 == mask)
        {
            /// Nothing is inserted.
        }
        else if (0xFFFF == mask)
        {
            res_data.insert(data_pos, data_pos + SIMD_BYTES);
        }
        else
        {
            for (size_t i = 0; i < SIMD_BYTES; ++i)
                if (filt_pos[i])
                    res_data.push_back(data_pos[i]);
        }

        filt_pos += SIMD_BYTES;
        data_pos += SIMD_BYTES;
    }
#endif

    while (filt_pos < filt_end)
    {
        if (*filt_pos)
            res_data.push_back(*data_pos);

        ++filt_pos;
        ++data_pos;
    }

    return res;
}

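/// permute writes data[perm[i]] into position i of the result. limit == 0 means "all rows";
/// otherwise only the first min(limit, size()) rows are produced.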
template <typename T>
ColumnPtr ColumnVector<T>::permute(const IColumn::Permutation & perm, size_t limit) const
{
    size_t size = data.size();

    if (limit == 0)
        limit = size;
    else
        limit = std::min(size, limit);

    if (perm.size() < limit)
        throw Exception("Size of permutation is less than required.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);

    auto res = this->create(limit);
    typename Self::Container & res_data = res->getData();
    for (size_t i = 0; i < limit; ++i)
        res_data[i] = data[perm[i]];

    return res;
}

template <typename T>
ColumnPtr ColumnVector<T>::index(const IColumn & indexes, size_t limit) const
{
    return selectIndexImpl(*this, indexes, limit);
}

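/// replicate expands each row according to cumulative offsets: row i is repeated
/// offsets[i] - offsets[i - 1] times (offsets[0] times for the first row), so the result
/// has offsets.back() rows in total.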
template <typename T>
ColumnPtr ColumnVector<T>::replicate(const IColumn::Offsets & offsets) const
{
    const size_t size = data.size();
    if (size != offsets.size())
        throw Exception("Size of offsets doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);

    if (0 == size)
        return this->create();

    auto res = this->create(offsets.back());

    auto it = res->getData().begin();
    for (size_t i = 0; i < size; ++i)
    {
        const auto span_end = res->getData().begin() + offsets[i];
        for (; it != span_end; ++it)
            *it = data[i];
    }

    return res;
}

template <typename T>
void ColumnVector<T>::gather(ColumnGathererStream & gatherer)
{
    gatherer.gather(*this);
}

template <typename T>
void ColumnVector<T>::getExtremes(Field & min, Field & max) const
{
    size_t size = data.size();

    if (size == 0)
    {
        min = T(0);
        max = T(0);
        return;
    }

    bool has_value = false;

    /** Skip all NaNs in extremes calculation.
      * If all values are NaNs, then return NaN.
      * NOTE: There exist many different NaNs.
      * Different NaN could be returned: not bit-exact value as one of NaNs from column.
      */

    T cur_min = NaNOrZero<T>();
    T cur_max = NaNOrZero<T>();

    for (const T x : data)
    {
        if (isNaN(x))
            continue;

        if (!has_value)
        {
            cur_min = x;
            cur_max = x;
            has_value = true;
            continue;
        }

        if (x < cur_min)
            cur_min = x;
        else if (x > cur_max)
            cur_max = x;
    }

    min = NearestFieldType<T>(cur_min);
    max = NearestFieldType<T>(cur_max);
}

/// Explicit template instantiations - to avoid code bloat in headers.
template class ColumnVector<UInt8>;
template class ColumnVector<UInt16>;
template class ColumnVector<UInt32>;
template class ColumnVector<UInt64>;
template class ColumnVector<UInt128>;
template class ColumnVector<Int8>;
template class ColumnVector<Int16>;
template class ColumnVector<Int32>;
template class ColumnVector<Int64>;
template class ColumnVector<Int128>;
template class ColumnVector<Float32>;
template class ColumnVector<Float64>;

}