2018-11-16 12:22:51 +00:00
|
|
|
#include "ColumnVector.h"
|
|
|
|
|
2020-11-13 11:28:18 +00:00
|
|
|
#include <Columns/ColumnsCommon.h>
|
2021-02-12 00:25:00 +00:00
|
|
|
#include <Columns/ColumnCompressed.h>
|
2021-04-27 12:49:58 +00:00
|
|
|
#include <Columns/MaskOperations.h>
|
2021-10-08 14:03:54 +00:00
|
|
|
#include <Processors/Transforms/ColumnGathererTransform.h>
|
2020-11-13 11:28:18 +00:00
|
|
|
#include <IO/WriteHelpers.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Common/Arena.h>
|
2020-11-13 11:28:18 +00:00
|
|
|
#include <Common/Exception.h>
|
|
|
|
#include <Common/HashTable/Hash.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Common/NaNUtils.h>
|
2019-02-18 16:29:37 +00:00
|
|
|
#include <Common/RadixSort.h>
|
2020-11-13 11:28:18 +00:00
|
|
|
#include <Common/SipHash.h>
|
2020-03-13 17:31:50 +00:00
|
|
|
#include <Common/WeakHash.h>
|
2020-11-13 11:28:18 +00:00
|
|
|
#include <Common/assert_cast.h>
|
2021-10-02 07:13:14 +00:00
|
|
|
#include <base/sort.h>
|
|
|
|
#include <base/unaligned.h>
|
|
|
|
#include <base/bit_cast.h>
|
|
|
|
#include <base/scope_guard.h>
|
2017-03-11 01:29:45 +00:00
|
|
|
|
2020-11-13 11:28:18 +00:00
|
|
|
#include <cmath>
|
|
|
|
#include <cstring>
|
|
|
|
|
|
|
|
#if defined(__SSE2__)
|
|
|
|
# include <emmintrin.h>
|
2017-03-11 01:12:51 +00:00
|
|
|
#endif
|
|
|
|
|
2022-02-09 16:32:52 +00:00
|
|
|
#if USE_EMBEDDED_COMPILER
|
|
|
|
#include <DataTypes/Native.h>
|
|
|
|
#include <llvm/IR/IRBuilder.h>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
2017-03-11 01:12:51 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Error codes used by this file; they are only declared here (extern).
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
    extern const int NOT_IMPLEMENTED;
    extern const int PARAMETER_OUT_OF_BOUND;
    extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
}
|
|
|
|
|
2018-08-07 13:57:28 +00:00
|
|
|
template <typename T>
|
|
|
|
StringRef ColumnVector<T>::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
|
2017-03-11 01:12:51 +00:00
|
|
|
{
|
2021-01-26 18:22:40 +00:00
|
|
|
auto * pos = arena.allocContinue(sizeof(T), begin);
|
|
|
|
unalignedStore<T>(pos, data[n]);
|
|
|
|
return StringRef(pos, sizeof(T));
|
2017-03-11 01:12:51 +00:00
|
|
|
}
|
|
|
|
|
2018-08-07 13:57:28 +00:00
|
|
|
template <typename T>
|
|
|
|
const char * ColumnVector<T>::deserializeAndInsertFromArena(const char * pos)
|
2017-03-11 01:12:51 +00:00
|
|
|
{
|
2021-01-26 18:22:40 +00:00
|
|
|
data.emplace_back(unalignedLoad<T>(pos));
|
|
|
|
return pos + sizeof(T);
|
2017-03-11 01:12:51 +00:00
|
|
|
}
|
|
|
|
|
2021-02-16 21:26:06 +00:00
|
|
|
template <typename T>
|
|
|
|
const char * ColumnVector<T>::skipSerializedInArena(const char * pos) const
|
|
|
|
{
|
|
|
|
return pos + sizeof(T);
|
|
|
|
}
|
|
|
|
|
2018-08-07 13:57:28 +00:00
|
|
|
template <typename T>
|
|
|
|
void ColumnVector<T>::updateHashWithValue(size_t n, SipHash & hash) const
|
2017-03-11 01:12:51 +00:00
|
|
|
{
|
2018-03-03 15:36:20 +00:00
|
|
|
hash.update(data[n]);
|
2017-03-11 01:12:51 +00:00
|
|
|
}
|
|
|
|
|
2020-03-13 17:31:50 +00:00
|
|
|
template <typename T>
|
|
|
|
void ColumnVector<T>::updateWeakHash32(WeakHash32 & hash) const
|
|
|
|
{
|
|
|
|
auto s = data.size();
|
|
|
|
|
|
|
|
if (hash.getData().size() != s)
|
|
|
|
throw Exception("Size of WeakHash32 does not match size of column: column size is " + std::to_string(s) +
|
|
|
|
", hash size is " + std::to_string(hash.getData().size()), ErrorCodes::LOGICAL_ERROR);
|
|
|
|
|
2020-03-25 11:14:11 +00:00
|
|
|
const T * begin = data.data();
|
2020-03-13 17:31:50 +00:00
|
|
|
const T * end = begin + s;
|
2020-03-25 11:14:11 +00:00
|
|
|
UInt32 * hash_data = hash.getData().data();
|
2020-03-13 17:31:50 +00:00
|
|
|
|
|
|
|
while (begin < end)
|
|
|
|
{
|
2020-03-23 19:18:35 +00:00
|
|
|
*hash_data = intHashCRC32(*begin, *hash_data);
|
2020-03-13 17:31:50 +00:00
|
|
|
++begin;
|
|
|
|
++hash_data;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-05-20 22:16:08 +00:00
|
|
|
template <typename T>
|
|
|
|
void ColumnVector<T>::updateHashFast(SipHash & hash) const
|
|
|
|
{
|
|
|
|
hash.update(reinterpret_cast<const char *>(data.data()), size() * sizeof(data[0]));
|
|
|
|
}
|
|
|
|
|
2018-08-07 13:57:28 +00:00
|
|
|
template <typename T>
|
|
|
|
struct ColumnVector<T>::less
|
2017-03-11 01:12:51 +00:00
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
const Self & parent;
|
|
|
|
int nan_direction_hint;
|
|
|
|
less(const Self & parent_, int nan_direction_hint_) : parent(parent_), nan_direction_hint(nan_direction_hint_) {}
|
|
|
|
bool operator()(size_t lhs, size_t rhs) const { return CompareHelper<T>::less(parent.data[lhs], parent.data[rhs], nan_direction_hint); }
|
2017-03-11 01:12:51 +00:00
|
|
|
};
|
|
|
|
|
2022-02-23 17:34:19 +00:00
|
|
|
template <typename T>
|
|
|
|
struct ColumnVector<T>::less_stable
|
|
|
|
{
|
|
|
|
const Self & parent;
|
|
|
|
int nan_direction_hint;
|
|
|
|
less_stable(const Self & parent_, int nan_direction_hint_) : parent(parent_), nan_direction_hint(nan_direction_hint_) {}
|
|
|
|
bool operator()(size_t lhs, size_t rhs) const
|
|
|
|
{
|
2022-02-25 19:55:34 +00:00
|
|
|
if (unlikely(parent.data[lhs] == parent.data[rhs]))
|
2022-02-23 17:34:19 +00:00
|
|
|
return lhs < rhs;
|
|
|
|
|
2022-03-03 13:02:31 +00:00
|
|
|
if constexpr (std::is_floating_point_v<T>)
|
|
|
|
{
|
2022-03-12 18:04:08 +00:00
|
|
|
if (unlikely(std::isnan(parent.data[lhs]) && std::isnan(parent.data[rhs])))
|
|
|
|
{
|
2022-03-03 13:02:31 +00:00
|
|
|
return lhs < rhs;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-02-23 17:34:19 +00:00
|
|
|
return CompareHelper<T>::less(parent.data[lhs], parent.data[rhs], nan_direction_hint);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2018-08-07 13:57:28 +00:00
|
|
|
template <typename T>
|
|
|
|
struct ColumnVector<T>::greater
|
2017-03-11 01:12:51 +00:00
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
const Self & parent;
|
|
|
|
int nan_direction_hint;
|
|
|
|
greater(const Self & parent_, int nan_direction_hint_) : parent(parent_), nan_direction_hint(nan_direction_hint_) {}
|
|
|
|
bool operator()(size_t lhs, size_t rhs) const { return CompareHelper<T>::greater(parent.data[lhs], parent.data[rhs], nan_direction_hint); }
|
2017-03-11 01:12:51 +00:00
|
|
|
};
|
|
|
|
|
2022-02-23 17:34:19 +00:00
|
|
|
template <typename T>
|
|
|
|
struct ColumnVector<T>::greater_stable
|
|
|
|
{
|
|
|
|
const Self & parent;
|
|
|
|
int nan_direction_hint;
|
|
|
|
greater_stable(const Self & parent_, int nan_direction_hint_) : parent(parent_), nan_direction_hint(nan_direction_hint_) {}
|
|
|
|
bool operator()(size_t lhs, size_t rhs) const
|
|
|
|
{
|
2022-02-25 19:55:34 +00:00
|
|
|
if (unlikely(parent.data[lhs] == parent.data[rhs]))
|
2022-02-23 17:34:19 +00:00
|
|
|
return lhs < rhs;
|
|
|
|
|
2022-03-03 13:02:31 +00:00
|
|
|
if constexpr (std::is_floating_point_v<T>)
|
|
|
|
{
|
2022-03-12 18:04:08 +00:00
|
|
|
if (unlikely(std::isnan(parent.data[lhs]) && std::isnan(parent.data[rhs])))
|
|
|
|
{
|
2022-03-03 13:02:31 +00:00
|
|
|
return lhs < rhs;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-02-23 17:34:19 +00:00
|
|
|
return CompareHelper<T>::greater(parent.data[lhs], parent.data[rhs], nan_direction_hint);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2021-09-30 20:42:50 +00:00
|
|
|
template <typename T>
|
|
|
|
struct ColumnVector<T>::equals
|
|
|
|
{
|
|
|
|
const Self & parent;
|
|
|
|
int nan_direction_hint;
|
|
|
|
equals(const Self & parent_, int nan_direction_hint_) : parent(parent_), nan_direction_hint(nan_direction_hint_) {}
|
|
|
|
bool operator()(size_t lhs, size_t rhs) const { return CompareHelper<T>::equals(parent.data[lhs], parent.data[rhs], nan_direction_hint); }
|
|
|
|
};
|
|
|
|
|
2019-04-25 01:16:26 +00:00
|
|
|
namespace
|
|
|
|
{
|
|
|
|
template <typename T>
|
|
|
|
struct ValueWithIndex
|
|
|
|
{
|
|
|
|
T value;
|
|
|
|
UInt32 index;
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
struct RadixSortTraits : RadixSortNumTraits<T>
|
|
|
|
{
|
|
|
|
using Element = ValueWithIndex<T>;
|
2020-05-23 14:28:05 +00:00
|
|
|
using Result = size_t;
|
2020-05-23 15:22:04 +00:00
|
|
|
|
2019-04-25 01:16:26 +00:00
|
|
|
static T & extractKey(Element & elem) { return elem.value; }
|
2020-05-23 14:28:05 +00:00
|
|
|
static size_t extractResult(Element & elem) { return elem.index; }
|
2019-04-25 01:16:26 +00:00
|
|
|
};
|
|
|
|
}
|
|
|
|
|
2022-02-09 16:32:52 +00:00
|
|
|
#if USE_EMBEDDED_COMPILER
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
bool ColumnVector<T>::isComparatorCompilable() const
|
|
|
|
{
|
|
|
|
/// TODO: for std::is_floating_point_v<T> we need implement is_nan in LLVM IR.
|
2022-02-17 19:29:21 +00:00
|
|
|
return std::is_integral_v<T>;
|
2022-02-09 16:32:52 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
llvm::Value * ColumnVector<T>::compileComparator(llvm::IRBuilderBase & builder, llvm::Value * lhs, llvm::Value * rhs, llvm::Value *) const
|
|
|
|
{
|
|
|
|
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
|
|
|
|
|
2022-05-11 13:35:37 +00:00
|
|
|
if constexpr (std::is_integral_v<T>)
|
2022-02-09 16:32:52 +00:00
|
|
|
{
|
|
|
|
// a > b ? 1 : (a < b ? -1 : 0);
|
|
|
|
|
|
|
|
bool is_signed = std::is_signed_v<T>;
|
|
|
|
|
|
|
|
auto * lhs_greater_than_rhs_result = llvm::ConstantInt::getSigned(b.getInt8Ty(), 1);
|
|
|
|
auto * lhs_less_than_rhs_result = llvm::ConstantInt::getSigned(b.getInt8Ty(), -1);
|
|
|
|
auto * lhs_equals_rhs_result = llvm::ConstantInt::getSigned(b.getInt8Ty(), 0);
|
|
|
|
|
|
|
|
auto * lhs_greater_than_rhs = is_signed ? b.CreateICmpSGT(lhs, rhs) : b.CreateICmpUGT(lhs, rhs);
|
|
|
|
auto * lhs_less_than_rhs = is_signed ? b.CreateICmpSLT(lhs, rhs) : b.CreateICmpULT(lhs, rhs);
|
|
|
|
auto * if_lhs_less_than_rhs_result = b.CreateSelect(lhs_less_than_rhs, lhs_less_than_rhs_result, lhs_equals_rhs_result);
|
|
|
|
|
|
|
|
return b.CreateSelect(lhs_greater_than_rhs, lhs_greater_than_rhs_result, if_lhs_less_than_rhs_result);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Method compileComparator is not supported for type {}", TypeName<T>);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|
2020-05-18 21:41:23 +00:00
|
|
|
|
2018-08-07 13:57:28 +00:00
|
|
|
template <typename T>
|
2022-02-23 17:34:19 +00:00
|
|
|
void ColumnVector<T>::getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
|
|
|
|
size_t limit, int nan_direction_hint, IColumn::Permutation & res) const
|
2017-03-11 01:12:51 +00:00
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
size_t s = data.size();
|
|
|
|
res.resize(s);
|
|
|
|
|
2019-04-25 01:16:26 +00:00
|
|
|
if (s == 0)
|
|
|
|
return;
|
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
if (limit >= s)
|
|
|
|
limit = 0;
|
|
|
|
|
|
|
|
if (limit)
|
|
|
|
{
|
2019-02-18 16:29:37 +00:00
|
|
|
for (size_t i = 0; i < s; ++i)
|
|
|
|
res[i] = i;
|
|
|
|
|
2022-02-23 17:34:19 +00:00
|
|
|
if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Unstable)
|
2022-01-30 19:49:48 +00:00
|
|
|
::partial_sort(res.begin(), res.begin() + limit, res.end(), less(*this, nan_direction_hint));
|
2022-02-23 17:34:19 +00:00
|
|
|
else if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Stable)
|
|
|
|
::partial_sort(res.begin(), res.begin() + limit, res.end(), less_stable(*this, nan_direction_hint));
|
|
|
|
else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Unstable)
|
|
|
|
::partial_sort(res.begin(), res.begin() + limit, res.end(), greater(*this, nan_direction_hint));
|
|
|
|
else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Stable)
|
|
|
|
::partial_sort(res.begin(), res.begin() + limit, res.end(), greater_stable(*this, nan_direction_hint));
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2019-04-25 01:16:26 +00:00
|
|
|
/// A case for radix sort
|
2022-02-23 17:34:19 +00:00
|
|
|
/// LSD RadixSort is stable
|
2021-05-06 00:31:09 +00:00
|
|
|
if constexpr (is_arithmetic_v<T> && !is_big_int_v<T>)
|
2019-02-18 16:29:37 +00:00
|
|
|
{
|
2022-02-23 17:34:19 +00:00
|
|
|
bool reverse = direction == IColumn::PermutationSortDirection::Descending;
|
2022-03-03 13:02:31 +00:00
|
|
|
bool ascending = direction == IColumn::PermutationSortDirection::Ascending;
|
|
|
|
bool sort_is_stable = stability == IColumn::PermutationSortStability::Stable;
|
|
|
|
|
|
|
|
/// TODO: LSD RadixSort is currently not stable if direction is descending, or value is floating point
|
2022-03-11 21:16:25 +00:00
|
|
|
bool use_radix_sort = (sort_is_stable && ascending && !std::is_floating_point_v<T>) || !sort_is_stable;
|
2022-02-23 17:34:19 +00:00
|
|
|
|
2019-04-25 01:16:26 +00:00
|
|
|
/// Thresholds on size. Lower threshold is arbitrary. Upper threshold is chosen by the type for histogram counters.
|
2022-03-11 21:16:25 +00:00
|
|
|
if (s >= 256 && s <= std::numeric_limits<UInt32>::max() && use_radix_sort)
|
2019-04-25 01:16:26 +00:00
|
|
|
{
|
|
|
|
PaddedPODArray<ValueWithIndex<T>> pairs(s);
|
2022-04-18 08:18:31 +00:00
|
|
|
for (UInt32 i = 0; i < static_cast<UInt32>(s); ++i)
|
2019-04-25 01:16:26 +00:00
|
|
|
pairs[i] = {data[i], i};
|
|
|
|
|
2020-05-18 13:13:59 +00:00
|
|
|
RadixSort<RadixSortTraits<T>>::executeLSD(pairs.data(), s, reverse, res.data());
|
2019-04-25 01:16:26 +00:00
|
|
|
|
|
|
|
/// Radix sort treats all NaNs to be greater than all numbers.
|
|
|
|
/// If the user needs the opposite, we must move them accordingly.
|
|
|
|
if (std::is_floating_point_v<T> && nan_direction_hint < 0)
|
|
|
|
{
|
2020-05-18 13:13:59 +00:00
|
|
|
size_t nans_to_move = 0;
|
|
|
|
|
|
|
|
for (size_t i = 0; i < s; ++i)
|
2019-04-25 01:16:26 +00:00
|
|
|
{
|
2020-05-18 13:13:59 +00:00
|
|
|
if (isNaN(data[res[reverse ? i : s - 1 - i]]))
|
2019-04-25 01:16:26 +00:00
|
|
|
++nans_to_move;
|
|
|
|
else
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (nans_to_move)
|
|
|
|
{
|
2020-05-21 14:10:47 +00:00
|
|
|
std::rotate(std::begin(res), std::begin(res) + (reverse ? nans_to_move : s - nans_to_move), std::end(res));
|
2019-04-25 01:16:26 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
2019-02-18 16:29:37 +00:00
|
|
|
}
|
|
|
|
|
2019-04-25 01:16:26 +00:00
|
|
|
/// Default sorting algorithm.
|
|
|
|
for (size_t i = 0; i < s; ++i)
|
|
|
|
res[i] = i;
|
|
|
|
|
2022-02-23 17:34:19 +00:00
|
|
|
if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Unstable)
|
2022-01-30 19:49:48 +00:00
|
|
|
::sort(res.begin(), res.end(), less(*this, nan_direction_hint));
|
2022-02-23 17:34:19 +00:00
|
|
|
else if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Stable)
|
|
|
|
::sort(res.begin(), res.end(), less_stable(*this, nan_direction_hint));
|
|
|
|
else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Unstable)
|
|
|
|
::sort(res.begin(), res.end(), greater(*this, nan_direction_hint));
|
|
|
|
else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Stable)
|
|
|
|
::sort(res.begin(), res.end(), greater_stable(*this, nan_direction_hint));
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
2017-03-11 01:12:51 +00:00
|
|
|
}
|
|
|
|
|
2020-05-12 00:58:58 +00:00
|
|
|
template <typename T>
|
2022-02-23 17:34:19 +00:00
|
|
|
void ColumnVector<T>::updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
|
|
|
|
size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const
|
2020-05-12 00:58:58 +00:00
|
|
|
{
|
2022-01-30 19:49:48 +00:00
|
|
|
auto sort = [](auto begin, auto end, auto pred) { ::sort(begin, end, pred); };
|
2021-10-01 16:43:58 +00:00
|
|
|
auto partial_sort = [](auto begin, auto mid, auto end, auto pred) { ::partial_sort(begin, mid, end, pred); };
|
|
|
|
|
2022-02-23 17:34:19 +00:00
|
|
|
if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Unstable)
|
|
|
|
{
|
2021-10-01 16:43:58 +00:00
|
|
|
this->updatePermutationImpl(
|
2022-02-23 17:34:19 +00:00
|
|
|
limit, res, equal_ranges,
|
|
|
|
less(*this, nan_direction_hint),
|
|
|
|
equals(*this, nan_direction_hint),
|
|
|
|
sort, partial_sort);
|
|
|
|
}
|
|
|
|
else if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Stable)
|
|
|
|
{
|
2021-10-01 16:43:58 +00:00
|
|
|
this->updatePermutationImpl(
|
2022-02-23 17:34:19 +00:00
|
|
|
limit, res, equal_ranges,
|
|
|
|
less_stable(*this, nan_direction_hint),
|
|
|
|
equals(*this, nan_direction_hint),
|
|
|
|
sort, partial_sort);
|
|
|
|
}
|
|
|
|
else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Unstable)
|
|
|
|
{
|
|
|
|
this->updatePermutationImpl(
|
|
|
|
limit, res, equal_ranges,
|
2021-10-01 16:43:58 +00:00
|
|
|
greater(*this, nan_direction_hint),
|
|
|
|
equals(*this, nan_direction_hint),
|
|
|
|
sort, partial_sort);
|
2022-02-23 17:34:19 +00:00
|
|
|
}
|
|
|
|
else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Stable)
|
|
|
|
{
|
2021-10-01 16:43:58 +00:00
|
|
|
this->updatePermutationImpl(
|
2022-02-23 17:34:19 +00:00
|
|
|
limit, res, equal_ranges,
|
|
|
|
greater_stable(*this, nan_direction_hint),
|
2021-10-01 16:43:58 +00:00
|
|
|
equals(*this, nan_direction_hint),
|
|
|
|
sort, partial_sort);
|
2022-02-23 17:34:19 +00:00
|
|
|
}
|
2020-05-12 00:58:58 +00:00
|
|
|
}
|
|
|
|
|
2018-08-07 13:57:28 +00:00
|
|
|
template <typename T>
|
|
|
|
MutableColumnPtr ColumnVector<T>::cloneResized(size_t size) const
|
2017-03-11 01:12:51 +00:00
|
|
|
{
|
2017-12-15 19:46:24 +00:00
|
|
|
auto res = this->create();
|
2017-03-11 01:12:51 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
if (size > 0)
|
|
|
|
{
|
2017-12-14 03:56:56 +00:00
|
|
|
auto & new_col = static_cast<Self &>(*res);
|
2017-04-01 07:20:54 +00:00
|
|
|
new_col.data.resize(size);
|
2017-03-11 01:12:51 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
size_t count = std::min(this->size(), size);
|
2021-01-26 18:22:40 +00:00
|
|
|
memcpy(new_col.data.data(), data.data(), count * sizeof(data[0]));
|
2017-03-11 01:12:51 +00:00
|
|
|
|
2021-01-26 18:22:40 +00:00
|
|
|
if (size > count)
|
2021-05-03 22:46:51 +00:00
|
|
|
memset(static_cast<void *>(&new_col.data[count]), 0, (size - count) * sizeof(ValueType));
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
2017-03-11 01:12:51 +00:00
|
|
|
|
Get rid of useless std::move to get NRVO
http://eel.is/c++draft/class.copy.elision#:constructor,copy,elision
Some quote:
> Speaking of RVO, return std::move(w); prohibits it. It means "use move constructor or fail to compile", whereas return w; means "use RVO, and if you can't, use move constructor, and if you can't, use copy constructor, and if you can't, fail to compile."
There is one exception to this rule:
```cpp
Block FilterBlockInputStream::removeFilterIfNeed(Block && block)
{
if (block && remove_filter)
block.erase(static_cast<size_t>(filter_column));
return std::move(block);
}
```
because references are not eligible for NRVO, which is another rule "always move rvalue references and forward universal references" that takes precedence.
2018-08-27 14:04:22 +00:00
|
|
|
return res;
|
2017-03-11 01:12:51 +00:00
|
|
|
}
|
|
|
|
|
2018-08-07 13:57:28 +00:00
|
|
|
template <typename T>
|
2021-05-06 22:57:41 +00:00
|
|
|
UInt64 ColumnVector<T>::get64(size_t n [[maybe_unused]]) const
|
2017-03-11 01:12:51 +00:00
|
|
|
{
|
2021-05-09 21:26:34 +00:00
|
|
|
if constexpr (is_arithmetic_v<T>)
|
2021-06-15 19:55:21 +00:00
|
|
|
return bit_cast<UInt64>(data[n]);
|
2021-05-03 22:46:51 +00:00
|
|
|
else
|
2021-05-04 17:26:09 +00:00
|
|
|
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot get the value of {} as UInt64", TypeName<T>);
|
2017-03-11 01:12:51 +00:00
|
|
|
}
|
|
|
|
|
2019-05-30 21:59:40 +00:00
|
|
|
template <typename T>
|
2021-05-06 22:57:41 +00:00
|
|
|
inline Float64 ColumnVector<T>::getFloat64(size_t n [[maybe_unused]]) const
|
2019-05-30 21:59:40 +00:00
|
|
|
{
|
2021-05-09 21:26:34 +00:00
|
|
|
if constexpr (is_arithmetic_v<T>)
|
2021-05-03 22:46:51 +00:00
|
|
|
return static_cast<Float64>(data[n]);
|
|
|
|
else
|
2021-05-04 17:26:09 +00:00
|
|
|
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot get the value of {} as Float64", TypeName<T>);
|
2019-05-30 21:59:40 +00:00
|
|
|
}
|
|
|
|
|
2019-12-03 01:55:46 +00:00
|
|
|
template <typename T>
|
2021-05-06 22:57:41 +00:00
|
|
|
Float32 ColumnVector<T>::getFloat32(size_t n [[maybe_unused]]) const
|
2019-12-03 01:55:46 +00:00
|
|
|
{
|
2021-05-09 21:26:34 +00:00
|
|
|
if constexpr (is_arithmetic_v<T>)
|
2021-05-03 22:46:51 +00:00
|
|
|
return static_cast<Float32>(data[n]);
|
|
|
|
else
|
2021-05-04 17:26:09 +00:00
|
|
|
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot get the value of {} as Float32", TypeName<T>);
|
2019-12-03 01:55:46 +00:00
|
|
|
}
|
|
|
|
|
2018-08-07 13:57:28 +00:00
|
|
|
template <typename T>
|
|
|
|
void ColumnVector<T>::insertRangeFrom(const IColumn & src, size_t start, size_t length)
|
2017-03-11 01:12:51 +00:00
|
|
|
{
|
2019-08-21 02:28:04 +00:00
|
|
|
const ColumnVector & src_vec = assert_cast<const ColumnVector &>(src);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
if (start + length > src_vec.data.size())
|
|
|
|
throw Exception("Parameters start = "
|
|
|
|
+ toString(start) + ", length = "
|
|
|
|
+ toString(length) + " are out of bound in ColumnVector<T>::insertRangeFrom method"
|
|
|
|
" (data.size() = " + toString(src_vec.data.size()) + ").",
|
|
|
|
ErrorCodes::PARAMETER_OUT_OF_BOUND);
|
|
|
|
|
|
|
|
size_t old_size = data.size();
|
|
|
|
data.resize(old_size + length);
|
2021-01-26 18:22:40 +00:00
|
|
|
memcpy(data.data() + old_size, &src_vec.data[start], length * sizeof(data[0]));
|
2017-03-11 01:12:51 +00:00
|
|
|
}
|
|
|
|
|
2018-08-07 13:57:28 +00:00
|
|
|
template <typename T>
|
2021-08-10 11:31:15 +00:00
|
|
|
ColumnPtr ColumnVector<T>::filter(const IColumn::Filter & filt, ssize_t result_size_hint) const
|
2017-03-11 01:12:51 +00:00
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
size_t size = data.size();
|
|
|
|
if (size != filt.size())
|
2022-03-01 17:20:53 +00:00
|
|
|
throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Size of filter ({}) doesn't match size of column ({})", filt.size(), size);
|
2017-03-11 01:12:51 +00:00
|
|
|
|
2017-12-15 19:46:24 +00:00
|
|
|
auto res = this->create();
|
2017-12-15 21:32:25 +00:00
|
|
|
Container & res_data = res->getData();
|
2017-03-11 01:12:51 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
if (result_size_hint)
|
|
|
|
res_data.reserve(result_size_hint > 0 ? result_size_hint : size);
|
2017-03-11 01:12:51 +00:00
|
|
|
|
2021-01-26 18:22:40 +00:00
|
|
|
const UInt8 * filt_pos = filt.data();
|
|
|
|
const UInt8 * filt_end = filt_pos + size;
|
|
|
|
const T * data_pos = data.data();
|
2021-10-25 20:09:09 +00:00
|
|
|
|
2021-10-11 22:34:27 +00:00
|
|
|
/** A slightly more optimized version.
|
|
|
|
* Based on the assumption that often pieces of consecutive values
|
|
|
|
* completely pass or do not pass the filter.
|
|
|
|
* Therefore, we will optimistically check the parts of `SIMD_BYTES` values.
|
|
|
|
*/
|
2021-10-11 17:21:13 +00:00
|
|
|
static constexpr size_t SIMD_BYTES = 64;
|
2021-10-25 20:09:09 +00:00
|
|
|
const UInt8 * filt_end_aligned = filt_pos + size / SIMD_BYTES * SIMD_BYTES;
|
2021-10-11 17:21:13 +00:00
|
|
|
|
2021-10-25 20:09:09 +00:00
|
|
|
while (filt_pos < filt_end_aligned)
|
2021-10-12 02:54:47 +00:00
|
|
|
{
|
2021-11-20 12:58:44 +00:00
|
|
|
UInt64 mask = bytes64MaskToBits64Mask(filt_pos);
|
2021-10-11 17:21:13 +00:00
|
|
|
|
2021-10-25 20:09:09 +00:00
|
|
|
if (0xffffffffffffffff == mask)
|
2021-10-11 17:21:13 +00:00
|
|
|
{
|
|
|
|
res_data.insert(data_pos, data_pos + SIMD_BYTES);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
while (mask)
|
|
|
|
{
|
|
|
|
size_t index = __builtin_ctzll(mask);
|
|
|
|
res_data.push_back(data_pos[index]);
|
|
|
|
#ifdef __BMI__
|
|
|
|
mask = _blsr_u64(mask);
|
|
|
|
#else
|
|
|
|
mask = mask & (mask-1);
|
2021-10-12 02:54:47 +00:00
|
|
|
#endif
|
2021-10-11 17:21:13 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
filt_pos += SIMD_BYTES;
|
|
|
|
data_pos += SIMD_BYTES;
|
|
|
|
}
|
2021-10-11 22:34:27 +00:00
|
|
|
|
2021-01-26 18:22:40 +00:00
|
|
|
while (filt_pos < filt_end)
|
|
|
|
{
|
2021-08-10 11:31:15 +00:00
|
|
|
if (*filt_pos)
|
2021-01-26 18:22:40 +00:00
|
|
|
res_data.push_back(*data_pos);
|
2017-03-11 01:12:51 +00:00
|
|
|
|
2021-01-26 18:22:40 +00:00
|
|
|
++filt_pos;
|
|
|
|
++data_pos;
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
2017-03-11 01:12:51 +00:00
|
|
|
|
Get rid of useless std::move to get NRVO
http://eel.is/c++draft/class.copy.elision#:constructor,copy,elision
Some quote:
> Speaking of RVO, return std::move(w); prohibits it. It means "use move constructor or fail to compile", whereas return w; means "use RVO, and if you can't, use move constructor, and if you can't, use copy constructor, and if you can't, fail to compile."
There is one exception to this rule:
```cpp
Block FilterBlockInputStream::removeFilterIfNeed(Block && block)
{
if (block && remove_filter)
block.erase(static_cast<size_t>(filter_column));
return std::move(block);
}
```
because references are not eligible for NRVO, which is another rule "always move rvalue references and forward universal references" that takes precedence.
2018-08-27 14:04:22 +00:00
|
|
|
return res;
|
2017-03-11 01:12:51 +00:00
|
|
|
}
|
|
|
|
|
2021-04-27 12:49:58 +00:00
|
|
|
template <typename T>
|
2021-06-07 10:55:55 +00:00
|
|
|
void ColumnVector<T>::expand(const IColumn::Filter & mask, bool inverted)
|
2021-04-27 12:49:58 +00:00
|
|
|
{
|
2021-06-07 10:55:55 +00:00
|
|
|
expandDataByMask<T>(data, mask, inverted);
|
2021-04-27 12:49:58 +00:00
|
|
|
}
|
|
|
|
|
2020-06-23 21:06:32 +00:00
|
|
|
template <typename T>
|
|
|
|
void ColumnVector<T>::applyZeroMap(const IColumn::Filter & filt, bool inverted)
|
|
|
|
{
|
|
|
|
size_t size = data.size();
|
|
|
|
if (size != filt.size())
|
2022-03-01 17:20:53 +00:00
|
|
|
throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Size of filter ({}) doesn't match size of column ({})", filt.size(), size);
|
2020-06-23 21:06:32 +00:00
|
|
|
|
|
|
|
const UInt8 * filt_pos = filt.data();
|
|
|
|
const UInt8 * filt_end = filt_pos + size;
|
|
|
|
T * data_pos = data.data();
|
|
|
|
|
|
|
|
if (inverted)
|
|
|
|
{
|
|
|
|
for (; filt_pos < filt_end; ++filt_pos, ++data_pos)
|
|
|
|
if (!*filt_pos)
|
|
|
|
*data_pos = 0;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
for (; filt_pos < filt_end; ++filt_pos, ++data_pos)
|
|
|
|
if (*filt_pos)
|
|
|
|
*data_pos = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-08-07 13:57:28 +00:00
|
|
|
template <typename T>
|
2019-02-18 19:44:26 +00:00
|
|
|
ColumnPtr ColumnVector<T>::permute(const IColumn::Permutation & perm, size_t limit) const
|
2017-03-11 01:12:51 +00:00
|
|
|
{
|
2021-09-29 17:51:58 +00:00
|
|
|
return permuteImpl(*this, perm, limit);
|
2017-03-11 01:12:51 +00:00
|
|
|
}
|
|
|
|
|
2018-04-18 21:00:47 +00:00
|
|
|
template <typename T>
|
2019-02-18 17:28:53 +00:00
|
|
|
ColumnPtr ColumnVector<T>::index(const IColumn & indexes, size_t limit) const
|
2018-04-18 21:00:47 +00:00
|
|
|
{
|
|
|
|
return selectIndexImpl(*this, indexes, limit);
|
|
|
|
}
|
|
|
|
|
2018-08-07 13:57:28 +00:00
|
|
|
template <typename T>
|
|
|
|
ColumnPtr ColumnVector<T>::replicate(const IColumn::Offsets & offsets) const
|
2017-03-11 01:12:51 +00:00
|
|
|
{
|
2022-05-18 13:10:01 +00:00
|
|
|
#ifdef __SSE2__
|
2022-05-16 14:48:37 +00:00
|
|
|
if constexpr (std::is_same_v<T, UInt32>)
|
|
|
|
{
|
|
|
|
return replicateSSE2(offsets);
|
|
|
|
}
|
|
|
|
#endif
|
2020-02-28 13:27:01 +00:00
|
|
|
const size_t size = data.size();
|
2017-04-01 07:20:54 +00:00
|
|
|
if (size != offsets.size())
|
|
|
|
throw Exception("Size of offsets doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
|
2017-03-11 01:12:51 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
if (0 == size)
|
2017-12-15 19:46:24 +00:00
|
|
|
return this->create();
|
2017-03-11 01:12:51 +00:00
|
|
|
|
2020-02-21 19:47:20 +00:00
|
|
|
auto res = this->create(offsets.back());
|
2017-03-11 01:12:51 +00:00
|
|
|
|
2020-08-19 11:52:17 +00:00
|
|
|
auto it = res->getData().begin(); // NOLINT
|
2017-04-01 07:20:54 +00:00
|
|
|
for (size_t i = 0; i < size; ++i)
|
|
|
|
{
|
2020-08-19 11:52:17 +00:00
|
|
|
const auto span_end = res->getData().begin() + offsets[i]; // NOLINT
|
2020-03-02 16:51:03 +00:00
|
|
|
for (; it != span_end; ++it)
|
|
|
|
*it = data[i];
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
2017-03-11 01:12:51 +00:00
|
|
|
|
Get rid of useless std::move to get NRVO
http://eel.is/c++draft/class.copy.elision#:constructor,copy,elision
Some quote:
> Speaking of RVO, return std::move(w); prohibits it. It means "use move constructor or fail to compile", whereas return w; means "use RVO, and if you can't, use move constructor, and if you can't, use copy constructor, and if you can't, fail to compile."
There is one exception to this rule:
```cpp
Block FilterBlockInputStream::removeFilterIfNeed(Block && block)
{
if (block && remove_filter)
block.erase(static_cast<size_t>(filter_column));
return std::move(block);
}
```
because references are not eligible for NRVO, which is another rule "always move rvalue references and forward universal references" that takes precedence.
2018-08-27 14:04:22 +00:00
|
|
|
return res;
|
2017-03-11 01:12:51 +00:00
|
|
|
}
|
|
|
|
|
2022-05-18 13:10:01 +00:00
|
|
|
#ifdef __SSE2__
|
2022-05-16 14:48:37 +00:00
|
|
|
template <typename T>
|
|
|
|
ColumnPtr ColumnVector<T>::replicateSSE2(const IColumn::Offsets & offsets) const
|
|
|
|
{
|
|
|
|
const size_t size = data.size();
|
|
|
|
if (size != offsets.size())
|
|
|
|
throw Exception("Size of offsets doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
|
|
|
|
|
|
|
|
if (0 == size)
|
|
|
|
return this->create();
|
|
|
|
|
|
|
|
auto res = this->create(offsets.back());
|
|
|
|
|
|
|
|
auto it = res->getData().begin(); // NOLINT
|
2022-05-21 12:12:43 +00:00
|
|
|
///Column used padded pod array. Don't worry about the 4 conitnue ops will out of range
|
2022-05-16 14:48:37 +00:00
|
|
|
if constexpr (std::is_same_v<T, UInt32>)
|
|
|
|
{
|
|
|
|
size_t prev_offset = 0;
|
|
|
|
int cp_begin = -1;
|
|
|
|
for (size_t i = 0; i < size; ++i)
|
|
|
|
{
|
|
|
|
size_t span = offsets[i] - prev_offset;
|
|
|
|
prev_offset = offsets[i];
|
|
|
|
if (span == 1)
|
|
|
|
{
|
|
|
|
if (cp_begin == -1)
|
|
|
|
cp_begin = i;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
///data : 11 22 33 44 55
|
|
|
|
///offsets: 0 1 2 3 3
|
|
|
|
///res: 22 33 44
|
|
|
|
size_t cpsz = (!(cp_begin == -1)) * (i - cp_begin);
|
|
|
|
bool remain = (cpsz & 3);
|
|
|
|
size_t sse_cp_counter = (cpsz >> 2);
|
|
|
|
sse_cp_counter = remain * (sse_cp_counter + 1) + (!remain) * (sse_cp_counter);
|
2022-05-19 11:13:12 +00:00
|
|
|
auto it_tmp = it; // NOLINT
|
2022-05-16 14:48:37 +00:00
|
|
|
size_t data_start = cp_begin;
|
|
|
|
cp_begin = -1;
|
|
|
|
constexpr const int msk_cp = (_MM_SHUFFLE(3, 2, 1, 0));
|
|
|
|
while (sse_cp_counter--)
|
|
|
|
{
|
|
|
|
__m128i cdata = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&data[data_start]));
|
|
|
|
auto cres = _mm_shuffle_epi32(cdata, msk_cp);
|
|
|
|
_mm_storeu_si128(reinterpret_cast<__m128i *>(it_tmp), cres);
|
|
|
|
it_tmp += 4;
|
|
|
|
data_start += 4;
|
|
|
|
}
|
|
|
|
it += cpsz;
|
|
|
|
if (span == 0)
|
|
|
|
{
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
///data : 11 22 33
|
|
|
|
///offsets: 0 0 4
|
|
|
|
///res: 33 33 33 33
|
|
|
|
size_t shuffle_sz = span;
|
|
|
|
bool shuffle_remain = (shuffle_sz & 3);
|
|
|
|
size_t sse_shuffle_counter = (shuffle_sz >> 2);
|
|
|
|
sse_shuffle_counter = shuffle_remain * (sse_shuffle_counter + 1) + (!shuffle_remain) * (sse_shuffle_counter);
|
|
|
|
it_tmp = it;
|
|
|
|
constexpr const int msk_shuffle = (_MM_SHUFFLE(0, 0, 0, 0));
|
|
|
|
__m128i cdata = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&data[i]));
|
|
|
|
while (sse_shuffle_counter--)
|
|
|
|
{
|
|
|
|
auto cres = _mm_shuffle_epi32(cdata, msk_shuffle);
|
|
|
|
_mm_storeu_si128(reinterpret_cast<__m128i *>(it_tmp), cres);
|
|
|
|
it_tmp += 4;
|
|
|
|
}
|
|
|
|
it += shuffle_sz;
|
|
|
|
}
|
|
|
|
///data : 11 22 33 44 55
|
|
|
|
///offsets: 1 2 3 4 5
|
|
|
|
///res: 11 22 33 44 55
|
|
|
|
if (cp_begin != -1)
|
|
|
|
{
|
|
|
|
size_t cpsz = (size - cp_begin);
|
|
|
|
bool remain = (cpsz & 3);
|
|
|
|
size_t sse_cp_counter = (cpsz >> 2);
|
|
|
|
sse_cp_counter = remain * (sse_cp_counter + 1) + (!remain) * (sse_cp_counter);
|
2022-05-19 11:13:12 +00:00
|
|
|
auto it_tmp = it; // NOLINT
|
2022-05-16 14:48:37 +00:00
|
|
|
size_t data_start = cp_begin;
|
|
|
|
constexpr const int msk_cp = (_MM_SHUFFLE(3, 2, 1, 0));
|
|
|
|
while (sse_cp_counter--)
|
|
|
|
{
|
|
|
|
__m128i cdata = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&data[data_start]));
|
|
|
|
auto cres = _mm_shuffle_epi32(cdata, msk_cp);
|
|
|
|
_mm_storeu_si128(reinterpret_cast<__m128i *>(it_tmp), cres);
|
|
|
|
it_tmp += 4;
|
|
|
|
data_start += 4;
|
|
|
|
}
|
|
|
|
it += cpsz;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2018-08-07 13:57:28 +00:00
|
|
|
template <typename T>
|
|
|
|
void ColumnVector<T>::gather(ColumnGathererStream & gatherer)
|
2017-07-06 13:54:55 +00:00
|
|
|
{
|
|
|
|
gatherer.gather(*this);
|
|
|
|
}
|
|
|
|
|
2018-08-07 13:57:28 +00:00
|
|
|
template <typename T>
|
|
|
|
void ColumnVector<T>::getExtremes(Field & min, Field & max) const
|
2017-03-11 01:12:51 +00:00
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
size_t size = data.size();
|
|
|
|
|
|
|
|
if (size == 0)
|
|
|
|
{
|
2018-10-22 08:54:54 +00:00
|
|
|
min = T(0);
|
|
|
|
max = T(0);
|
2017-04-01 07:20:54 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool has_value = false;
|
|
|
|
|
|
|
|
/** Skip all NaNs in extremes calculation.
|
|
|
|
* If all values are NaNs, then return NaN.
|
|
|
|
* NOTE: There exist many different NaNs.
|
|
|
|
* Different NaN could be returned: not bit-exact value as one of NaNs from column.
|
|
|
|
*/
|
|
|
|
|
|
|
|
T cur_min = NaNOrZero<T>();
|
|
|
|
T cur_max = NaNOrZero<T>();
|
|
|
|
|
2021-01-26 19:39:03 +00:00
|
|
|
for (const T & x : data)
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
|
|
|
if (isNaN(x))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (!has_value)
|
|
|
|
{
|
|
|
|
cur_min = x;
|
|
|
|
cur_max = x;
|
|
|
|
has_value = true;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (x < cur_min)
|
|
|
|
cur_min = x;
|
2017-09-14 11:52:22 +00:00
|
|
|
else if (x > cur_max)
|
2017-04-01 07:20:54 +00:00
|
|
|
cur_max = x;
|
|
|
|
}
|
|
|
|
|
2018-11-20 20:09:20 +00:00
|
|
|
min = NearestFieldType<T>(cur_min);
|
|
|
|
max = NearestFieldType<T>(cur_max);
|
2017-03-11 01:12:51 +00:00
|
|
|
}
|
|
|
|
|
2021-02-07 01:41:31 +00:00
|
|
|
|
|
|
|
/// Turn off the -Wold-style-cast diagnostic for the code that follows.
/// NOTE(review): no matching `#pragma GCC diagnostic pop` is visible in this part of the file;
/// presumably something used below triggers the warning - confirm which code needs it.
#pragma GCC diagnostic ignored "-Wold-style-cast"
|
|
|
|
|
|
|
|
template <typename T>
|
2021-02-12 00:25:00 +00:00
|
|
|
ColumnPtr ColumnVector<T>::compress() const
|
2021-02-07 01:41:31 +00:00
|
|
|
{
|
2021-12-18 20:13:42 +00:00
|
|
|
const size_t data_size = data.size();
|
|
|
|
const size_t source_size = data_size * sizeof(T);
|
2021-02-10 18:53:31 +00:00
|
|
|
|
|
|
|
/// Don't compress small blocks.
|
|
|
|
if (source_size < 4096) /// A wild guess.
|
2021-02-12 00:25:00 +00:00
|
|
|
return ColumnCompressed::wrap(this->getPtr());
|
2021-02-10 18:53:31 +00:00
|
|
|
|
2021-02-18 00:52:09 +00:00
|
|
|
auto compressed = ColumnCompressed::compressBuffer(data.data(), source_size, false);
|
2021-02-07 01:41:31 +00:00
|
|
|
|
2021-02-12 00:25:00 +00:00
|
|
|
if (!compressed)
|
|
|
|
return ColumnCompressed::wrap(this->getPtr());
|
2021-02-07 01:41:31 +00:00
|
|
|
|
2021-12-18 20:13:42 +00:00
|
|
|
const size_t compressed_size = compressed->size();
|
|
|
|
return ColumnCompressed::create(data_size, compressed_size,
|
|
|
|
[compressed = std::move(compressed), column_size = data_size]
|
2021-02-12 00:25:00 +00:00
|
|
|
{
|
|
|
|
auto res = ColumnVector<T>::create(column_size);
|
|
|
|
ColumnCompressed::decompressBuffer(
|
|
|
|
compressed->data(), res->getData().data(), compressed->size(), column_size * sizeof(T));
|
|
|
|
return res;
|
|
|
|
});
|
2021-02-07 01:41:31 +00:00
|
|
|
}
|
|
|
|
|
2021-04-01 18:18:28 +00:00
|
|
|
template <typename T>
|
2021-09-16 13:57:45 +00:00
|
|
|
ColumnPtr ColumnVector<T>::createWithOffsets(const IColumn::Offsets & offsets, const Field & default_field, size_t total_rows, size_t shift) const
|
2021-04-01 18:18:28 +00:00
|
|
|
{
|
2021-05-21 00:57:11 +00:00
|
|
|
if (offsets.size() + shift != size())
|
|
|
|
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
|
|
|
"Incompatible sizes of offsets ({}), shift ({}) and size of column {}", offsets.size(), shift, size());
|
|
|
|
|
2021-04-01 18:18:28 +00:00
|
|
|
auto res = this->create();
|
|
|
|
auto & res_data = res->getData();
|
|
|
|
|
2021-09-16 13:57:45 +00:00
|
|
|
T default_value = safeGet<T>(default_field);
|
|
|
|
res_data.resize_fill(total_rows, default_value);
|
2021-04-01 18:18:28 +00:00
|
|
|
for (size_t i = 0; i < offsets.size(); ++i)
|
2021-05-21 00:57:11 +00:00
|
|
|
res_data[offsets[i]] = data[i + shift];
|
2021-04-01 18:18:28 +00:00
|
|
|
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
2017-04-18 03:03:39 +00:00
|
|
|
/// Explicit template instantiations - to avoid code bloat in headers.
|
2018-08-07 13:57:28 +00:00
|
|
|
template class ColumnVector<UInt8>;
|
|
|
|
template class ColumnVector<UInt16>;
|
|
|
|
template class ColumnVector<UInt32>;
|
|
|
|
template class ColumnVector<UInt64>;
|
|
|
|
template class ColumnVector<UInt128>;
|
2020-09-01 09:54:50 +00:00
|
|
|
template class ColumnVector<UInt256>;
|
2018-08-07 13:57:28 +00:00
|
|
|
template class ColumnVector<Int8>;
|
|
|
|
template class ColumnVector<Int16>;
|
|
|
|
template class ColumnVector<Int32>;
|
|
|
|
template class ColumnVector<Int64>;
|
|
|
|
template class ColumnVector<Int128>;
|
2020-09-01 09:54:50 +00:00
|
|
|
template class ColumnVector<Int256>;
|
2018-08-07 13:57:28 +00:00
|
|
|
template class ColumnVector<Float32>;
|
|
|
|
template class ColumnVector<Float64>;
|
2021-05-03 22:46:51 +00:00
|
|
|
template class ColumnVector<UUID>;
|
2021-02-11 21:54:50 +00:00
|
|
|
|
2017-03-11 01:12:51 +00:00
|
|
|
}
|