mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-19 16:20:50 +00:00
Merge a446ff5524
into b4504f20bf
This commit is contained in:
commit
1a73bd79a4
@ -314,12 +314,12 @@ set (COMPILER_FLAGS "${COMPILER_FLAGS} -ffp-contract=off")
|
||||
set (DEBUG_INFO_FLAGS "-g")
|
||||
|
||||
# Disable omit frame pointer compiler optimization using -fno-omit-frame-pointer
|
||||
option(DISABLE_OMIT_FRAME_POINTER "Disable omit frame pointer compiler optimization" OFF)
|
||||
option(DISABLE_OMIT_FRAME_POINTER "Disable omit frame pointer compiler optimization" ON)
|
||||
|
||||
if (DISABLE_OMIT_FRAME_POINTER)
|
||||
set (CMAKE_CXX_FLAGS_ADD "${CMAKE_CXX_FLAGS_ADD} -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer")
|
||||
set (CMAKE_C_FLAGS_ADD "${CMAKE_C_FLAGS_ADD} -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer")
|
||||
set (CMAKE_ASM_FLAGS_ADD "${CMAKE_ASM_FLAGS_ADD} -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer")
|
||||
set (CMAKE_CXX_FLAGS_ADD "${CMAKE_CXX_FLAGS_ADD} -fno-omit-frame-pointer")
|
||||
set (CMAKE_C_FLAGS_ADD "${CMAKE_C_FLAGS_ADD} -fno-omit-frame-pointer")
|
||||
set (CMAKE_ASM_FLAGS_ADD "${CMAKE_ASM_FLAGS_ADD} -fno-omit-frame-pointer")
|
||||
endif()
|
||||
|
||||
# Before you start hating your debugger because it refuses to show variables ('<optimized out>'), try building with -DDEBUG_O_LEVEL="0"
|
||||
|
@ -739,8 +739,6 @@ struct ImplXXH3
|
||||
static constexpr bool use_int_hash_for_pods = false;
|
||||
};
|
||||
|
||||
DECLARE_MULTITARGET_CODE(
|
||||
|
||||
template <typename Impl, typename Name>
|
||||
class FunctionIntHash : public IFunction
|
||||
{
|
||||
@ -832,44 +830,11 @@ public:
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}",
|
||||
arguments[0].type->getName(), getName());
|
||||
}
|
||||
|
||||
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionIntHash>(); }
|
||||
};
|
||||
|
||||
) // DECLARE_MULTITARGET_CODE
|
||||
|
||||
template <typename Impl, typename Name>
|
||||
class FunctionIntHash : public TargetSpecific::Default::FunctionIntHash<Impl, Name>
|
||||
{
|
||||
public:
|
||||
explicit FunctionIntHash(ContextPtr context) : selector(context)
|
||||
{
|
||||
selector.registerImplementation<TargetArch::Default,
|
||||
TargetSpecific::Default::FunctionIntHash<Impl, Name>>();
|
||||
|
||||
#if USE_MULTITARGET_CODE
|
||||
selector.registerImplementation<TargetArch::AVX2,
|
||||
TargetSpecific::AVX2::FunctionIntHash<Impl, Name>>();
|
||||
selector.registerImplementation<TargetArch::AVX512F,
|
||||
TargetSpecific::AVX512F::FunctionIntHash<Impl, Name>>();
|
||||
#endif
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
|
||||
{
|
||||
return selector.selectAndExecute(arguments, result_type, input_rows_count);
|
||||
}
|
||||
|
||||
static FunctionPtr create(ContextPtr context)
|
||||
{
|
||||
return std::make_shared<FunctionIntHash>(context);
|
||||
}
|
||||
|
||||
private:
|
||||
ImplementationSelector<IFunction> selector;
|
||||
};
|
||||
|
||||
DECLARE_MULTITARGET_CODE(
|
||||
|
||||
template <typename Impl, bool Keyed, typename KeyType, typename KeyColumnsType>
|
||||
template <typename Impl, bool Keyed = false, typename KeyType = char, typename KeyColumnsType = char>
|
||||
class FunctionAnyHash : public IFunction
|
||||
{
|
||||
public:
|
||||
@ -1396,38 +1361,8 @@ public:
|
||||
else
|
||||
return Impl::combineHashes(h1, h2);
|
||||
}
|
||||
};
|
||||
|
||||
) // DECLARE_MULTITARGET_CODE
|
||||
|
||||
template <typename Impl, bool Keyed = false, typename KeyType = char, typename KeyColumnsType = char>
|
||||
class FunctionAnyHash : public TargetSpecific::Default::FunctionAnyHash<Impl, Keyed, KeyType, KeyColumnsType>
|
||||
{
|
||||
public:
|
||||
explicit FunctionAnyHash(ContextPtr context) : selector(context)
|
||||
{
|
||||
selector
|
||||
.registerImplementation<TargetArch::Default, TargetSpecific::Default::FunctionAnyHash<Impl, Keyed, KeyType, KeyColumnsType>>();
|
||||
|
||||
#if USE_MULTITARGET_CODE
|
||||
selector.registerImplementation<TargetArch::AVX2, TargetSpecific::AVX2::FunctionAnyHash<Impl, Keyed, KeyType, KeyColumnsType>>();
|
||||
selector
|
||||
.registerImplementation<TargetArch::AVX512F, TargetSpecific::AVX512F::FunctionAnyHash<Impl, Keyed, KeyType, KeyColumnsType>>();
|
||||
#endif
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
|
||||
{
|
||||
return selector.selectAndExecute(arguments, result_type, input_rows_count);
|
||||
}
|
||||
|
||||
static FunctionPtr create(ContextPtr context)
|
||||
{
|
||||
return std::make_shared<FunctionAnyHash>(context);
|
||||
}
|
||||
|
||||
private:
|
||||
ImplementationSelector<IFunction> selector;
|
||||
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionAnyHash>(); }
|
||||
};
|
||||
|
||||
|
||||
|
@ -4,8 +4,11 @@
|
||||
#include <Common/HashTable/Hash.h>
|
||||
#include <Common/randomSeed.h>
|
||||
#include <base/unaligned.h>
|
||||
|
||||
#if USE_MULTITARGET_CODE
|
||||
# include <x86intrin.h>
|
||||
# include <Common/TargetSpecific.h>
|
||||
|
||||
# include <immintrin.h>
|
||||
#endif
|
||||
|
||||
|
||||
@ -64,59 +67,62 @@ namespace
|
||||
0x15762761bb55b9acULL, 0x3e448fc94fdd28e7ULL, 0xa5121232adfbe70aULL, 0xb1e0f6d286112804ULL,
|
||||
0x6062e96de9554806ULL, 0xcc679b329c28882aULL, 0x5c6d29f45cbc060eULL, 0x1af1325a86ffb162ULL,
|
||||
};
|
||||
}
|
||||
|
||||
DECLARE_DEFAULT_CODE(
|
||||
|
||||
void RandImpl::execute(char * output, size_t size)
|
||||
{
|
||||
LinearCongruentialGenerator generator0;
|
||||
LinearCongruentialGenerator generator1;
|
||||
LinearCongruentialGenerator generator2;
|
||||
LinearCongruentialGenerator generator3;
|
||||
|
||||
UInt64 rand_seed = randomSeed();
|
||||
|
||||
seed(generator0, rand_seed, random_numbers[0] + reinterpret_cast<intptr_t>(output));
|
||||
seed(generator1, rand_seed, random_numbers[1] + reinterpret_cast<intptr_t>(output));
|
||||
seed(generator2, rand_seed, random_numbers[2] + reinterpret_cast<intptr_t>(output));
|
||||
seed(generator3, rand_seed, random_numbers[3] + reinterpret_cast<intptr_t>(output));
|
||||
|
||||
for (const char * end = output + size; output < end; output += 16)
|
||||
void randImpl(char * output, size_t size)
|
||||
{
|
||||
unalignedStore<UInt32>(output, generator0.next());
|
||||
unalignedStore<UInt32>(output + 4, generator1.next());
|
||||
unalignedStore<UInt32>(output + 8, generator2.next());
|
||||
unalignedStore<UInt32>(output + 12, generator3.next());
|
||||
LinearCongruentialGenerator generator0;
|
||||
LinearCongruentialGenerator generator1;
|
||||
LinearCongruentialGenerator generator2;
|
||||
LinearCongruentialGenerator generator3;
|
||||
|
||||
UInt64 rand_seed = randomSeed();
|
||||
|
||||
seed(generator0, rand_seed, random_numbers[0] + reinterpret_cast<intptr_t>(output));
|
||||
seed(generator1, rand_seed, random_numbers[1] + reinterpret_cast<intptr_t>(output));
|
||||
seed(generator2, rand_seed, random_numbers[2] + reinterpret_cast<intptr_t>(output));
|
||||
seed(generator3, rand_seed, random_numbers[3] + reinterpret_cast<intptr_t>(output));
|
||||
|
||||
for (const char * end = output + size; output < end; output += 16)
|
||||
{
|
||||
unalignedStore<UInt32>(output, generator0.next());
|
||||
unalignedStore<UInt32>(output + 4, generator1.next());
|
||||
unalignedStore<UInt32>(output + 8, generator2.next());
|
||||
unalignedStore<UInt32>(output + 12, generator3.next());
|
||||
}
|
||||
/// It is guaranteed (by PaddedPODArray) that we can overwrite up to 15 bytes after end.
|
||||
}
|
||||
/// It is guaranteed (by PaddedPODArray) that we can overwrite up to 15 bytes after end.
|
||||
}
|
||||
|
||||
) // DECLARE_DEFAULT_CODE
|
||||
|
||||
DECLARE_AVX2_SPECIFIC_CODE(
|
||||
#if USE_MULTITARGET_CODE
|
||||
|
||||
using namespace VectorExtension;
|
||||
|
||||
/* Takes 2 vectors with LinearCongruentialGenerator states and combines them into vector with random values.
|
||||
* From every rand-state we use only bits 15...47 to generate random vector.
|
||||
*/
|
||||
inline UInt64x4 combineValues(UInt64x4 a, UInt64x4 b)
|
||||
AVX2_FUNCTION_SPECIFIC_ATTRIBUTE ALWAYS_INLINE inline UInt64x4 combineValuesAVX2(UInt64x4 & a, UInt64x4 & b)
|
||||
{
|
||||
auto xa = reinterpret_cast<__m256i>(a);
|
||||
auto xb = reinterpret_cast<__m256i>(b);
|
||||
/// Every state is 8-byte value and we need to use only 4 from the middle.
|
||||
/// Swap the low half and the high half of every state to move these bytes from the middle to sides.
|
||||
/// xa = xa[1, 0, 3, 2, 5, 4, 7, 6]
|
||||
|
||||
/// 2 128-bit lanes
|
||||
/// Each lane consist of 4 32-bit words
|
||||
/// We only want to keep the 4 words of the middle so we move them to the sides
|
||||
/// Mask: 0xb1 => 0b10110001 => Order: 2, 3, 0, 1
|
||||
/// xa = a[2, 3, 0, 1, 6, 7, 4, 5]
|
||||
xa = _mm256_shuffle_epi32(xa, 0xb1);
|
||||
/// Now every 8-byte value in xa is xx....xx and every value in xb is ..xxxx.. where x is random byte we want to use.
|
||||
|
||||
/// Now every 128-bit lane in xa is xx....xx and every value in xb is ..xxxx.. where x is random byte we want to use.
|
||||
/// Now each lane consists of 8 16-bit words
|
||||
/// Just blend them to get the result vector.
|
||||
/// result = xa[0],xb[1,2],xa[3,4],xb[5,6],xa[7,8],xb[9,10],xa[11,12],xb[13,14],xa[15]
|
||||
/// Mask (least significant 8 bits): 0x66 => 0b01100110 => a_b_b_a_a_b_b_a (x2)
|
||||
/// result = xa[0],xb[1,2],xa[3,4],xb[5,6],xa[7] - xa[8],xb[9,10],xa[11,12],xb[13,14],xa[15]
|
||||
/// Final: a[2], b[1], b[2], a[1], a[6], b[5], b[6], a[5] - a[10], b[9], b[10], a[9], a[14], b[13], b[14], a[13]
|
||||
__m256i result = _mm256_blend_epi16(xa, xb, 0x66);
|
||||
return reinterpret_cast<UInt64x4>(result);
|
||||
}
|
||||
|
||||
void RandImpl::execute(char * output, size_t size)
|
||||
AVX2_FUNCTION_SPECIFIC_ATTRIBUTE void NO_INLINE RandImpl::executeAVX2(char * output, size_t size)
|
||||
{
|
||||
if (size == 0)
|
||||
return;
|
||||
@ -130,13 +136,6 @@ void RandImpl::execute(char * output, size_t size)
|
||||
UInt64 rand_seed = randomSeed();
|
||||
|
||||
UInt64 a = LinearCongruentialGenerator::a;
|
||||
// TODO(dakovalkov): try to remove this.
|
||||
/// Note: GCC likes to expand multiplication by a constant into shifts + additions.
|
||||
/// In this case a few multiplications become tens of shifts and additions. That leads to a huge slow down.
|
||||
/// To avoid it we pretend that 'a' is not a constant. Actually we hope that rand_seed is never 0.
|
||||
if (rand_seed == 0)
|
||||
a = LinearCongruentialGenerator::a + 2;
|
||||
|
||||
constexpr UInt64 c = LinearCongruentialGenerator::c;
|
||||
|
||||
UInt64x4 gens1{};
|
||||
@ -156,16 +155,16 @@ void RandImpl::execute(char * output, size_t size)
|
||||
{
|
||||
gens1 = gens1 * a + c;
|
||||
gens2 = gens2 * a + c;
|
||||
unalignedStore<UInt64x4>(output, combineValues(gens1, gens2));
|
||||
unalignedStore<UInt64x4>(output, combineValuesAVX2(gens1, gens2));
|
||||
gens3 = gens3 * a + c;
|
||||
gens4 = gens4 * a + c;
|
||||
unalignedStore<UInt64x4>(output + sizeof(UInt64x4), combineValues(gens3, gens4));
|
||||
unalignedStore<UInt64x4>(output + sizeof(UInt64x4), combineValuesAVX2(gens3, gens4));
|
||||
gens1 = gens1 * a + c;
|
||||
gens2 = gens2 * a + c;
|
||||
unalignedStore<UInt64x4>(output + 2 * sizeof(UInt64x4), combineValues(gens1, gens2));
|
||||
unalignedStore<UInt64x4>(output + 2 * sizeof(UInt64x4), combineValuesAVX2(gens1, gens2));
|
||||
gens3 = gens3 * a + c;
|
||||
gens4 = gens4 * a + c;
|
||||
unalignedStore<UInt64x4>(output + 3 * sizeof(UInt64x4), combineValues(gens3, gens4));
|
||||
unalignedStore<UInt64x4>(output + 3 * sizeof(UInt64x4), combineValuesAVX2(gens3, gens4));
|
||||
output += bytes_per_write;
|
||||
}
|
||||
|
||||
@ -174,7 +173,7 @@ void RandImpl::execute(char * output, size_t size)
|
||||
{
|
||||
gens1 = gens1 * a + c;
|
||||
gens2 = gens2 * a + c;
|
||||
UInt64x4 values = combineValues(gens1, gens2);
|
||||
UInt64x4 values = combineValuesAVX2(gens1, gens2);
|
||||
for (int i = 0; i < vec_size && (end - output) > 0; ++i)
|
||||
{
|
||||
unalignedStore<UInt64>(output, values[i]);
|
||||
@ -183,6 +182,108 @@ void RandImpl::execute(char * output, size_t size)
|
||||
}
|
||||
}
|
||||
|
||||
) // DECLARE_AVX2_SPECIFIC_CODE
|
||||
|
||||
/* Takes 2 vectors with LinearCongruentialGenerator states and combines them into vector with random values.
|
||||
* From every rand-state we use only bits 15...47 to generate random vector.
|
||||
*/
|
||||
/// AVX512BW variant: `a` and `b` each hold eight 64-bit generator states.
/// Neither argument is modified: both are only read into local __m512i copies.
/// In every 64-bit element of the result, 16-bit words 0 and 3 come from `a` (after its 32-bit halves
/// are swapped) and 16-bit words 1 and 2 come from `b`.
AVX512BW_FUNCTION_SPECIFIC_ATTRIBUTE ALWAYS_INLINE inline UInt64x8 combineValuesAVX512BW(UInt64x8 & a, UInt64x8 & b)
|
||||
{
|
||||
auto xa = reinterpret_cast<__m512i>(a);
|
||||
auto xb = reinterpret_cast<__m512i>(b);
|
||||
|
||||
/// The 512-bit register is processed as 4 independent 128-bit lanes.
|
||||
/// Each lane consists of 4 32-bit words (two 64-bit states).
|
||||
/// From every 64-bit state we keep the 4 middle bytes; swapping the 32-bit halves of the state moves them to its sides.
|
||||
/// Mask: 0xb1 => 0b10110001 => selector fields read from the most significant pair down: 2, 3, 0, 1
/// (destination word 3 <- source 2, 2 <- 3, 1 <- 0, 0 <- 1), i.e. adjacent 32-bit words are swapped.
|
||||
/// In 16-bit word units (first two lanes shown, the other two follow the same pattern):
/// xa = a[2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13]
|
||||
xa = _mm512_shuffle_epi32(xa, 0xb1); // 0b10110001 => 2_3_0_1 (128 bits x 4 times)
|
||||
|
||||
/// Now every 8-byte value in xa is xx....xx and every value in xb is ..xxxx.. where x is random byte we want to use.
|
||||
/// Viewed as 16-bit words, the whole register holds 32 of them (8 per 128-bit lane, 4 per 64-bit state).
|
||||
/// Just blend them to get the result vector.
|
||||
/// Mask (all 32 bits are used): 0x66666666 => 0b01100110011001100110011001100110
/// A set bit takes the 16-bit word from xb, a clear bit takes it from xa: words 1 and 2 of every state come from xb.
|
||||
__m512i result = _mm512_mask_blend_epi16(0x66666666, xa, xb);
|
||||
return reinterpret_cast<UInt64x8>(result);
|
||||
}
|
||||
|
||||
/// Fills `size` bytes starting at `output` with pseudo-random data using four vectors of eight
/// linear congruential generator states each (state = state * a + c, applied element-wise).
/// The main loop may write past `output + size` by up to `safe_overwrite` bytes and the tail loop by up to
/// sizeof(UInt64) - 1 bytes, so the caller must provide that much writable slack after the region.
/// NOTE(review): the value of PADDING_FOR_SIMD is defined outside this view - confirm it matches the padding of callers' buffers.
AVX512BW_FUNCTION_SPECIFIC_ATTRIBUTE void NO_INLINE RandImpl::executeAVX512BW(char * output, size_t size)
|
||||
{
|
||||
/// Nothing to generate: return before seeding or touching memory.
if (size == 0)
|
||||
return;
|
||||
|
||||
char * end = output + size;
|
||||
|
||||
/// Number of 64-bit states in one UInt64x8 vector.
constexpr int vec_size = 8;
|
||||
/// Number of bytes the main loop is allowed to write beyond `end`.
constexpr int safe_overwrite = PADDING_FOR_SIMD - 1;
|
||||
/// One main-loop iteration stores four full vectors.
constexpr int bytes_per_write = 4 * sizeof(UInt64x8);
|
||||
|
||||
UInt64 rand_seed = randomSeed();
|
||||
|
||||
UInt64 a = LinearCongruentialGenerator::a;
|
||||
constexpr UInt64 c = LinearCongruentialGenerator::c;
|
||||
|
||||
UInt64x8 gens1{};
|
||||
UInt64x8 gens2{};
|
||||
UInt64x8 gens3{};
|
||||
UInt64x8 gens4{};
|
||||
|
||||
/// Seed every state from the common random seed, a distinct table constant and the output address.
/// Indices 0 .. 4 * vec_size - 1 of random_numbers are read.
/// NOTE(review): assumes random_numbers has at least 32 entries - its full definition is not visible here, confirm.
for (int i = 0; i < vec_size; ++i)
|
||||
{
|
||||
gens1[i] = calcSeed(rand_seed, random_numbers[i] + reinterpret_cast<intptr_t>(output));
|
||||
gens2[i] = calcSeed(rand_seed, random_numbers[i + vec_size] + reinterpret_cast<intptr_t>(output));
|
||||
gens3[i] = calcSeed(rand_seed, random_numbers[i + 2 * vec_size] + reinterpret_cast<intptr_t>(output));
|
||||
gens4[i] = calcSeed(rand_seed, random_numbers[i + 3 * vec_size] + reinterpret_cast<intptr_t>(output));
|
||||
}
|
||||
|
||||
/// Main loop: runs while a full block of bytes_per_write fits into the remaining bytes plus the allowed overwrite.
/// Every generator pair is advanced and stored twice per iteration.
while ((end - output) + safe_overwrite >= bytes_per_write)
|
||||
{
|
||||
gens1 = gens1 * a + c;
|
||||
gens2 = gens2 * a + c;
|
||||
unalignedStore<UInt64x8>(output, combineValuesAVX512BW(gens1, gens2));
|
||||
gens3 = gens3 * a + c;
|
||||
gens4 = gens4 * a + c;
|
||||
unalignedStore<UInt64x8>(output + sizeof(UInt64x8), combineValuesAVX512BW(gens3, gens4));
|
||||
gens1 = gens1 * a + c;
|
||||
gens2 = gens2 * a + c;
|
||||
unalignedStore<UInt64x8>(output + 2 * sizeof(UInt64x8), combineValuesAVX512BW(gens1, gens2));
|
||||
gens3 = gens3 * a + c;
|
||||
gens4 = gens4 * a + c;
|
||||
unalignedStore<UInt64x8>(output + 3 * sizeof(UInt64x8), combineValuesAVX512BW(gens3, gens4));
|
||||
output += bytes_per_write;
|
||||
}
|
||||
|
||||
// Process tail
/// Only gens1 and gens2 are used here; values are stored 8 bytes at a time until `end` is reached.
|
||||
while ((end - output) > 0)
|
||||
{
|
||||
gens1 = gens1 * a + c;
|
||||
gens2 = gens2 * a + c;
|
||||
UInt64x8 values = combineValuesAVX512BW(gens1, gens2);
|
||||
/// The last store may extend up to sizeof(UInt64) - 1 bytes past `end`.
for (int i = 0; i < vec_size && (end - output) > 0; ++i)
|
||||
{
|
||||
unalignedStore<UInt64>(output, values[i]);
|
||||
output += sizeof(UInt64);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
void RandImpl::execute(char * output, size_t size)
|
||||
{
|
||||
#if USE_MULTITARGET_CODE
|
||||
if (isArchSupported(TargetArch::AVX512BW))
|
||||
{
|
||||
executeAVX512BW(output, size);
|
||||
return;
|
||||
}
|
||||
|
||||
if (isArchSupported(TargetArch::AVX2))
|
||||
{
|
||||
executeAVX2(output, size);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
randImpl(output, size);
|
||||
}
|
||||
}
|
||||
|
@ -36,18 +36,20 @@ namespace ErrorCodes
|
||||
* This means that the timer must be of sufficient resolution to give different values to each columns.
|
||||
*/
|
||||
|
||||
DECLARE_MULTITARGET_CODE(
|
||||
|
||||
/// Static entry points for filling a raw byte buffer with pseudo-random data.
struct RandImpl
|
||||
{
|
||||
/// Fill memory with random data. The memory region must be 15-bytes padded.
/// NOTE(review): executeAVX512BW's main loop is written against PADDING_FOR_SIMD - 1 bytes of slack after
/// the region, so the 15-byte figure may be too small for that path - confirm against PADDING_FOR_SIMD.
|
||||
static void execute(char * output, size_t size);
|
||||
|
||||
#if USE_MULTITARGET_CODE
|
||||
/// Assumes isArchSupported has been verified before calling
/// Both take the same arguments as execute(): destination pointer and number of bytes to fill.
|
||||
static void executeAVX2(char * output, size_t size);
|
||||
static void executeAVX512BW(char * output, size_t size);
|
||||
#endif
|
||||
};
|
||||
|
||||
) // DECLARE_MULTITARGET_CODE
|
||||
|
||||
template <typename RandImpl, typename ToType, typename Name>
|
||||
class FunctionRandomImpl : public IFunction
|
||||
template <typename ToType, typename Name>
|
||||
class FunctionRandom : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = Name::name;
|
||||
@ -85,35 +87,8 @@ public:
|
||||
|
||||
return col_to;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename ToType, typename Name>
|
||||
class FunctionRandom : public FunctionRandomImpl<TargetSpecific::Default::RandImpl, ToType, Name>
|
||||
{
|
||||
public:
|
||||
explicit FunctionRandom(ContextPtr context) : selector(context)
|
||||
{
|
||||
selector.registerImplementation<TargetArch::Default,
|
||||
FunctionRandomImpl<TargetSpecific::Default::RandImpl, ToType, Name>>();
|
||||
|
||||
#if USE_MULTITARGET_CODE
|
||||
selector.registerImplementation<TargetArch::AVX2,
|
||||
FunctionRandomImpl<TargetSpecific::AVX2::RandImpl, ToType, Name>>();
|
||||
#endif
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
|
||||
{
|
||||
return selector.selectAndExecute(arguments, result_type, input_rows_count);
|
||||
}
|
||||
|
||||
static FunctionPtr create(ContextPtr context)
|
||||
{
|
||||
return std::make_shared<FunctionRandom<ToType, Name>>(context);
|
||||
}
|
||||
|
||||
private:
|
||||
ImplementationSelector<IFunction> selector;
|
||||
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionRandom<ToType, Name>>(); }
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -37,10 +37,24 @@ struct NameCanonicalRand
|
||||
static constexpr auto name = "randCanonical";
|
||||
};
|
||||
|
||||
class FunctionCanonicalRand : public FunctionRandomImpl<CanonicalRandImpl, Float64, NameCanonicalRand>
|
||||
/// Function object registered under NameCanonicalRand: produces a Float64 column with one value per
/// input row, with the raw column memory filled by CanonicalRandImpl::execute.
class FunctionCanonicalRand : public FunctionRandom<Float64, NameCanonicalRand>
{
    using ToType = Float64;

public:
    static FunctionPtr create(ContextPtr /*context*/) { return std::make_shared<FunctionCanonicalRand>(); }

    /// Arguments and result type are ignored; only the row count determines the output size.
    ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override
    {
        auto result_column = ColumnVector<ToType>::create();
        typename ColumnVector<ToType>::Container & values = result_column->getData();
        values.resize(input_rows_count);

        /// The generator works on raw bytes, so hand it the storage as a char buffer.
        char * raw_begin = reinterpret_cast<char *>(values.data());
        CanonicalRandImpl::execute(raw_begin, values.size() * sizeof(ToType));

        return result_column;
    }
};
|
||||
|
||||
}
|
||||
|
@ -2,15 +2,49 @@
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/FunctionsRandom.h>
|
||||
#include <Common/TargetSpecific.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
#define DECLARE_SEVERAL_IMPLEMENTATIONS(...) \
|
||||
DECLARE_DEFAULT_CODE (__VA_ARGS__) \
|
||||
DECLARE_AVX2_SPECIFIC_CODE(__VA_ARGS__)
|
||||
namespace
|
||||
{
|
||||
|
||||
DECLARE_SEVERAL_IMPLEMENTATIONS(
|
||||
/// Fills `vec_to` with random bytes through RandImpl::execute (which selects the implementation itself)
/// and then overwrites the version and variant bits of every element.
void generateUUID4Generic(ColumnVector<UUID>::Container & vec_to)
{
    /// https://tools.ietf.org/html/rfc4122#section-4.4
    constexpr auto version_keep_mask = 0xffffffffffff0fffull;
    constexpr auto version_bits = 0x0000000000004000ull;
    constexpr auto variant_keep_mask = 0x3fffffffffffffffull;
    constexpr auto variant_bits = 0x8000000000000000ull;

    char * const raw_bytes = reinterpret_cast<char *>(vec_to.data());
    RandImpl::execute(raw_bytes, vec_to.size() * sizeof(UUID));

    for (UUID & item : vec_to)
    {
        UUIDHelpers::getHighBytes(item) = (UUIDHelpers::getHighBytes(item) & version_keep_mask) | version_bits;
        UUIDHelpers::getLowBytes(item) = (UUIDHelpers::getLowBytes(item) & variant_keep_mask) | variant_bits;
    }
}
|
||||
|
||||
#if USE_MULTITARGET_CODE
|
||||
|
||||
/// AVX2 flavour of UUIDv4 generation: random bytes come from RandImpl::executeAVX2, after which the
/// version and variant bits of every element are overwritten.
/// The caller is expected to have checked AVX2 support before calling.
AVX2_FUNCTION_SPECIFIC_ATTRIBUTE void NO_INLINE generateUUID4AVX2(ColumnVector<UUID>::Container & vec_to)
{
    constexpr auto version_keep_mask = 0xffffffffffff0fffull;
    constexpr auto version_bits = 0x0000000000004000ull;
    constexpr auto variant_keep_mask = 0x3fffffffffffffffull;
    constexpr auto variant_bits = 0x8000000000000000ull;

    char * const raw_bytes = reinterpret_cast<char *>(vec_to.data());
    RandImpl::executeAVX2(raw_bytes, vec_to.size() * sizeof(UUID));

    for (UUID & item : vec_to)
    {
        UUIDHelpers::getHighBytes(item) = (UUIDHelpers::getHighBytes(item) & version_keep_mask) | version_bits;
        UUIDHelpers::getLowBytes(item) = (UUIDHelpers::getLowBytes(item) & variant_keep_mask) | variant_bits;
    }
}
|
||||
|
||||
/// AVX512BW flavour of UUIDv4 generation: random bytes come from RandImpl::executeAVX512BW, after which
/// the version and variant bits of every element are overwritten.
/// The caller is expected to have checked AVX512BW support before calling.
AVX512BW_FUNCTION_SPECIFIC_ATTRIBUTE void NO_INLINE generateUUID4AVX512BW(ColumnVector<UUID>::Container & vec_to)
{
    constexpr auto version_keep_mask = 0xffffffffffff0fffull;
    constexpr auto version_bits = 0x0000000000004000ull;
    constexpr auto variant_keep_mask = 0x3fffffffffffffffull;
    constexpr auto variant_bits = 0x8000000000000000ull;

    char * const raw_bytes = reinterpret_cast<char *>(vec_to.data());
    RandImpl::executeAVX512BW(raw_bytes, vec_to.size() * sizeof(UUID));

    for (UUID & item : vec_to)
    {
        UUIDHelpers::getHighBytes(item) = (UUIDHelpers::getHighBytes(item) & version_keep_mask) | version_bits;
        UUIDHelpers::getLowBytes(item) = (UUIDHelpers::getLowBytes(item) & variant_keep_mask) | variant_bits;
    }
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
class FunctionGenerateUUIDv4 : public IFunction
|
||||
{
|
||||
@ -44,51 +78,27 @@ public:
|
||||
|
||||
size_t size = input_rows_count;
|
||||
vec_to.resize(size);
|
||||
|
||||
/// RandImpl is target-dependent and is not the same in different TargetSpecific namespaces.
|
||||
RandImpl::execute(reinterpret_cast<char *>(vec_to.data()), vec_to.size() * sizeof(UUID));
|
||||
|
||||
for (UUID & uuid : vec_to)
|
||||
{
|
||||
/// https://tools.ietf.org/html/rfc4122#section-4.4
|
||||
|
||||
UUIDHelpers::getHighBytes(uuid) = (UUIDHelpers::getHighBytes(uuid) & 0xffffffffffff0fffull) | 0x0000000000004000ull;
|
||||
UUIDHelpers::getLowBytes(uuid) = (UUIDHelpers::getLowBytes(uuid) & 0x3fffffffffffffffull) | 0x8000000000000000ull;
|
||||
}
|
||||
|
||||
return col_res;
|
||||
}
|
||||
};
|
||||
|
||||
) // DECLARE_SEVERAL_IMPLEMENTATIONS
|
||||
#undef DECLARE_SEVERAL_IMPLEMENTATIONS
|
||||
|
||||
class FunctionGenerateUUIDv4 : public TargetSpecific::Default::FunctionGenerateUUIDv4
|
||||
{
|
||||
public:
|
||||
explicit FunctionGenerateUUIDv4(ContextPtr context) : selector(context)
|
||||
{
|
||||
selector.registerImplementation<TargetArch::Default,
|
||||
TargetSpecific::Default::FunctionGenerateUUIDv4>();
|
||||
if (!size)
|
||||
return col_res;
|
||||
|
||||
#if USE_MULTITARGET_CODE
|
||||
selector.registerImplementation<TargetArch::AVX2,
|
||||
TargetSpecific::AVX2::FunctionGenerateUUIDv4>();
|
||||
if (isArchSupported(TargetArch::AVX512BW))
|
||||
{
|
||||
generateUUID4AVX512BW(vec_to);
|
||||
return col_res;
|
||||
}
|
||||
|
||||
if (isArchSupported(TargetArch::AVX2))
|
||||
{
|
||||
generateUUID4AVX2(vec_to);
|
||||
return col_res;
|
||||
}
|
||||
#endif
|
||||
generateUUID4Generic(vec_to);
|
||||
return col_res;
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
|
||||
{
|
||||
return selector.selectAndExecute(arguments, result_type, input_rows_count);
|
||||
}
|
||||
|
||||
static FunctionPtr create(ContextPtr context)
|
||||
{
|
||||
return std::make_shared<FunctionGenerateUUIDv4>(context);
|
||||
}
|
||||
|
||||
private:
|
||||
ImplementationSelector<IFunction> selector;
|
||||
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionGenerateUUIDv4>(); }
|
||||
};
|
||||
|
||||
REGISTER_FUNCTION(GenerateUUIDv4)
|
||||
|
@ -107,7 +107,7 @@ public:
|
||||
|
||||
typename ColumnVector<ToType>::Container vec_to(1);
|
||||
|
||||
TargetSpecific::Default::RandImpl::execute(reinterpret_cast<char *>(vec_to.data()), sizeof(ToType));
|
||||
RandImpl::execute(reinterpret_cast<char *>(vec_to.data()), sizeof(ToType));
|
||||
ToType value = vec_to[0];
|
||||
|
||||
return std::make_unique<FunctionBaseRandomConstant<ToType, Name>>(value, argument_types, return_type);
|
||||
|
@ -25,8 +25,7 @@ namespace
|
||||
{
|
||||
|
||||
/* Generate random fixed string with fully random bytes (including zero). */
|
||||
template <typename RandImpl>
|
||||
class FunctionRandomFixedStringImpl : public IFunction
|
||||
class FunctionRandomFixedString : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "randomFixedString";
|
||||
@ -74,34 +73,8 @@ public:
|
||||
|
||||
return col_to;
|
||||
}
|
||||
};
|
||||
|
||||
class FunctionRandomFixedString : public FunctionRandomFixedStringImpl<TargetSpecific::Default::RandImpl>
|
||||
{
|
||||
public:
|
||||
explicit FunctionRandomFixedString(ContextPtr context) : selector(context)
|
||||
{
|
||||
selector.registerImplementation<TargetArch::Default,
|
||||
FunctionRandomFixedStringImpl<TargetSpecific::Default::RandImpl>>();
|
||||
|
||||
#if USE_MULTITARGET_CODE
|
||||
selector.registerImplementation<TargetArch::AVX2,
|
||||
FunctionRandomFixedStringImpl<TargetSpecific::AVX2::RandImpl>>();
|
||||
#endif
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
|
||||
{
|
||||
return selector.selectAndExecute(arguments, result_type, input_rows_count);
|
||||
}
|
||||
|
||||
static FunctionPtr create(ContextPtr context)
|
||||
{
|
||||
return std::make_shared<FunctionRandomFixedString>(context);
|
||||
}
|
||||
|
||||
private:
|
||||
ImplementationSelector<IFunction> selector;
|
||||
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionRandomFixedString>(); }
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -23,8 +23,7 @@ namespace
|
||||
{
|
||||
|
||||
/* Generate random string of specified length with fully random bytes (including zero). */
|
||||
template <typename RandImpl>
|
||||
class FunctionRandomStringImpl : public IFunction
|
||||
class FunctionRandomString : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "randomString";
|
||||
@ -92,34 +91,8 @@ public:
|
||||
|
||||
return col_to;
|
||||
}
|
||||
};
|
||||
|
||||
class FunctionRandomString : public FunctionRandomStringImpl<TargetSpecific::Default::RandImpl>
|
||||
{
|
||||
public:
|
||||
explicit FunctionRandomString(ContextPtr context) : selector(context)
|
||||
{
|
||||
selector.registerImplementation<TargetArch::Default,
|
||||
FunctionRandomStringImpl<TargetSpecific::Default::RandImpl>>();
|
||||
|
||||
#if USE_MULTITARGET_CODE
|
||||
selector.registerImplementation<TargetArch::AVX2,
|
||||
FunctionRandomStringImpl<TargetSpecific::AVX2::RandImpl>>();
|
||||
#endif
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
|
||||
{
|
||||
return selector.selectAndExecute(arguments, result_type, input_rows_count);
|
||||
}
|
||||
|
||||
static FunctionPtr create(ContextPtr context)
|
||||
{
|
||||
return std::make_shared<FunctionRandomString>(context);
|
||||
}
|
||||
|
||||
private:
|
||||
ImplementationSelector<IFunction> selector;
|
||||
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionRandomString>(); }
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -19,6 +19,4 @@
|
||||
<uncompressed_cache_size>1000000000</uncompressed_cache_size>
|
||||
|
||||
<asynchronous_metrics_update_period_s>10</asynchronous_metrics_update_period_s>
|
||||
|
||||
<remap_executable replace="replace">true</remap_executable>
|
||||
</clickhouse>
|
||||
|
Loading…
Reference in New Issue
Block a user