mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-26 01:22:04 +00:00
More rand implementations
This commit is contained in:
parent
66d530e901
commit
90bc3e6136
@ -3,19 +3,18 @@
|
|||||||
#include <Common/HashTable/Hash.h>
|
#include <Common/HashTable/Hash.h>
|
||||||
#include <Common/randomSeed.h>
|
#include <Common/randomSeed.h>
|
||||||
#include <common/unaligned.h>
|
#include <common/unaligned.h>
|
||||||
|
#include <x86intrin.h>
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
/*
|
|
||||||
|
|
||||||
// TODO(dakovalkov): remove this workaround.
|
// TODO(dakovalkov): remove this workaround.
|
||||||
#pragma GCC diagnostic ignored "-Wvector-operation-performance"
|
#if !defined(__clang__)
|
||||||
|
# pragma GCC diagnostic ignored "-Wvector-operation-performance"
|
||||||
|
#endif
|
||||||
|
|
||||||
DECLARE_MULTITARGET_CODE(
|
DECLARE_MULTITARGET_CODE(
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
/// NOTE Probably
|
/// NOTE Probably
|
||||||
@ -45,10 +44,16 @@ namespace
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
void seed(LinearCongruentialGenerator & generator, intptr_t additional_seed)
|
UInt64 calcSeed(UInt64 rand_seed, UInt64 additional_seed)
|
||||||
{
|
{
|
||||||
generator.seed(intHash64(randomSeed() ^ intHash64(additional_seed)));
|
return intHash64(rand_seed ^ intHash64(additional_seed));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void seed(LinearCongruentialGenerator & generator, UInt64 rand_seed, intptr_t additional_seed)
|
||||||
|
{
|
||||||
|
generator.seed(calcSeed(rand_seed, additional_seed));
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void RandImpl::execute(char * output, size_t size)
|
void RandImpl::execute(char * output, size_t size)
|
||||||
@ -58,10 +63,12 @@ void RandImpl::execute(char * output, size_t size)
|
|||||||
LinearCongruentialGenerator generator2;
|
LinearCongruentialGenerator generator2;
|
||||||
LinearCongruentialGenerator generator3;
|
LinearCongruentialGenerator generator3;
|
||||||
|
|
||||||
seed(generator0, 0xfb4121280b2ab902ULL + reinterpret_cast<intptr_t>(output));
|
UInt64 rand_seed = randomSeed();
|
||||||
seed(generator1, 0x0121cf76df39c673ULL + reinterpret_cast<intptr_t>(output));
|
|
||||||
seed(generator2, 0x17ae86e3a19a602fULL + reinterpret_cast<intptr_t>(output));
|
seed(generator0, rand_seed, 0xfb4121280b2ab902ULL + reinterpret_cast<intptr_t>(output));
|
||||||
seed(generator3, 0x8b6e16da7e06d622ULL + reinterpret_cast<intptr_t>(output));
|
seed(generator1, rand_seed, 0x0121cf76df39c673ULL + reinterpret_cast<intptr_t>(output));
|
||||||
|
seed(generator2, rand_seed, 0x17ae86e3a19a602fULL + reinterpret_cast<intptr_t>(output));
|
||||||
|
seed(generator3, rand_seed, 0x8b6e16da7e06d622ULL + reinterpret_cast<intptr_t>(output));
|
||||||
|
|
||||||
for (const char * end = output + size; output < end; output += 16)
|
for (const char * end = output + size; output < end; output += 16)
|
||||||
{
|
{
|
||||||
@ -73,55 +80,6 @@ void RandImpl::execute(char * output, size_t size)
|
|||||||
/// It is guaranteed (by PaddedPODArray) that we can overwrite up to 15 bytes after end.
|
/// It is guaranteed (by PaddedPODArray) that we can overwrite up to 15 bytes after end.
|
||||||
}
|
}
|
||||||
|
|
||||||
void RandImpl2::execute(char * output, size_t size)
|
|
||||||
{
|
|
||||||
if (size == 0)
|
|
||||||
return;
|
|
||||||
|
|
||||||
LinearCongruentialGenerator generator0;
|
|
||||||
LinearCongruentialGenerator generator1;
|
|
||||||
LinearCongruentialGenerator generator2;
|
|
||||||
LinearCongruentialGenerator generator3;
|
|
||||||
LinearCongruentialGenerator generator4;
|
|
||||||
LinearCongruentialGenerator generator5;
|
|
||||||
LinearCongruentialGenerator generator6;
|
|
||||||
LinearCongruentialGenerator generator7;
|
|
||||||
|
|
||||||
seed(generator0, 0xfaaae481acb5874aULL + reinterpret_cast<intptr_t>(output));
|
|
||||||
seed(generator1, 0x3181a34f32887db6ULL + reinterpret_cast<intptr_t>(output));
|
|
||||||
seed(generator2, 0xb6970e4a91b66afdULL + reinterpret_cast<intptr_t>(output));
|
|
||||||
seed(generator3, 0xc16062649e83dc13ULL + reinterpret_cast<intptr_t>(output));
|
|
||||||
seed(generator4, 0xbb093972da5c8d92ULL + reinterpret_cast<intptr_t>(output));
|
|
||||||
seed(generator5, 0xc37dcc410dcfed31ULL + reinterpret_cast<intptr_t>(output));
|
|
||||||
seed(generator6, 0x45e1526b7a4367d5ULL + reinterpret_cast<intptr_t>(output));
|
|
||||||
seed(generator7, 0x99c2759203868a7fULL + reinterpret_cast<intptr_t>(output));
|
|
||||||
|
|
||||||
const char * end = output + size;
|
|
||||||
|
|
||||||
for (; (end - output + 15) <= 32; output += 32)
|
|
||||||
{
|
|
||||||
unalignedStore<UInt32>(output, generator0.next());
|
|
||||||
unalignedStore<UInt32>(output + 4, generator1.next());
|
|
||||||
unalignedStore<UInt32>(output + 8, generator2.next());
|
|
||||||
unalignedStore<UInt32>(output + 12, generator3.next());
|
|
||||||
unalignedStore<UInt32>(output + 16, generator4.next());
|
|
||||||
unalignedStore<UInt32>(output + 20, generator5.next());
|
|
||||||
unalignedStore<UInt32>(output + 24, generator6.next());
|
|
||||||
unalignedStore<UInt32>(output + 28, generator7.next());
|
|
||||||
}
|
|
||||||
|
|
||||||
if (end - output > 0)
|
|
||||||
{
|
|
||||||
unalignedStore<UInt32>(output, generator0.next());
|
|
||||||
unalignedStore<UInt32>(output + 4, generator1.next());
|
|
||||||
unalignedStore<UInt32>(output + 8, generator2.next());
|
|
||||||
unalignedStore<UInt32>(output + 12, generator3.next());
|
|
||||||
output += 16;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
|
|
||||||
typedef UInt64 UInt64x16 __attribute__ ((vector_size (128)));
|
typedef UInt64 UInt64x16 __attribute__ ((vector_size (128)));
|
||||||
typedef UInt64 UInt64x8 __attribute__ ((vector_size (64)));
|
typedef UInt64 UInt64x8 __attribute__ ((vector_size (64)));
|
||||||
typedef UInt64 UInt64x4 __attribute__ ((vector_size (32)));
|
typedef UInt64 UInt64x4 __attribute__ ((vector_size (32)));
|
||||||
@ -130,58 +88,85 @@ typedef UInt32 UInt32x16 __attribute__ ((vector_size (64)));
|
|||||||
typedef UInt32 UInt32x8 __attribute__ ((vector_size (32)));
|
typedef UInt32 UInt32x8 __attribute__ ((vector_size (32)));
|
||||||
typedef UInt32 UInt32x4 __attribute__ ((vector_size (16)));
|
typedef UInt32 UInt32x4 __attribute__ ((vector_size (16)));
|
||||||
|
|
||||||
void RandImpl3::execute(char * output, size_t size)
|
template <int Size>
|
||||||
|
struct DummyStruct;
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct DummyStruct<4>
|
||||||
{
|
{
|
||||||
|
using UInt64Type = UInt64x4;
|
||||||
|
using UInt32Type = UInt32x4;
|
||||||
|
};
|
||||||
|
template <>
|
||||||
|
struct DummyStruct<8>
|
||||||
|
{
|
||||||
|
using UInt64Type = UInt64x8;
|
||||||
|
using UInt32Type = UInt32x8;
|
||||||
|
};
|
||||||
|
template <>
|
||||||
|
struct DummyStruct<16>
|
||||||
|
{
|
||||||
|
using UInt64Type = UInt64x16;
|
||||||
|
using UInt32Type = UInt32x16;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <int Size>
|
||||||
|
using VecUInt64 = typename DummyStruct<Size>::UInt64Type;
|
||||||
|
template <int Size>
|
||||||
|
using VecUInt32 = typename DummyStruct<Size>::UInt32Type;
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
constexpr std::array<UInt64, 16> random_numbers = {
|
||||||
|
0x0c8ff307dabc0c4cULL,
|
||||||
|
0xf4bce78bf3821c1bULL,
|
||||||
|
0x4eb628a1e189c21aULL,
|
||||||
|
0x85ae000d253e0dbcULL,
|
||||||
|
|
||||||
|
0xc98073e6480f8a10ULL,
|
||||||
|
0xb17e9b70a084d570ULL,
|
||||||
|
0x1361c752b768da8cULL,
|
||||||
|
0x3d915f60c06d144dULL,
|
||||||
|
|
||||||
|
0xd5bc9b7aced79587ULL,
|
||||||
|
0x66c28000ba8a66cfULL,
|
||||||
|
0x0fb58da7a48820f5ULL,
|
||||||
|
0x540ee1b57aa861a1ULL,
|
||||||
|
|
||||||
|
0x212f11936ef2db04ULL,
|
||||||
|
0xa3939cd900edcc58ULL,
|
||||||
|
0xc676c84420170102ULL,
|
||||||
|
0xcbdc824e8b4bf3edULL,
|
||||||
|
};
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
template <int VectorSize>
|
||||||
|
void RandVecImpl<VectorSize>::execute(char * output, size_t size)
|
||||||
|
{
|
||||||
|
static_assert(VectorSize >= 4);
|
||||||
|
static_assert(VectorSize <= random_numbers.size());
|
||||||
|
|
||||||
if (size == 0)
|
if (size == 0)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
char * end = output + size;
|
char * end = output + size;
|
||||||
|
|
||||||
UInt64x4 generators = {
|
|
||||||
0xfb4121280b2ab902ULL + reinterpret_cast<intptr_t>(output),
|
|
||||||
0x0121cf76df39c673ULL + reinterpret_cast<intptr_t>(output),
|
|
||||||
0x17ae86e3a19a602fULL + reinterpret_cast<intptr_t>(output),
|
|
||||||
0x8b6e16da7e06d622ULL + reinterpret_cast<intptr_t>(output),
|
|
||||||
};
|
|
||||||
|
|
||||||
constexpr int bytes_per_write = sizeof(UInt32x4);
|
|
||||||
constexpr int safe_overwrite = 15;
|
constexpr int safe_overwrite = 15;
|
||||||
|
constexpr int bytes_per_write = sizeof(VecUInt32<VectorSize>);
|
||||||
|
|
||||||
|
UInt64 rand_seed = randomSeed();
|
||||||
|
|
||||||
|
VecUInt64<VectorSize> generators{};
|
||||||
|
for (int i = 0; i < VectorSize; ++i)
|
||||||
|
generators[i] = calcSeed(rand_seed, random_numbers[VectorSize] + reinterpret_cast<intptr_t>(output));
|
||||||
|
|
||||||
while ((end - output) + safe_overwrite >= bytes_per_write)
|
while ((end - output) + safe_overwrite >= bytes_per_write)
|
||||||
{
|
{
|
||||||
generators *= LinearCongruentialGenerator::a;
|
generators *= LinearCongruentialGenerator::a;
|
||||||
generators += LinearCongruentialGenerator::c;
|
generators += LinearCongruentialGenerator::c;
|
||||||
unalignedStore<UInt32x4>(output, __builtin_convertvector(generators, UInt32x4));
|
VecUInt32<VectorSize> values = __builtin_convertvector(generators >> 16, VecUInt32<VectorSize>);
|
||||||
output += bytes_per_write;
|
unalignedStore<VecUInt32<VectorSize>>(output, values);
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void RandImpl4::execute(char * output, size_t size)
|
|
||||||
{
|
|
||||||
if (size == 0)
|
|
||||||
return;
|
|
||||||
|
|
||||||
char * end = output + size;
|
|
||||||
|
|
||||||
UInt64x8 generators = {
|
|
||||||
0x5f186ce5faee450bULL + reinterpret_cast<intptr_t>(output),
|
|
||||||
0x9adb2ca3c72ac2eeULL + reinterpret_cast<intptr_t>(output),
|
|
||||||
0x07acf8bfa2537705ULL + reinterpret_cast<intptr_t>(output),
|
|
||||||
0x692b1b533834db92ULL + reinterpret_cast<intptr_t>(output),
|
|
||||||
0x5148b84cdda30081ULL + reinterpret_cast<intptr_t>(output),
|
|
||||||
0xe17b8a75a301ad47ULL + reinterpret_cast<intptr_t>(output),
|
|
||||||
0x6d4a5d69ed2a5f56ULL + reinterpret_cast<intptr_t>(output),
|
|
||||||
0x114e23266201b333ULL + reinterpret_cast<intptr_t>(output),
|
|
||||||
};
|
|
||||||
|
|
||||||
constexpr int bytes_per_write = sizeof(UInt32x8);
|
|
||||||
constexpr int safe_overwrite = 15;
|
|
||||||
|
|
||||||
while ((end - output) + safe_overwrite >= bytes_per_write)
|
|
||||||
{
|
|
||||||
generators *= LinearCongruentialGenerator::a;
|
|
||||||
generators += LinearCongruentialGenerator::c;
|
|
||||||
unalignedStore<UInt32x8>(output, __builtin_convertvector(generators, UInt32x8));
|
|
||||||
output += bytes_per_write;
|
output += bytes_per_write;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -189,7 +174,7 @@ void RandImpl4::execute(char * output, size_t size)
|
|||||||
{
|
{
|
||||||
generators *= LinearCongruentialGenerator::a;
|
generators *= LinearCongruentialGenerator::a;
|
||||||
generators += LinearCongruentialGenerator::c;
|
generators += LinearCongruentialGenerator::c;
|
||||||
UInt32x8 values = __builtin_convertvector(generators, UInt32x8);
|
VecUInt32<VectorSize> values = __builtin_convertvector(generators >> 16, VecUInt32<VectorSize>);
|
||||||
for (int i = 0; (end - output) > 0; ++i)
|
for (int i = 0; (end - output) > 0; ++i)
|
||||||
{
|
{
|
||||||
unalignedStore<UInt32>(output, values[i]);
|
unalignedStore<UInt32>(output, values[i]);
|
||||||
@ -198,49 +183,50 @@ void RandImpl4::execute(char * output, size_t size)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RandImpl5::execute(char * output, size_t size)
|
template struct RandVecImpl<4>;
|
||||||
|
template struct RandVecImpl<8>;
|
||||||
|
template struct RandVecImpl<16>;
|
||||||
|
|
||||||
|
template <int VectorSize>
|
||||||
|
void RandVecImpl2<VectorSize>::execute(char * output, size_t size)
|
||||||
{
|
{
|
||||||
|
static_assert(VectorSize >= 4);
|
||||||
|
|
||||||
if (size == 0)
|
if (size == 0)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
char * end = output + size;
|
char * end = output + size;
|
||||||
|
|
||||||
UInt64x16 generators = {
|
|
||||||
0xfb4121280b2ab902ULL + reinterpret_cast<intptr_t>(output),
|
|
||||||
0x0121cf76df39c673ULL + reinterpret_cast<intptr_t>(output),
|
|
||||||
0x17ae86e3a19a602fULL + reinterpret_cast<intptr_t>(output),
|
|
||||||
0x8b6e16da7e06d622ULL + reinterpret_cast<intptr_t>(output),
|
|
||||||
0xfb4121f80b2ab902ULL + reinterpret_cast<intptr_t>(output),
|
|
||||||
0x0122cf767f39c633ULL + reinterpret_cast<intptr_t>(output),
|
|
||||||
0x14ae86e3a79a502fULL + reinterpret_cast<intptr_t>(output),
|
|
||||||
0x876316da7e06d622ULL + reinterpret_cast<intptr_t>(output),
|
|
||||||
0xfb4821280b2ab912ULL + reinterpret_cast<intptr_t>(output),
|
|
||||||
0x0126cf76df39c633ULL + reinterpret_cast<intptr_t>(output),
|
|
||||||
0x17a486e3a19a602fULL + reinterpret_cast<intptr_t>(output),
|
|
||||||
0x8b6216da7e08d622ULL + reinterpret_cast<intptr_t>(output),
|
|
||||||
0xfb4101f80b5ab902ULL + reinterpret_cast<intptr_t>(output),
|
|
||||||
0x01226f767f34c633ULL + reinterpret_cast<intptr_t>(output),
|
|
||||||
0x14ae86e3a75a502fULL + reinterpret_cast<intptr_t>(output),
|
|
||||||
0x876e36da7e36d622ULL + reinterpret_cast<intptr_t>(output),
|
|
||||||
};
|
|
||||||
|
|
||||||
constexpr int bytes_per_write = sizeof(UInt32x16);
|
|
||||||
constexpr int safe_overwrite = 15;
|
constexpr int safe_overwrite = 15;
|
||||||
|
constexpr int bytes_per_write = 2 * sizeof(VecUInt32<VectorSize>);
|
||||||
|
|
||||||
|
UInt64 rand_seed = randomSeed();
|
||||||
|
VecUInt64<VectorSize> gens1{}, gens2{};
|
||||||
|
for (int i = 0; i < VectorSize; ++i)
|
||||||
|
{
|
||||||
|
gens1[i] = calcSeed(rand_seed, i * 1123465ull * reinterpret_cast<intptr_t>(output));
|
||||||
|
gens2[i] = calcSeed(rand_seed, i * 6432453ull * reinterpret_cast<intptr_t>(output));
|
||||||
|
}
|
||||||
|
|
||||||
while ((end - output) + safe_overwrite >= bytes_per_write)
|
while ((end - output) + safe_overwrite >= bytes_per_write)
|
||||||
{
|
{
|
||||||
generators *= LinearCongruentialGenerator::a;
|
gens1 *= LinearCongruentialGenerator::a;
|
||||||
generators += LinearCongruentialGenerator::c;
|
gens1 += LinearCongruentialGenerator::c;
|
||||||
unalignedStore<UInt32x16>(output, __builtin_convertvector(generators, UInt32x16));
|
VecUInt32<VectorSize> values1 = __builtin_convertvector(gens1 >> 16, VecUInt32<VectorSize>);
|
||||||
|
unalignedStore<VecUInt32<VectorSize>>(output, values1);
|
||||||
|
gens2 *= LinearCongruentialGenerator::a;
|
||||||
|
gens2 += LinearCongruentialGenerator::c;
|
||||||
|
VecUInt32<VectorSize> values2 = __builtin_convertvector(gens2 >> 16, VecUInt32<VectorSize>);
|
||||||
|
unalignedStore<VecUInt32<VectorSize>>(output, values2);
|
||||||
output += bytes_per_write;
|
output += bytes_per_write;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((end - output) > 0)
|
while ((end - output) > 0)
|
||||||
{
|
{
|
||||||
generators *= LinearCongruentialGenerator::a;
|
gens1 *= LinearCongruentialGenerator::a;
|
||||||
generators += LinearCongruentialGenerator::c;
|
gens1 += LinearCongruentialGenerator::c;
|
||||||
UInt32x16 values = __builtin_convertvector(generators, UInt32x16);
|
VecUInt32<VectorSize> values = __builtin_convertvector(gens1 >> 16, VecUInt32<VectorSize>);
|
||||||
for (int i = 0; (end - output) > 0; ++i)
|
for (int i = 0; (end - output) > 0 && i < VectorSize; ++i)
|
||||||
{
|
{
|
||||||
unalignedStore<UInt32>(output, values[i]);
|
unalignedStore<UInt32>(output, values[i]);
|
||||||
output += sizeof(UInt32);
|
output += sizeof(UInt32);
|
||||||
@ -248,8 +234,73 @@ void RandImpl5::execute(char * output, size_t size)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template struct RandVecImpl2<4>;
|
||||||
|
template struct RandVecImpl2<8>;
|
||||||
|
template struct RandVecImpl2<16>;
|
||||||
|
|
||||||
|
// template <int VectorSize>
|
||||||
|
// void RandVecImpl4<VectorSize>::execute(char * output, size_t size)
|
||||||
|
// {
|
||||||
|
// static_assert(VectorSize >= 4);
|
||||||
|
|
||||||
|
// if (size == 0)
|
||||||
|
// return;
|
||||||
|
|
||||||
|
// char * end = output + size;
|
||||||
|
|
||||||
|
// constexpr int safe_overwrite = 15;
|
||||||
|
// constexpr int bytes_per_write = 4 * sizeof(VecUInt32<VectorSize>);
|
||||||
|
|
||||||
|
// VecUInt64<VectorSize> gens1{}, gens2{}, gens3{}, gens4{};
|
||||||
|
// for (int i = 0; i < VectorSize; ++i)
|
||||||
|
// {
|
||||||
|
// gens1[i] = calcSeed(i * 1123465ull * reinterpret_cast<intptr_t>(output));
|
||||||
|
// gens2[i] = calcSeed(i * 6432453ull * reinterpret_cast<intptr_t>(output));
|
||||||
|
// gens3[i] = calcSeed(i * 1346434ull * reinterpret_cast<intptr_t>(output));
|
||||||
|
// gens4[i] = calcSeed(i * 5344753ull * reinterpret_cast<intptr_t>(output));
|
||||||
|
// }
|
||||||
|
|
||||||
|
// while ((end - output) + safe_overwrite >= bytes_per_write)
|
||||||
|
// {
|
||||||
|
// gens1 *= LinearCongruentialGenerator::a;
|
||||||
|
// gens1 += LinearCongruentialGenerator::c;
|
||||||
|
// VecUInt32<VectorSize> values1 = __builtin_convertvector(gens1 >> 16, VecUInt32<VectorSize>);
|
||||||
|
// unalignedStore<VecUInt32<VectorSize>>(output, values1);
|
||||||
|
// gens2 *= LinearCongruentialGenerator::a;
|
||||||
|
// gens2 += LinearCongruentialGenerator::c;
|
||||||
|
// VecUInt32<VectorSize> values2 = __builtin_convertvector(gens2 >> 16, VecUInt32<VectorSize>);
|
||||||
|
// unalignedStore<VecUInt32<VectorSize>>(output, values2);
|
||||||
|
// gens3 *= LinearCongruentialGenerator::a;
|
||||||
|
// gens3 += LinearCongruentialGenerator::c;
|
||||||
|
// VecUInt32<VectorSize> values3 = __builtin_convertvector(gens3 >> 16, VecUInt32<VectorSize>);
|
||||||
|
// unalignedStore<VecUInt32<VectorSize>>(output, values3);
|
||||||
|
// gens4 *= LinearCongruentialGenerator::a;
|
||||||
|
// gens4 += LinearCongruentialGenerator::c;
|
||||||
|
// VecUInt32<VectorSize> values4 = __builtin_convertvector(gens4 >> 16, VecUInt32<VectorSize>);
|
||||||
|
// unalignedStore<VecUInt32<VectorSize>>(output, values4);
|
||||||
|
// output += bytes_per_write;
|
||||||
|
// }
|
||||||
|
|
||||||
|
// while ((end - output) > 0)
|
||||||
|
// {
|
||||||
|
// gens1 *= LinearCongruentialGenerator::a;
|
||||||
|
// gens1 += LinearCongruentialGenerator::c;
|
||||||
|
// VecUInt32<VectorSize> values = __builtin_convertvector(gens1 >> 16, VecUInt32<VectorSize>);
|
||||||
|
// for (int i = 0; (end - output) > 0 && i < VectorSize; i += 4)
|
||||||
|
// {
|
||||||
|
// unalignedStore<UInt32>(output, values[i]);
|
||||||
|
// unalignedStore<UInt32>(output + 4, values[i + 1]);
|
||||||
|
// unalignedStore<UInt32>(output + 8, values[i + 2]);
|
||||||
|
// unalignedStore<UInt32>(output + 12, values[i + 3]);
|
||||||
|
// output += 16;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
// template struct RandVecImpl2<4>;
|
||||||
|
// template struct RandVecImpl2<8>;
|
||||||
|
// template struct RandVecImpl2<16>;
|
||||||
|
|
||||||
) //DECLARE_MULTITARGET_CODE
|
) //DECLARE_MULTITARGET_CODE
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -36,26 +36,20 @@ namespace ErrorCodes
|
|||||||
* This means that the timer must be of sufficient resolution to give different values to each block.
|
* This means that the timer must be of sufficient resolution to give different values to each block.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
|
||||||
|
|
||||||
DECLARE_MULTITARGET_CODE(
|
DECLARE_MULTITARGET_CODE(
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
struct RandImpl
|
struct RandImpl
|
||||||
{
|
{
|
||||||
static void execute(char * output, size_t size);
|
static void execute(char * output, size_t size);
|
||||||
static String getImplementationTag() { return ToString(TargetArch::Default); }
|
static String getImplementationTag() { return ToString(BuildArch); }
|
||||||
};
|
};
|
||||||
|
|
||||||
struct RandImpl2
|
struct RandImpl2
|
||||||
{
|
{
|
||||||
static void execute(char * output, size_t size);
|
static void execute(char * output, size_t size);
|
||||||
static String getImplementationTag() { return ToString(TargetArch::Default) + "_v2"; }
|
static String getImplementationTag() { return ToString(BuildArch) + "_v2"; }
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
|
||||||
|
|
||||||
struct RandImpl3
|
struct RandImpl3
|
||||||
{
|
{
|
||||||
static void execute(char * output, size_t size);
|
static void execute(char * output, size_t size);
|
||||||
@ -74,9 +68,27 @@ struct RandImpl5
|
|||||||
static String getImplementationTag() { return ToString(BuildArch) + "_v5"; }
|
static String getImplementationTag() { return ToString(BuildArch) + "_v5"; }
|
||||||
};
|
};
|
||||||
|
|
||||||
) // DECLARE_MULTITARGET_CODE
|
template <int VectorSize>
|
||||||
|
struct RandVecImpl
|
||||||
|
{
|
||||||
|
static void execute(char * outpu, size_t size);
|
||||||
|
static String getImplementationTag() { return ToString(BuildArch) + "_vec_" + toString(VectorSize); }
|
||||||
|
};
|
||||||
|
|
||||||
*/
|
template <int VectorSize>
|
||||||
|
struct RandVecImpl2
|
||||||
|
{
|
||||||
|
static void execute(char * outpu, size_t size);
|
||||||
|
static String getImplementationTag() { return ToString(BuildArch) + "_vec2_" + toString(VectorSize); }
|
||||||
|
};
|
||||||
|
|
||||||
|
struct RandImpl6
|
||||||
|
{
|
||||||
|
static void execute(char * outpu, size_t size);
|
||||||
|
static String getImplementationTag() { return ToString(BuildArch) + "_v6"; }
|
||||||
|
};
|
||||||
|
|
||||||
|
) // DECLARE_MULTITARGET_CODE
|
||||||
|
|
||||||
template <typename RandImpl, typename ToType, typename Name>
|
template <typename RandImpl, typename ToType, typename Name>
|
||||||
class FunctionRandomImpl : public IFunction
|
class FunctionRandomImpl : public IFunction
|
||||||
@ -125,45 +137,80 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
template <typename ToType, typename Name>
|
template <typename ToType, typename Name>
|
||||||
class FunctionRandom : public FunctionRandomImpl<RandImpl2, ToType, Name>
|
class FunctionRandom : public FunctionRandomImpl<TargetSpecific::Default::RandImpl, ToType, Name>
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
FunctionRandom(const Context & context) : selector(context)
|
FunctionRandom(const Context & context) : selector(context)
|
||||||
{
|
{
|
||||||
// selector.registerImplementation<TargetArch::Default,
|
|
||||||
// FunctionRandomImpl<TargetSpecific::Default::RandImpl, ToType, Name>>();
|
|
||||||
selector.registerImplementation<TargetArch::Default,
|
selector.registerImplementation<TargetArch::Default,
|
||||||
FunctionRandomImpl<RandImpl2, ToType, Name>>();
|
FunctionRandomImpl<TargetSpecific::Default::RandImpl, ToType, Name>>();
|
||||||
|
selector.registerImplementation<TargetArch::Default,
|
||||||
|
FunctionRandomImpl<TargetSpecific::Default::RandImpl2, ToType, Name>>();
|
||||||
|
|
||||||
// if constexpr (UseMultitargetCode)
|
if constexpr (UseMultitargetCode)
|
||||||
// {
|
{
|
||||||
// selector.registerImplementation<TargetArch::SSE42,
|
selector.registerImplementation<TargetArch::SSE42,
|
||||||
// FunctionRandomImpl<TargetSpecific::SSE42::RandImpl, ToType, Name>>();
|
FunctionRandomImpl<TargetSpecific::SSE42::RandImpl, ToType, Name>>();
|
||||||
// selector.registerImplementation<TargetArch::AVX,
|
selector.registerImplementation<TargetArch::AVX,
|
||||||
// FunctionRandomImpl<TargetSpecific::AVX::RandImpl, ToType, Name>>();
|
FunctionRandomImpl<TargetSpecific::AVX::RandImpl, ToType, Name>>();
|
||||||
// selector.registerImplementation<TargetArch::AVX2,
|
selector.registerImplementation<TargetArch::AVX2,
|
||||||
// FunctionRandomImpl<TargetSpecific::AVX2::RandImpl, ToType, Name>>();
|
FunctionRandomImpl<TargetSpecific::AVX2::RandImpl, ToType, Name>>();
|
||||||
// selector.registerImplementation<TargetArch::AVX512F,
|
selector.registerImplementation<TargetArch::AVX512F,
|
||||||
// FunctionRandomImpl<TargetSpecific::AVX512F::RandImpl, ToType, Name>>();
|
FunctionRandomImpl<TargetSpecific::AVX512F::RandImpl, ToType, Name>>();
|
||||||
|
|
||||||
// selector.registerImplementation<TargetArch::AVX2,
|
selector.registerImplementation<TargetArch::AVX2,
|
||||||
// FunctionRandomImpl<TargetSpecific::AVX2::RandImpl2, ToType, Name>>();
|
FunctionRandomImpl<TargetSpecific::AVX2::RandImpl2, ToType, Name>>();
|
||||||
|
|
||||||
// selector.registerImplementation<TargetArch::Default,
|
selector.registerImplementation<TargetArch::Default,
|
||||||
// FunctionRandomImpl<TargetSpecific::Default::RandImpl3, ToType, Name>>();
|
FunctionRandomImpl<TargetSpecific::Default::RandImpl3, ToType, Name>>();
|
||||||
// selector.registerImplementation<TargetArch::AVX2,
|
selector.registerImplementation<TargetArch::AVX2,
|
||||||
// FunctionRandomImpl<TargetSpecific::AVX2::RandImpl3, ToType, Name>>();
|
FunctionRandomImpl<TargetSpecific::AVX2::RandImpl3, ToType, Name>>();
|
||||||
|
|
||||||
// selector.registerImplementation<TargetArch::Default,
|
selector.registerImplementation<TargetArch::Default,
|
||||||
// FunctionRandomImpl<TargetSpecific::Default::RandImpl4, ToType, Name>>();
|
FunctionRandomImpl<TargetSpecific::Default::RandImpl4, ToType, Name>>();
|
||||||
// selector.registerImplementation<TargetArch::AVX2,
|
selector.registerImplementation<TargetArch::AVX2,
|
||||||
// FunctionRandomImpl<TargetSpecific::AVX2::RandImpl4, ToType, Name>>();
|
FunctionRandomImpl<TargetSpecific::AVX2::RandImpl4, ToType, Name>>();
|
||||||
|
|
||||||
// selector.registerImplementation<TargetArch::Default,
|
selector.registerImplementation<TargetArch::Default,
|
||||||
// FunctionRandomImpl<TargetSpecific::Default::RandImpl5, ToType, Name>>();
|
FunctionRandomImpl<TargetSpecific::Default::RandImpl5, ToType, Name>>();
|
||||||
// selector.registerImplementation<TargetArch::AVX2,
|
selector.registerImplementation<TargetArch::AVX2,
|
||||||
// FunctionRandomImpl<TargetSpecific::AVX2::RandImpl5, ToType, Name>>();
|
FunctionRandomImpl<TargetSpecific::AVX2::RandImpl5, ToType, Name>>();
|
||||||
// }
|
|
||||||
|
// vec impl
|
||||||
|
selector.registerImplementation<TargetArch::Default,
|
||||||
|
FunctionRandomImpl<TargetSpecific::Default::RandVecImpl<4>, ToType, Name>>();
|
||||||
|
selector.registerImplementation<TargetArch::AVX2,
|
||||||
|
FunctionRandomImpl<TargetSpecific::AVX2::RandVecImpl<4>, ToType, Name>>();
|
||||||
|
|
||||||
|
selector.registerImplementation<TargetArch::Default,
|
||||||
|
FunctionRandomImpl<TargetSpecific::Default::RandVecImpl<8>, ToType, Name>>();
|
||||||
|
selector.registerImplementation<TargetArch::AVX2,
|
||||||
|
FunctionRandomImpl<TargetSpecific::AVX2::RandVecImpl<8>, ToType, Name>>();
|
||||||
|
|
||||||
|
selector.registerImplementation<TargetArch::Default,
|
||||||
|
FunctionRandomImpl<TargetSpecific::Default::RandVecImpl<16>, ToType, Name>>();
|
||||||
|
selector.registerImplementation<TargetArch::AVX2,
|
||||||
|
FunctionRandomImpl<TargetSpecific::AVX2::RandVecImpl<16>, ToType, Name>>();
|
||||||
|
|
||||||
|
// vec impl 2
|
||||||
|
selector.registerImplementation<TargetArch::Default,
|
||||||
|
FunctionRandomImpl<TargetSpecific::Default::RandVecImpl2<4>, ToType, Name>>();
|
||||||
|
selector.registerImplementation<TargetArch::AVX2,
|
||||||
|
FunctionRandomImpl<TargetSpecific::AVX2::RandVecImpl2<4>, ToType, Name>>();
|
||||||
|
|
||||||
|
selector.registerImplementation<TargetArch::Default,
|
||||||
|
FunctionRandomImpl<TargetSpecific::Default::RandVecImpl2<8>, ToType, Name>>();
|
||||||
|
selector.registerImplementation<TargetArch::AVX2,
|
||||||
|
FunctionRandomImpl<TargetSpecific::AVX2::RandVecImpl2<8>, ToType, Name>>();
|
||||||
|
|
||||||
|
selector.registerImplementation<TargetArch::Default,
|
||||||
|
FunctionRandomImpl<TargetSpecific::Default::RandVecImpl2<16>, ToType, Name>>();
|
||||||
|
selector.registerImplementation<TargetArch::AVX2,
|
||||||
|
FunctionRandomImpl<TargetSpecific::AVX2::RandVecImpl2<16>, ToType, Name>>();
|
||||||
|
|
||||||
|
selector.registerImplementation<TargetArch::AVX2,
|
||||||
|
FunctionRandomImpl<TargetSpecific::AVX2::RandImpl6, ToType, Name>>();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
|
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
|
||||||
|
@ -33,7 +33,7 @@ public:
|
|||||||
size_t size = input_rows_count;
|
size_t size = input_rows_count;
|
||||||
vec_to.resize(size);
|
vec_to.resize(size);
|
||||||
// TODO(dakovalkov): rewrite this workaround
|
// TODO(dakovalkov): rewrite this workaround
|
||||||
RandImpl::execute(reinterpret_cast<char *>(vec_to.data()), vec_to.size() * sizeof(UInt128));
|
TargetSpecific::Default::RandImpl::execute(reinterpret_cast<char *>(vec_to.data()), vec_to.size() * sizeof(UInt128));
|
||||||
|
|
||||||
for (UInt128 & uuid: vec_to)
|
for (UInt128 & uuid: vec_to)
|
||||||
{
|
{
|
||||||
|
@ -100,7 +100,7 @@ public:
|
|||||||
|
|
||||||
typename ColumnVector<ToType>::Container vec_to(1);
|
typename ColumnVector<ToType>::Container vec_to(1);
|
||||||
// TODO(dakovalkov): Rewrite this workaround
|
// TODO(dakovalkov): Rewrite this workaround
|
||||||
RandImpl::execute(reinterpret_cast<char *>(vec_to.data()), sizeof(ToType));
|
TargetSpecific::Default::RandImpl::execute(reinterpret_cast<char *>(vec_to.data()), sizeof(ToType));
|
||||||
ToType value = vec_to[0];
|
ToType value = vec_to[0];
|
||||||
|
|
||||||
return std::make_unique<FunctionBaseRandomConstant<ToType, Name>>(value, argument_types, return_type);
|
return std::make_unique<FunctionBaseRandomConstant<ToType, Name>>(value, argument_types, return_type);
|
||||||
|
Loading…
Reference in New Issue
Block a user