More rand implementations

2024-11-29 11:02:08 +00:00 · 2020-05-20 14:42:21 +02:00 · 2020-05-20 14:42:21 +02:00 · 90bc3e6136
commit 90bc3e6136
parent 66d530e901
4 changed files with 272 additions and 174 deletions
--- a/src/Functions/FunctionsRandom.cpp
+++ b/src/Functions/FunctionsRandom.cpp
@ -3,19 +3,18 @@
 #include <Common/HashTable/Hash.h>
 #include <Common/randomSeed.h>
 #include <common/unaligned.h>
+#include <x86intrin.h>

 namespace DB
 {

-/*
-
 // TODO(dakovalkov): remove this workaround.
-#pragma GCC diagnostic ignored "-Wvector-operation-performance"
+#if !defined(__clang__)
+#  pragma GCC diagnostic ignored "-Wvector-operation-performance"
+#endif

 DECLARE_MULTITARGET_CODE(

-*/
-
 namespace
 {
    /// NOTE Probably
@ -45,10 +44,16 @@ namespace
        }
    };

-    void seed(LinearCongruentialGenerator & generator, intptr_t additional_seed)
+    UInt64 calcSeed(UInt64 rand_seed, UInt64 additional_seed)
    {
-        generator.seed(intHash64(randomSeed() ^ intHash64(additional_seed)));
+        return intHash64(rand_seed ^ intHash64(additional_seed));
    }
+
+    void seed(LinearCongruentialGenerator & generator, UInt64 rand_seed, intptr_t additional_seed)
+    {
+        generator.seed(calcSeed(rand_seed, additional_seed));
+    }
+
 }

 void RandImpl::execute(char * output, size_t size)
@ -58,10 +63,12 @@ void RandImpl::execute(char * output, size_t size)
    LinearCongruentialGenerator generator2;
    LinearCongruentialGenerator generator3;

-    seed(generator0, 0xfb4121280b2ab902ULL + reinterpret_cast<intptr_t>(output));
-    seed(generator1, 0x0121cf76df39c673ULL + reinterpret_cast<intptr_t>(output));
-    seed(generator2, 0x17ae86e3a19a602fULL + reinterpret_cast<intptr_t>(output));
-    seed(generator3, 0x8b6e16da7e06d622ULL + reinterpret_cast<intptr_t>(output));
+    UInt64 rand_seed = randomSeed();
+
+    seed(generator0, rand_seed, 0xfb4121280b2ab902ULL + reinterpret_cast<intptr_t>(output));
+    seed(generator1, rand_seed, 0x0121cf76df39c673ULL + reinterpret_cast<intptr_t>(output));
+    seed(generator2, rand_seed, 0x17ae86e3a19a602fULL + reinterpret_cast<intptr_t>(output));
+    seed(generator3, rand_seed, 0x8b6e16da7e06d622ULL + reinterpret_cast<intptr_t>(output));

    for (const char * end = output + size; output < end; output += 16)
    {
@ -73,55 +80,6 @@ void RandImpl::execute(char * output, size_t size)
    /// It is guaranteed (by PaddedPODArray) that we can overwrite up to 15 bytes after end.
 }

-void RandImpl2::execute(char * output, size_t size)
-{
-    if (size == 0)
-        return;
-
-    LinearCongruentialGenerator generator0;
-    LinearCongruentialGenerator generator1;
-    LinearCongruentialGenerator generator2;
-    LinearCongruentialGenerator generator3;
-    LinearCongruentialGenerator generator4;
-    LinearCongruentialGenerator generator5;
-    LinearCongruentialGenerator generator6;
-    LinearCongruentialGenerator generator7;
-
-    seed(generator0, 0xfaaae481acb5874aULL + reinterpret_cast<intptr_t>(output));
-    seed(generator1, 0x3181a34f32887db6ULL + reinterpret_cast<intptr_t>(output));
-    seed(generator2, 0xb6970e4a91b66afdULL + reinterpret_cast<intptr_t>(output));
-    seed(generator3, 0xc16062649e83dc13ULL + reinterpret_cast<intptr_t>(output));
-    seed(generator4, 0xbb093972da5c8d92ULL + reinterpret_cast<intptr_t>(output));
-    seed(generator5, 0xc37dcc410dcfed31ULL + reinterpret_cast<intptr_t>(output));
-    seed(generator6, 0x45e1526b7a4367d5ULL + reinterpret_cast<intptr_t>(output));
-    seed(generator7, 0x99c2759203868a7fULL + reinterpret_cast<intptr_t>(output));
-
-    const char * end = output + size;
-
-    for (; (end - output + 15) <= 32; output += 32)
-    {
-        unalignedStore<UInt32>(output,      generator0.next());
-        unalignedStore<UInt32>(output + 4,  generator1.next());
-        unalignedStore<UInt32>(output + 8,  generator2.next());
-        unalignedStore<UInt32>(output + 12, generator3.next());
-        unalignedStore<UInt32>(output + 16, generator4.next());
-        unalignedStore<UInt32>(output + 20, generator5.next());
-        unalignedStore<UInt32>(output + 24, generator6.next());
-        unalignedStore<UInt32>(output + 28, generator7.next());
-    }
-
-    if (end - output > 0)
-    {
-        unalignedStore<UInt32>(output,      generator0.next());
-        unalignedStore<UInt32>(output + 4,  generator1.next());
-        unalignedStore<UInt32>(output + 8,  generator2.next());
-        unalignedStore<UInt32>(output + 12, generator3.next());
-        output += 16;
-    }
-}
-
-/*
-
 typedef UInt64 UInt64x16 __attribute__ ((vector_size (128)));
 typedef UInt64 UInt64x8  __attribute__ ((vector_size (64)));
 typedef UInt64 UInt64x4  __attribute__ ((vector_size (32)));
@ -130,58 +88,85 @@ typedef UInt32 UInt32x16 __attribute__ ((vector_size (64)));
 typedef UInt32 UInt32x8  __attribute__ ((vector_size (32)));
 typedef UInt32 UInt32x4  __attribute__ ((vector_size (16)));

-void RandImpl3::execute(char * output, size_t size)
+template <int Size>
+struct DummyStruct;
+
+template <>
+struct DummyStruct<4>
 {
+    using UInt64Type = UInt64x4;
+    using UInt32Type = UInt32x4;
+};
+template <>
+struct DummyStruct<8>
+{
+    using UInt64Type = UInt64x8;
+    using UInt32Type = UInt32x8;
+};
+template <>
+struct DummyStruct<16>
+{
+    using UInt64Type = UInt64x16;
+    using UInt32Type = UInt32x16;
+};
+
+template <int Size>
+using VecUInt64 = typename DummyStruct<Size>::UInt64Type;
+template <int Size>
+using VecUInt32 = typename DummyStruct<Size>::UInt32Type;
+
+namespace
+{
+
+/// Seed constants consumed by RandVecImpl::execute.
+/// There are 16 entries so that the widest supported generator vector (16 lanes)
+/// has one distinct constant available per lane; RandVecImpl::execute checks
+/// VectorSize against this table's size with a static_assert.
+constexpr std::array<UInt64, 16> random_numbers = {
+    0x0c8ff307dabc0c4cULL,
+    0xf4bce78bf3821c1bULL,
+    0x4eb628a1e189c21aULL,
+    0x85ae000d253e0dbcULL,
+
+    0xc98073e6480f8a10ULL,
+    0xb17e9b70a084d570ULL,
+    0x1361c752b768da8cULL,
+    0x3d915f60c06d144dULL,
+
+    0xd5bc9b7aced79587ULL,
+    0x66c28000ba8a66cfULL,
+    0x0fb58da7a48820f5ULL,
+    0x540ee1b57aa861a1ULL,
+
+    0x212f11936ef2db04ULL,
+    0xa3939cd900edcc58ULL,
+    0xc676c84420170102ULL,
+    0xcbdc824e8b4bf3edULL,
+};
+
+}
+
+template <int VectorSize>
+void RandVecImpl<VectorSize>::execute(char * output, size_t size)
+{
+    static_assert(VectorSize >= 4);
+    static_assert(VectorSize <= random_numbers.size());
+
    if (size == 0)
        return;
    
    char * end = output + size;

-    UInt64x4 generators = {
-        0xfb4121280b2ab902ULL + reinterpret_cast<intptr_t>(output),
-        0x0121cf76df39c673ULL + reinterpret_cast<intptr_t>(output),
-        0x17ae86e3a19a602fULL + reinterpret_cast<intptr_t>(output),
-        0x8b6e16da7e06d622ULL + reinterpret_cast<intptr_t>(output),
-    };
-
-    constexpr int bytes_per_write = sizeof(UInt32x4);
    constexpr int safe_overwrite = 15;
+    constexpr int bytes_per_write = sizeof(VecUInt32<VectorSize>);
+
+    UInt64 rand_seed = randomSeed();
+
+    /// Seed every lane from its own table entry, so that the lanes produce different sequences.
+    /// Indexing with VectorSize instead of the lane number would give all lanes the same seed
+    /// and, for VectorSize == 16, read one element past the end of random_numbers.
+    VecUInt64<VectorSize> generators{};
+    for (int i = 0; i < VectorSize; ++i)
+        generators[i] = calcSeed(rand_seed, random_numbers[i] + reinterpret_cast<intptr_t>(output));

    while ((end - output) + safe_overwrite >= bytes_per_write)
    {
        generators *= LinearCongruentialGenerator::a;
        generators += LinearCongruentialGenerator::c;
-        unalignedStore<UInt32x4>(output, __builtin_convertvector(generators, UInt32x4));
-        output += bytes_per_write;
-    }
-}
-
-void RandImpl4::execute(char * output, size_t size)
-{
-    if (size == 0)
-        return;
-    
-    char * end = output + size;
-
-    UInt64x8 generators = {
-        0x5f186ce5faee450bULL + reinterpret_cast<intptr_t>(output),
-        0x9adb2ca3c72ac2eeULL + reinterpret_cast<intptr_t>(output),
-        0x07acf8bfa2537705ULL + reinterpret_cast<intptr_t>(output),
-        0x692b1b533834db92ULL + reinterpret_cast<intptr_t>(output),
-        0x5148b84cdda30081ULL + reinterpret_cast<intptr_t>(output),
-        0xe17b8a75a301ad47ULL + reinterpret_cast<intptr_t>(output),
-        0x6d4a5d69ed2a5f56ULL + reinterpret_cast<intptr_t>(output),
-        0x114e23266201b333ULL + reinterpret_cast<intptr_t>(output),
-    };
-
-    constexpr int bytes_per_write = sizeof(UInt32x8);
-    constexpr int safe_overwrite = 15;
-
-    while ((end - output) + safe_overwrite >= bytes_per_write)
-    {
-        generators *= LinearCongruentialGenerator::a;
-        generators += LinearCongruentialGenerator::c;
-        unalignedStore<UInt32x8>(output, __builtin_convertvector(generators, UInt32x8));
+        VecUInt32<VectorSize> values = __builtin_convertvector(generators >> 16, VecUInt32<VectorSize>);
+        unalignedStore<VecUInt32<VectorSize>>(output, values);
        output += bytes_per_write;
    }

@ -189,7 +174,7 @@ void RandImpl4::execute(char * output, size_t size)
    {
        generators *= LinearCongruentialGenerator::a;
        generators += LinearCongruentialGenerator::c;
-        UInt32x8 values = __builtin_convertvector(generators, UInt32x8);
+        VecUInt32<VectorSize> values = __builtin_convertvector(generators >> 16, VecUInt32<VectorSize>);
        for (int i = 0; (end - output) > 0; ++i)
        {
            unalignedStore<UInt32>(output, values[i]);
@ -198,49 +183,50 @@ void RandImpl4::execute(char * output, size_t size)
    }
 }

-void RandImpl5::execute(char * output, size_t size)
+template struct RandVecImpl<4>;
+template struct RandVecImpl<8>;
+template struct RandVecImpl<16>;
+
+template <int VectorSize>
+void RandVecImpl2<VectorSize>::execute(char * output, size_t size)
 {
+    static_assert(VectorSize >= 4);
+
    if (size == 0)
        return;
    
    char * end = output + size;

-    UInt64x16 generators = {
-        0xfb4121280b2ab902ULL + reinterpret_cast<intptr_t>(output),
-        0x0121cf76df39c673ULL + reinterpret_cast<intptr_t>(output),
-        0x17ae86e3a19a602fULL + reinterpret_cast<intptr_t>(output),
-        0x8b6e16da7e06d622ULL + reinterpret_cast<intptr_t>(output),
-        0xfb4121f80b2ab902ULL + reinterpret_cast<intptr_t>(output),
-        0x0122cf767f39c633ULL + reinterpret_cast<intptr_t>(output),
-        0x14ae86e3a79a502fULL + reinterpret_cast<intptr_t>(output),
-        0x876316da7e06d622ULL + reinterpret_cast<intptr_t>(output),
-        0xfb4821280b2ab912ULL + reinterpret_cast<intptr_t>(output),
-        0x0126cf76df39c633ULL + reinterpret_cast<intptr_t>(output),
-        0x17a486e3a19a602fULL + reinterpret_cast<intptr_t>(output),
-        0x8b6216da7e08d622ULL + reinterpret_cast<intptr_t>(output),
-        0xfb4101f80b5ab902ULL + reinterpret_cast<intptr_t>(output),
-        0x01226f767f34c633ULL + reinterpret_cast<intptr_t>(output),
-        0x14ae86e3a75a502fULL + reinterpret_cast<intptr_t>(output),
-        0x876e36da7e36d622ULL + reinterpret_cast<intptr_t>(output),
-    };
-
-    constexpr int bytes_per_write = sizeof(UInt32x16);
    constexpr int safe_overwrite = 15;
+    constexpr int bytes_per_write = 2 * sizeof(VecUInt32<VectorSize>);
+
+    UInt64 rand_seed = randomSeed();
+    VecUInt64<VectorSize> gens1{}, gens2{};
+    /// Give every lane of both generator vectors its own seed.
+    /// The lane number is offset by one so that lane 0 does not collapse to the same seed in gens1 and gens2,
+    /// and the buffer address is added (as in the other implementations) rather than multiplied in,
+    /// so it contributes to every lane's seed.
+    for (int i = 0; i < VectorSize; ++i)
+    {
+        gens1[i] = calcSeed(rand_seed, (i + 1) * 1123465ull + reinterpret_cast<intptr_t>(output));
+        gens2[i] = calcSeed(rand_seed, (i + 1) * 6432453ull + reinterpret_cast<intptr_t>(output));
+    }

    while ((end - output) + safe_overwrite >= bytes_per_write)
    {
-        generators *= LinearCongruentialGenerator::a;
-        generators += LinearCongruentialGenerator::c;
-        unalignedStore<UInt32x16>(output, __builtin_convertvector(generators, UInt32x16));
+        /// One iteration emits two vectors back to back (bytes_per_write covers both):
+        /// gens1 fills the first half of the chunk and gens2 the second half.
+        gens1 *= LinearCongruentialGenerator::a;
+        gens1 += LinearCongruentialGenerator::c;
+        VecUInt32<VectorSize> values1 = __builtin_convertvector(gens1 >> 16, VecUInt32<VectorSize>);
+        unalignedStore<VecUInt32<VectorSize>>(output, values1);
+        gens2 *= LinearCongruentialGenerator::a;
+        gens2 += LinearCongruentialGenerator::c;
+        VecUInt32<VectorSize> values2 = __builtin_convertvector(gens2 >> 16, VecUInt32<VectorSize>);
+        /// The second vector goes right after the first one; storing it at `output` would overwrite values1.
+        unalignedStore<VecUInt32<VectorSize>>(output + sizeof(VecUInt32<VectorSize>), values2);
        output += bytes_per_write;
    }
        
-    if ((end - output) > 0)
+    while ((end - output) > 0)
    {
-        generators *= LinearCongruentialGenerator::a;
-        generators += LinearCongruentialGenerator::c;
-        UInt32x16 values = __builtin_convertvector(generators, UInt32x16);
-        for (int i = 0; (end - output) > 0; ++i)
+        gens1 *= LinearCongruentialGenerator::a;
+        gens1 += LinearCongruentialGenerator::c;
+        VecUInt32<VectorSize> values = __builtin_convertvector(gens1 >> 16, VecUInt32<VectorSize>);
+        for (int i = 0; (end - output) > 0 && i < VectorSize; ++i)
        {
            unalignedStore<UInt32>(output, values[i]);
            output += sizeof(UInt32);
@ -248,8 +234,73 @@ void RandImpl5::execute(char * output, size_t size)
    }
 }

+template struct RandVecImpl2<4>;
+template struct RandVecImpl2<8>;
+template struct RandVecImpl2<16>;
+
+// template <int VectorSize>
+// void RandVecImpl4<VectorSize>::execute(char * output, size_t size)
+// {
+//     static_assert(VectorSize >= 4);
+
+//     if (size == 0)
+//         return;
+    
+//     char * end = output + size;
+
+//     constexpr int safe_overwrite = 15;
+//     constexpr int bytes_per_write = 4 * sizeof(VecUInt32<VectorSize>);
+
+//     VecUInt64<VectorSize> gens1{}, gens2{}, gens3{}, gens4{};
+//     for (int i = 0; i < VectorSize; ++i)
+//     {
+//         gens1[i] = calcSeed(i * 1123465ull * reinterpret_cast<intptr_t>(output));
+//         gens2[i] = calcSeed(i * 6432453ull * reinterpret_cast<intptr_t>(output));
+//         gens3[i] = calcSeed(i * 1346434ull * reinterpret_cast<intptr_t>(output));
+//         gens4[i] = calcSeed(i * 5344753ull * reinterpret_cast<intptr_t>(output));
+//     }
+
+//     while ((end - output) + safe_overwrite >= bytes_per_write)
+//     {
+//         gens1 *= LinearCongruentialGenerator::a;
+//         gens1 += LinearCongruentialGenerator::c;
+//         VecUInt32<VectorSize> values1 = __builtin_convertvector(gens1 >> 16, VecUInt32<VectorSize>);
+//         unalignedStore<VecUInt32<VectorSize>>(output, values1);
+//         gens2 *= LinearCongruentialGenerator::a;
+//         gens2 += LinearCongruentialGenerator::c;
+//         VecUInt32<VectorSize> values2 = __builtin_convertvector(gens2 >> 16, VecUInt32<VectorSize>);
+//         unalignedStore<VecUInt32<VectorSize>>(output, values2);
+//         gens3 *= LinearCongruentialGenerator::a;
+//         gens3 += LinearCongruentialGenerator::c;
+//         VecUInt32<VectorSize> values3 = __builtin_convertvector(gens3 >> 16, VecUInt32<VectorSize>);
+//         unalignedStore<VecUInt32<VectorSize>>(output, values3);
+//         gens4 *= LinearCongruentialGenerator::a;
+//         gens4 += LinearCongruentialGenerator::c;
+//         VecUInt32<VectorSize> values4 = __builtin_convertvector(gens4 >> 16, VecUInt32<VectorSize>);
+//         unalignedStore<VecUInt32<VectorSize>>(output, values4);
+//         output += bytes_per_write;
+//     }
+        
+//     while ((end - output) > 0)
+//     {
+//         gens1 *= LinearCongruentialGenerator::a;
+//         gens1 += LinearCongruentialGenerator::c;
+//         VecUInt32<VectorSize> values = __builtin_convertvector(gens1 >> 16, VecUInt32<VectorSize>);
+//         for (int i = 0; (end - output) > 0 && i < VectorSize; i += 4)
+//         {
+//             unalignedStore<UInt32>(output,      values[i]);
+//             unalignedStore<UInt32>(output + 4,  values[i + 1]);
+//             unalignedStore<UInt32>(output + 8,  values[i + 2]);
+//             unalignedStore<UInt32>(output + 12, values[i + 3]);
+//             output += 16;
+//         }
+//     }
+// }
+
+// template struct RandVecImpl4<4>;
+// template struct RandVecImpl4<8>;
+// template struct RandVecImpl4<16>;
+
 ) //DECLARE_MULTITARGET_CODE

-*/
-
 }
--- a/src/Functions/FunctionsRandom.h
+++ b/src/Functions/FunctionsRandom.h
@ -36,26 +36,20 @@ namespace ErrorCodes
  * This means that the timer must be of sufficient resolution to give different values to each block.
  */

-/*
-
 DECLARE_MULTITARGET_CODE(

-*/
-
 struct RandImpl
 {
    static void execute(char * output, size_t size);
-    static String getImplementationTag() { return ToString(TargetArch::Default); }
+    static String getImplementationTag() { return ToString(BuildArch); }
 };

 struct RandImpl2
 {
    static void execute(char * output, size_t size);
-    static String getImplementationTag() { return ToString(TargetArch::Default) + "_v2"; }
+    static String getImplementationTag() { return ToString(BuildArch) + "_v2"; }
 };

-/*
-
 struct RandImpl3
 {
    static void execute(char * output, size_t size);
@ -74,9 +68,27 @@ struct RandImpl5
    static String getImplementationTag() { return ToString(BuildArch) + "_v5"; }
 };

-) // DECLARE_MULTITARGET_CODE
+/// Random generator that keeps VectorSize linear congruential generators in one vector
+/// and advances them together (see the definition in FunctionsRandom.cpp).
+template <int VectorSize>
+struct RandVecImpl
+{
+    /// Fills `size` bytes starting at `output` with pseudo-random data.
+    /// The definition may write up to 15 bytes past `output + size`.
+    static void execute(char * output, size_t size);
+    /// Tag that identifies this implementation: build architecture plus the vector size.
+    static String getImplementationTag() { return ToString(BuildArch) + "_vec_" + toString(VectorSize); }
+};

-*/
+/// Variant of the vectorized generator that keeps two generator vectors of VectorSize lanes each
+/// and emits two result vectors per loop iteration (see the definition in FunctionsRandom.cpp).
+template <int VectorSize>
+struct RandVecImpl2
+{
+    /// Fills `size` bytes starting at `output` with pseudo-random data.
+    /// The definition may write up to 15 bytes past `output + size`.
+    static void execute(char * output, size_t size);
+    /// Tag that identifies this implementation: build architecture plus the vector size.
+    static String getImplementationTag() { return ToString(BuildArch) + "_vec2_" + toString(VectorSize); }
+};
+
+/// Sixth experimental implementation; registered for AVX2 in FunctionRandom.
+/// NOTE(review): no definition of RandImpl6::execute is visible in this diff - confirm one is provided,
+/// otherwise the registration will fail to link.
+struct RandImpl6
+{
+    /// Presumably fills `size` bytes starting at `output` with pseudo-random data, like the other implementations - TODO confirm.
+    static void execute(char * output, size_t size);
+    /// Tag that identifies this implementation: build architecture plus the "_v6" suffix.
+    static String getImplementationTag() { return ToString(BuildArch) + "_v6"; }
+};
+
+) // DECLARE_MULTITARGET_CODE

 template <typename RandImpl, typename ToType, typename Name>
 class FunctionRandomImpl : public IFunction
@ -125,45 +137,80 @@ public:
 };

 template <typename ToType, typename Name>
-class FunctionRandom : public FunctionRandomImpl<RandImpl2, ToType, Name>
+class FunctionRandom : public FunctionRandomImpl<TargetSpecific::Default::RandImpl, ToType, Name>
 {
 public:
    FunctionRandom(const Context & context) : selector(context)
    {
-        // selector.registerImplementation<TargetArch::Default,
-        //     FunctionRandomImpl<TargetSpecific::Default::RandImpl, ToType, Name>>();
        selector.registerImplementation<TargetArch::Default,
-            FunctionRandomImpl<RandImpl2, ToType, Name>>();
+            FunctionRandomImpl<TargetSpecific::Default::RandImpl, ToType, Name>>();
+        selector.registerImplementation<TargetArch::Default,
+            FunctionRandomImpl<TargetSpecific::Default::RandImpl2, ToType, Name>>();

-        // if constexpr (UseMultitargetCode)
-        // {
-        //     selector.registerImplementation<TargetArch::SSE42,
-        //         FunctionRandomImpl<TargetSpecific::SSE42::RandImpl, ToType, Name>>();
-        //     selector.registerImplementation<TargetArch::AVX,
-        //         FunctionRandomImpl<TargetSpecific::AVX::RandImpl, ToType, Name>>();
-        //     selector.registerImplementation<TargetArch::AVX2,
-        //         FunctionRandomImpl<TargetSpecific::AVX2::RandImpl, ToType, Name>>();
-        //     selector.registerImplementation<TargetArch::AVX512F,
-        //         FunctionRandomImpl<TargetSpecific::AVX512F::RandImpl, ToType, Name>>();
+        if constexpr (UseMultitargetCode)
+        {
+            selector.registerImplementation<TargetArch::SSE42,
+                FunctionRandomImpl<TargetSpecific::SSE42::RandImpl, ToType, Name>>();
+            selector.registerImplementation<TargetArch::AVX,
+                FunctionRandomImpl<TargetSpecific::AVX::RandImpl, ToType, Name>>();
+            selector.registerImplementation<TargetArch::AVX2,
+                FunctionRandomImpl<TargetSpecific::AVX2::RandImpl, ToType, Name>>();
+            selector.registerImplementation<TargetArch::AVX512F,
+                FunctionRandomImpl<TargetSpecific::AVX512F::RandImpl, ToType, Name>>();

-        //     selector.registerImplementation<TargetArch::AVX2,
-        //         FunctionRandomImpl<TargetSpecific::AVX2::RandImpl2, ToType, Name>>();
+            selector.registerImplementation<TargetArch::AVX2,
+                FunctionRandomImpl<TargetSpecific::AVX2::RandImpl2, ToType, Name>>();

-        //     selector.registerImplementation<TargetArch::Default,
-        //         FunctionRandomImpl<TargetSpecific::Default::RandImpl3, ToType, Name>>();
-        //     selector.registerImplementation<TargetArch::AVX2,
-        //         FunctionRandomImpl<TargetSpecific::AVX2::RandImpl3, ToType, Name>>();
+            selector.registerImplementation<TargetArch::Default,
+                FunctionRandomImpl<TargetSpecific::Default::RandImpl3, ToType, Name>>();
+            selector.registerImplementation<TargetArch::AVX2,
+                FunctionRandomImpl<TargetSpecific::AVX2::RandImpl3, ToType, Name>>();

-        //     selector.registerImplementation<TargetArch::Default,
-        //         FunctionRandomImpl<TargetSpecific::Default::RandImpl4, ToType, Name>>();
-        //     selector.registerImplementation<TargetArch::AVX2,
-        //         FunctionRandomImpl<TargetSpecific::AVX2::RandImpl4, ToType, Name>>();
+            selector.registerImplementation<TargetArch::Default,
+                FunctionRandomImpl<TargetSpecific::Default::RandImpl4, ToType, Name>>();
+            selector.registerImplementation<TargetArch::AVX2,
+                FunctionRandomImpl<TargetSpecific::AVX2::RandImpl4, ToType, Name>>();

-        //     selector.registerImplementation<TargetArch::Default,
-        //         FunctionRandomImpl<TargetSpecific::Default::RandImpl5, ToType, Name>>();
-        //     selector.registerImplementation<TargetArch::AVX2,
-        //         FunctionRandomImpl<TargetSpecific::AVX2::RandImpl5, ToType, Name>>();
-        // }
+            selector.registerImplementation<TargetArch::Default,
+                FunctionRandomImpl<TargetSpecific::Default::RandImpl5, ToType, Name>>();
+            selector.registerImplementation<TargetArch::AVX2,
+                FunctionRandomImpl<TargetSpecific::AVX2::RandImpl5, ToType, Name>>();
+
+            // vec impl
+            selector.registerImplementation<TargetArch::Default,
+                FunctionRandomImpl<TargetSpecific::Default::RandVecImpl<4>, ToType, Name>>();
+            selector.registerImplementation<TargetArch::AVX2,
+                FunctionRandomImpl<TargetSpecific::AVX2::RandVecImpl<4>, ToType, Name>>();
+            
+            selector.registerImplementation<TargetArch::Default,
+                FunctionRandomImpl<TargetSpecific::Default::RandVecImpl<8>, ToType, Name>>();
+            selector.registerImplementation<TargetArch::AVX2,
+                FunctionRandomImpl<TargetSpecific::AVX2::RandVecImpl<8>, ToType, Name>>();
+
+            selector.registerImplementation<TargetArch::Default,
+                FunctionRandomImpl<TargetSpecific::Default::RandVecImpl<16>, ToType, Name>>();
+            selector.registerImplementation<TargetArch::AVX2,
+                FunctionRandomImpl<TargetSpecific::AVX2::RandVecImpl<16>, ToType, Name>>();
+
+            // vec impl 2
+            selector.registerImplementation<TargetArch::Default,
+                FunctionRandomImpl<TargetSpecific::Default::RandVecImpl2<4>, ToType, Name>>();
+            selector.registerImplementation<TargetArch::AVX2,
+                FunctionRandomImpl<TargetSpecific::AVX2::RandVecImpl2<4>, ToType, Name>>();
+            
+            selector.registerImplementation<TargetArch::Default,
+                FunctionRandomImpl<TargetSpecific::Default::RandVecImpl2<8>, ToType, Name>>();
+            selector.registerImplementation<TargetArch::AVX2,
+                FunctionRandomImpl<TargetSpecific::AVX2::RandVecImpl2<8>, ToType, Name>>();
+
+            selector.registerImplementation<TargetArch::Default,
+                FunctionRandomImpl<TargetSpecific::Default::RandVecImpl2<16>, ToType, Name>>();
+            selector.registerImplementation<TargetArch::AVX2,
+                FunctionRandomImpl<TargetSpecific::AVX2::RandVecImpl2<16>, ToType, Name>>();
+
+            selector.registerImplementation<TargetArch::AVX2,
+                FunctionRandomImpl<TargetSpecific::AVX2::RandImpl6, ToType, Name>>();
+        }
    }

    void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
--- a/src/Functions/generateUUIDv4.cpp
+++ b/src/Functions/generateUUIDv4.cpp
@ -33,7 +33,7 @@ public:
        size_t size = input_rows_count;
        vec_to.resize(size);
        // TODO(dakovalkov): rewrite this workaround
-        RandImpl::execute(reinterpret_cast<char *>(vec_to.data()), vec_to.size() * sizeof(UInt128));
+        TargetSpecific::Default::RandImpl::execute(reinterpret_cast<char *>(vec_to.data()), vec_to.size() * sizeof(UInt128));

        for (UInt128 & uuid: vec_to)
        {
--- a/src/Functions/randConstant.cpp
+++ b/src/Functions/randConstant.cpp
@ -100,7 +100,7 @@ public:

        typename ColumnVector<ToType>::Container vec_to(1);
        // TODO(dakovalkov): Rewrite this workaround
-        RandImpl::execute(reinterpret_cast<char *>(vec_to.data()), sizeof(ToType));
+        TargetSpecific::Default::RandImpl::execute(reinterpret_cast<char *>(vec_to.data()), sizeof(ToType));
        ToType value = vec_to[0];

        return std::make_unique<FunctionBaseRandomConstant<ToType, Name>>(value, argument_types, return_type);