Add murmurhash2

2024-11-21 23:21:59 +00:00 · 2018-07-30 15:19:22 +03:00 · 2018-07-30 15:19:22 +03:00 · 68bdc4eb12
commit 68bdc4eb12
parent 79607b9118
10 changed files with 653 additions and 4 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -283,6 +283,7 @@ include (cmake/find_contrib_lib.cmake)
 find_contrib_lib(cityhash)
 find_contrib_lib(farmhash)
 find_contrib_lib(metrohash)
+find_contrib_lib(murmurhash2)
 find_contrib_lib(btrie)
 find_contrib_lib(double-conversion)

--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@ -45,6 +45,10 @@ if (USE_INTERNAL_UNWIND_LIBRARY)
    add_subdirectory (libunwind)
 endif ()

+if (USE_INTERNAL_MURMURHASH2_LIBRARY) # Add the libmurmurhash2 subdirectory to the build only when this variable is true; the variable is set outside this hunk.
+    add_subdirectory (libmurmurhash2)
+endif ()
+
 if (USE_INTERNAL_ZLIB_LIBRARY)
    add_subdirectory (${INTERNAL_ZLIB_NAME})
    # todo: make pull to Dead2/zlib-ng and remove:
--- a/contrib/libmurmurhash2/CMakeLists.txt
+++ b/contrib/libmurmurhash2/CMakeLists.txt
@ -0,0 +1,6 @@
+add_library(murmurhash2 # No STATIC/SHARED keyword is given, so the library type follows BUILD_SHARED_LIBS.
+	src/murmurhash2.cpp
+	include/murmurhash2.h)
+# PUBLIC puts a directory on the include path of this library and of every target that links against it.
+target_include_directories (murmurhash2 PUBLIC include)
+target_include_directories (murmurhash2 PUBLIC src) # NOTE(review): only murmurhash2.cpp is listed under src/ above, so exporting it to consumers looks unnecessary — confirm.
--- a/contrib/libmurmurhash2/LICENSE
+++ b/contrib/libmurmurhash2/LICENSE
@ -0,0 +1 @@
+MurmurHash2 was written by Austin Appleby, and is placed in the public domain. The author hereby disclaims copyright to this source code.
--- a/contrib/libmurmurhash2/README
+++ b/contrib/libmurmurhash2/README
@ -0,0 +1,6 @@
+Original URL: https://github.com/aappleby/smhasher
+
+version:
+commit  61a0530f28277f2e850bfc39600ce61d02b518de 
+author  aappleby@gmail.com
+date    2016-01-09T06:07:17Z
--- a/contrib/libmurmurhash2/include/murmurhash2.h
+++ b/contrib/libmurmurhash2/include/murmurhash2.h
@ -0,0 +1,35 @@
+//-----------------------------------------------------------------------------
+// MurmurHash2 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+#ifndef _MURMURHASH2_H_
+#define _MURMURHASH2_H_
+// NOTE(review): names that start with an underscore followed by an uppercase letter are reserved for the implementation in C++; consider a guard such as MURMURHASH2_H.
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+// MSVC with _MSC_VER below 1600 gets hand-written fixed-width typedefs; every other compiler includes <stdint.h>.
+#if defined(_MSC_VER) && (_MSC_VER < 1600)
+
+typedef unsigned char uint8_t;
+typedef unsigned int uint32_t;
+typedef unsigned __int64 uint64_t;
+
+// Other compilers
+
+#else	// defined(_MSC_VER)
+
+#include <stdint.h>
+
+#endif // !defined(_MSC_VER)
+// Every function below hashes the `len` bytes starting at `key` and folds `seed` into the initial state.
+uint32_t MurmurHash2        (const void * key, int len, uint32_t seed); // 32-bit result; reads the key as 4-byte words in host byte order
+uint64_t MurmurHash64A      (const void * key, int len, uint64_t seed); // 64-bit result; reads the key as 8-byte words in host byte order
+uint64_t MurmurHash64B      (const void * key, int len, uint64_t seed); // 64-bit result assembled from two 32-bit states
+uint32_t MurmurHash2A       (const void * key, int len, uint32_t seed); // 32-bit result; the key length is mixed in at the end instead of into the initial state
+uint32_t MurmurHashNeutral2 (const void * key, int len, uint32_t seed); // 32-bit result; assembles each word from individual bytes
+uint32_t MurmurHashAligned2 (const void * key, int len, uint32_t seed); // 32-bit result; 4-byte loads are issued only from 4-byte-aligned addresses
+
+#endif // _MURMURHASH2_H_
+
--- a/contrib/libmurmurhash2/src/murmurhash2.cpp
+++ b/contrib/libmurmurhash2/src/murmurhash2.cpp
@ -0,0 +1,421 @@
+// MurmurHash2 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+// Note - This code makes a few assumptions about how your machine behaves -
+
+// 1. We can read a 4-byte value from any address without crashing
+// 2. sizeof(int) == 4
+
+// And it has a few limitations -
+
+// 1. It will not work incrementally.
+// 2. It will not produce the same results on little-endian and big-endian
+//    machines.
+
+#include "murmurhash2.h"
+
+// Platform-specific functions and macros
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+// BIG_CONSTANT(x) wraps a 64-bit integer literal: MSVC takes it as written, other compilers get an LLU suffix pasted on.
+#define BIG_CONSTANT(x) (x)
+
+// Other compilers
+
+#else	// defined(_MSC_VER)
+
+#define BIG_CONSTANT(x) (x##LLU)
+
+#endif // !defined(_MSC_VER)
+
+// Computes the 32-bit MurmurHash2 of the `len` bytes at `key`; the initial state is `seed ^ len`.
+uint32_t MurmurHash2(const void * key, int len, uint32_t seed)
+{
+    // 'm' and 'r' are mixing constants generated offline.
+    // They're not really 'magic', they just happen to work well.
+
+    const uint32_t m = 0x5bd1e995;
+    const int r = 24;
+
+    // Initialize the hash to a 'random' value
+
+    uint32_t h = seed ^ len;
+
+    // Mix 4 bytes at a time into the hash
+
+    const unsigned char * data = reinterpret_cast<const unsigned char *>(key);
+    // A non-positive `len` skips both the loop and the switch below, so only the final mixing runs.
+    while (len >= 4)
+    {
+        uint32_t k = *reinterpret_cast<const uint32_t *>(data); // 4-byte load in host byte order; `data` is not checked for alignment (see the assumptions listed at the top of this file)
+        k *= m;
+        k ^= k >> r;
+        k *= m;
+
+        h *= m;
+        h ^= k;
+
+        data += 4;
+        len -= 4;
+    }
+
+    // Handle the last few bytes of the input array
+ 
+    switch (len)
+    {
+        case 3: h ^= data[2] << 16; // no break: each case falls through to the ones below it
+        case 2: h ^= data[1] << 8;
+        case 1: h ^= data[0];
+                h *= m; // executed only when 1 to 3 bytes were left over
+    };
+
+    // Do a few final mixes of the hash to ensure the last few
+    // bytes are well-incorporated.
+
+    h ^= h >> 13;
+    h *= m;
+    h ^= h >> 15;
+
+    return h;
+} 
+
+// MurmurHash2, 64-bit versions, by Austin Appleby
+
+// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment 
+// and endian-ness issues if used across multiple platforms.
+
+// 64-bit hash for 64-bit platforms
+// Computes a 64-bit hash of the `len` bytes at `key`, consuming the key as 8-byte words followed by a tail of up to 7 bytes.
+uint64_t MurmurHash64A(const void * key, int len, uint64_t seed)
+{
+    const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995);
+    const int r = 47;
+
+    uint64_t h = seed ^ (len * m); // `len` is converted to uint64_t for this multiplication
+
+    const uint64_t * data = reinterpret_cast<const uint64_t *>(key);
+    const uint64_t * end = data + (len/8); // one past the last whole 8-byte word
+
+    while (data != end)
+    {
+        uint64_t k = *data++; // 8-byte load in host byte order; `key` is not checked for 8-byte alignment
+
+        k *= m; 
+        k ^= k >> r; 
+        k *= m; 
+
+        h ^= k;
+        h *= m; 
+    }
+    // `data` now points at the first byte that did not fit into a whole word.
+    const unsigned char * data2 = reinterpret_cast<const unsigned char *>(data);
+
+    switch (len & 7)
+    {
+        case 7: h ^= static_cast<uint64_t>(data2[6]) << 48; // no break: each case falls through to the ones below it
+        case 6: h ^= static_cast<uint64_t>(data2[5]) << 40;
+        case 5: h ^= static_cast<uint64_t>(data2[4]) << 32;
+        case 4: h ^= static_cast<uint64_t>(data2[3]) << 24;
+        case 3: h ^= static_cast<uint64_t>(data2[2]) << 16;
+        case 2: h ^= static_cast<uint64_t>(data2[1]) << 8;
+        case 1: h ^= static_cast<uint64_t>(data2[0]);
+                h *= m; // executed only when 1 to 7 bytes were left over
+    };
+    // Final mixing of the accumulated state.
+    h ^= h >> r;
+    h *= m;
+    h ^= h >> r;
+
+    return h;
+} 
+
+
+// 64-bit hash for 32-bit platforms
+// Computes a 64-bit hash of the `len` bytes at `key` with 32-bit arithmetic: two 32-bit states, h1 and h2, are fed alternately and concatenated at the end.
+uint64_t MurmurHash64B(const void * key, int len, uint64_t seed)
+{
+    const uint32_t m = 0x5bd1e995;
+    const int r = 24;
+
+    uint32_t h1 = static_cast<uint32_t>(seed) ^ len; // low half of the seed, combined with the length
+    uint32_t h2 = static_cast<uint32_t>(seed >> 32); // high half of the seed
+
+    const uint32_t * data = reinterpret_cast<const uint32_t *>(key);
+    // Each pass consumes 8 bytes: the first 4-byte word goes into h1, the second into h2.
+    while (len >= 8)
+    {
+        uint32_t k1 = *data++;
+        k1 *= m; k1 ^= k1 >> r; k1 *= m;
+        h1 *= m; h1 ^= k1;
+        len -= 4;
+
+        uint32_t k2 = *data++;
+        k2 *= m; k2 ^= k2 >> r; k2 *= m;
+        h2 *= m; h2 ^= k2;
+        len -= 4;
+    }
+    // One remaining whole word, if any, goes into h1.
+    if (len >= 4)
+    {
+        uint32_t k1 = *data++;
+        k1 *= m; k1 ^= k1 >> r; k1 *= m;
+        h1 *= m; h1 ^= k1;
+        len -= 4;
+    }
+    // The last 1 to 3 bytes go into h2; the cases fall through (no break).
+    switch (len)
+    {
+        case 3: h2 ^= reinterpret_cast<const unsigned char *>(data)[2] << 16;
+        case 2: h2 ^= reinterpret_cast<const unsigned char *>(data)[1] << 8;
+        case 1: h2 ^= reinterpret_cast<const unsigned char *>(data)[0];
+                h2 *= m;
+    };
+    // Cross-mix the two states into each other.
+    h1 ^= h2 >> 18; h1 *= m;
+    h2 ^= h1 >> 22; h2 *= m;
+    h1 ^= h2 >> 17; h1 *= m;
+    h2 ^= h1 >> 19; h2 *= m;
+
+    uint64_t h = h1;
+    // h1 becomes the upper 32 bits of the result, h2 the lower 32 bits.
+    h = (h << 32) | h2;
+
+    return h;
+} 
+
+// MurmurHash2A, by Austin Appleby
+
+// This is a variant of MurmurHash2 modified to use the Merkle-Damgard 
+// construction. Bulk speed should be identical to Murmur2, small-key speed 
+// will be 10%-20% slower due to the added overhead at the end of the hash.
+
+// This variant fixes a minor issue where null keys were more likely to
+// collide with each other than expected, and also makes the function
+// more amenable to incremental implementations.
+// mmix(h,k) mixes k into h; it modifies both arguments and takes `m` and `r` from the scope in which it is expanded.
+#define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
+// Computes a 32-bit hash of the `len` bytes at `key`; the state starts from `seed` alone and the key length is mixed in after the data.
+uint32_t MurmurHash2A(const void * key, int len, uint32_t seed)
+{
+    const uint32_t m = 0x5bd1e995;
+    const int r = 24;
+    uint32_t l = len; // copy of the original length: `len` is counted down by the loop below
+
+    const unsigned char * data = reinterpret_cast<const unsigned char *>(key);
+
+    uint32_t h = seed;
+
+    while (len >= 4)
+    {
+        uint32_t k = *reinterpret_cast<const uint32_t *>(data); // 4-byte load in host byte order; `data` is not checked for alignment
+        mmix(h,k);
+        data += 4;
+        len -= 4;
+    }
+    // t collects the 1 to 3 leftover bytes and stays 0 when there are none.
+    uint32_t t = 0;
+
+    switch (len)
+    {
+        case 3: t ^= data[2] << 16; // no break: each case falls through to the ones below it
+        case 2: t ^= data[1] << 8;
+        case 1: t ^= data[0];
+    };
+    // Both steps run unconditionally, so even an empty key goes through two mixing rounds (tail word, then length).
+    mmix(h,t);
+    mmix(h,l);
+
+    h ^= h >> 13;
+    h *= m;
+    h ^= h >> 15;
+
+    return h;
+}
+
+// MurmurHashNeutral2, by Austin Appleby
+
+// Same as MurmurHash2, but endian- and alignment-neutral.
+// Half the speed though, alas.
+// Computes a 32-bit hash of the `len` bytes at `key`, reading the key one byte at a time; the initial state is `seed ^ len`.
+uint32_t MurmurHashNeutral2(const void * key, int len, uint32_t seed)
+{
+    const uint32_t m = 0x5bd1e995;
+    const int r = 24;
+
+    uint32_t h = seed ^ len;
+
+    const unsigned char * data = reinterpret_cast<const unsigned char *>(key);
+
+    while (len >= 4)
+    {
+        uint32_t k;
+        // Assemble the word from individual bytes with data[0] as the lowest byte, so no multi-byte load is performed.
+        k  = data[0];
+        k |= data[1] << 8;
+        k |= data[2] << 16;
+        k |= data[3] << 24;
+
+        k *= m; 
+        k ^= k >> r; 
+        k *= m;
+
+        h *= m;
+        h ^= k;
+
+        data += 4;
+        len -= 4;
+    }
+    // Leftover 1 to 3 bytes; the cases fall through (no break).
+    switch (len)
+    {
+        case 3: h ^= data[2] << 16;
+        case 2: h ^= data[1] << 8;
+        case 1: h ^= data[0];
+                h *= m; // executed only when 1 to 3 bytes were left over
+    };
+
+    h ^= h >> 13;
+    h *= m;
+    h ^= h >> 15;
+
+    return h;
+} 
+
+//-----------------------------------------------------------------------------
+// MurmurHashAligned2, by Austin Appleby
+
+// Same algorithm as MurmurHash2, but only does aligned reads - should be safer
+// on certain platforms. 
+
+// Performance will be lower than MurmurHash2
+
+#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
+// MIX(h,k,m) mixes k into h and modifies both; the shift count `r` is not a parameter and comes from the scope in which the macro is expanded.
+// Computes a 32-bit hash of the `len` bytes at `key`. When `key` is not 4-byte aligned, 4-byte loads are issued only from aligned addresses and the words are re-assembled with shifts.
+uint32_t MurmurHashAligned2(const void * key, int len, uint32_t seed)
+{
+    const uint32_t m = 0x5bd1e995;
+    const int r = 24;
+
+    const unsigned char * data = reinterpret_cast<const unsigned char *>(key);
+
+    uint32_t h = seed ^ len;
+    // Low two bits of the address: how many bytes `key` lies past a 4-byte boundary.
+    int align = reinterpret_cast<uint64_t>(data) & 3;
+
+    if (align && (len >= 4))
+    {
+        // Pre-load the temp registers
+        // The fall-through cases below read the 4 - align bytes that precede the next 4-byte boundary.
+        uint32_t t = 0, d = 0;
+
+        switch (align)
+        {
+            case 1: t |= data[2] << 16;
+            case 2: t |= data[1] << 8;
+            case 3: t |= data[0];
+        }
+        // Park those bytes in the top of t so that `t >> sr` below brings them back to the low end.
+        t <<= (8 * align);
+
+        data += 4-align;
+        len -= 4-align;
+        // sl / sr: shift counts used to splice the not-yet-mixed bytes of the previous word with the first bytes of the next one.
+        int sl = 8 * (4-align);
+        int sr = 8 * align;
+
+        // Mix
+
+        while (len >= 4)
+        {
+            d = *(reinterpret_cast<const uint32_t *>(data)); // `data` is 4-byte aligned here
+            t = (t >> sr) | (d << sl);
+
+            uint32_t k = t;
+
+            MIX(h,k,m);
+            // Keep the word just read: its remaining bytes belong to the next spliced word.
+            t = d;
+
+            data += 4;
+            len -= 4;
+        }
+
+        // Handle leftover data in temp registers
+        // At most 3 bytes remain at this point, and t still carries 4 - align bytes that have not been mixed.
+        d = 0;
+        // If at least `align` bytes remain, they complete one more full word together with the bytes carried in t.
+        if (len >= align)
+        {
+            switch (align)
+            {
+                case 3: d |= data[2] << 16;
+                case 2: d |= data[1] << 8;
+                case 1: d |= data[0];
+            }
+
+            uint32_t k = (t >> sr) | (d << sl);
+            MIX(h,k,m);
+
+            data += align;
+            len -= align;
+
+            //----------
+            // Handle tail bytes
+
+            switch (len)
+            {
+                case 3: h ^= data[2] << 16;
+                case 2: h ^= data[1] << 8;
+                case 1: h ^= data[0];
+                        h *= m;
+            };
+        }
+        else
+        {
+            switch (len)
+            {
+                case 3: d |= data[2] << 16;
+                case 2: d |= data[1] << 8;
+                case 1: d |= data[0];
+                case 0: h ^= (t >> sr) | (d << sl); // too few bytes to complete a word: the carried bytes plus the leftovers are XOR-ed into h directly, also when len is 0
+                        h *= m;
+            }
+        }
+
+        h ^= h >> 13;
+        h *= m;
+        h ^= h >> 15;
+
+        return h;
+    }
+    else // `key` is already 4-byte aligned, or fewer than 4 bytes were given: plain word-at-a-time loop
+    {
+        while (len >= 4)
+        {
+            uint32_t k = *reinterpret_cast<const uint32_t *>(data);
+
+            MIX(h,k,m);
+
+            data += 4;
+            len -= 4;
+        }
+
+        // Handle tail bytes
+
+        switch (len)
+        {
+            case 3: h ^= data[2] << 16;
+            case 2: h ^= data[1] << 8;
+            case 1: h ^= data[0];
+                    h *= m;
+        };
+
+        h ^= h >> 13;
+        h *= m;
+        h ^= h >> 15;
+
+        return h;
+  }
+}
--- a/dbms/src/Functions/CMakeLists.txt
+++ b/dbms/src/Functions/CMakeLists.txt
@ -91,7 +91,7 @@ list(REMOVE_ITEM clickhouse_functions_headers IFunction.h FunctionFactory.h Func

 add_library(clickhouse_functions ${clickhouse_functions_sources})

-target_link_libraries(clickhouse_functions PUBLIC dbms PRIVATE libconsistent-hashing ${FARMHASH_LIBRARIES} ${METROHASH_LIBRARIES})
+target_link_libraries(clickhouse_functions PUBLIC dbms PRIVATE libconsistent-hashing ${FARMHASH_LIBRARIES} ${METROHASH_LIBRARIES} ${MURMURHASH2_LIBRARIES})

 target_include_directories (clickhouse_functions SYSTEM BEFORE PUBLIC ${DIVIDE_INCLUDE_DIR})

--- a/dbms/src/Functions/FunctionsHashing.cpp
+++ b/dbms/src/Functions/FunctionsHashing.cpp
@ -20,6 +20,7 @@ void registerFunctionsHashing(FunctionFactory & factory)
    factory.registerFunction<FunctionIntHash32>();
    factory.registerFunction<FunctionIntHash64>();
    factory.registerFunction<FunctionURLHash>();
+    factory.registerFunction<MurmurHash2>();
 }

 template <>
--- a/dbms/src/Functions/FunctionsHashing.h
+++ b/dbms/src/Functions/FunctionsHashing.h
@ -5,6 +5,7 @@
 #include <city.h>
 #include <farmhash.h>
 #include <metrohash.h>
+#include <murmurhash2.h>

 #include <Poco/ByteOrder.h>

@ -143,6 +144,7 @@ struct SipHash64Impl
    }
 };

+
 struct SipHash128Impl
 {
    static constexpr auto name = "sipHash128";
@ -366,7 +368,6 @@ UInt64 toInteger<Float32>(Float32 x);
 template <>
 UInt64 toInteger<Float64>(Float64 x);

-
 /** We use hash functions called CityHash, FarmHash, MetroHash.
  * In this regard, this template is named with the words `NeighborhoodHash`.
  */
@ -614,6 +615,179 @@ public:
 };


+template <typename Impl> // Impl must provide a static `name` and a static `Hash32(const char *, size_t)`; both are used below.
+class FunctionStringHash32 : public IFunction // One-argument function that produces a UInt32 hash per row; string data is hashed with Impl::Hash32.
+{
+public:
+    static constexpr auto name = Impl::name;
+    static FunctionPtr create(const Context &) { return std::make_shared<FunctionStringHash32>(); }
+
+    String getName() const override { return name; }
+
+    bool isVariadic() const override { return false; }
+
+    size_t getNumberOfArguments() const override { return 1; }
+
+    DataTypePtr getReturnTypeImpl(const DataTypes & /* arguments */) const override { return std::make_shared<DataTypeUInt32>(); }
+
+    bool useDefaultImplementationForConstants() const override { return true; }
+    /// Creates a UInt32 column with input_rows_count rows, fills it from the argument columns and stores it in `block` at position `result`.
+    void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
+    {
+        size_t rows = input_rows_count;
+        auto col_to = ColumnUInt32::create(rows);
+
+        ColumnUInt32::Container & vec_to = col_to->getData();
+        /// NOTE(review): getNumberOfArguments() returns 1 and isVariadic() returns false, so this branch looks unreachable — confirm.
+        if (arguments.empty())
+        {
+            /// Constant random number from /dev/urandom is used as a hash value of empty list of arguments.
+            vec_to.assign(rows, static_cast<UInt32>(0xe28dbde7fe22e41c)); // the literal is 64 bits wide, so the cast to UInt32 truncates it
+        }
+
+        /// The function supports arbitrary number of arguments of arbitrary types.
+        /// NOTE(review): every execute* helper assigns vec_to[i] outright, so when several columns are processed (multiple arguments or tuple elements) only the last one determines the result — confirm this is intended.
+        for (size_t i = 0; i < arguments.size(); ++i)
+        {
+            const ColumnWithTypeAndName & col = block.getByPosition(arguments[i]);
+            executeForArgument(col.type.get(), col.column.get(), vec_to);
+        }
+
+        block.getByPosition(result).column = std::move(col_to);
+    }
+private:
+    /// Dispatches on the data type: integer, float, enum, Date and DateTime go to executeIntType, String and FixedString go to executeString; any other type throws ILLEGAL_TYPE_OF_ARGUMENT.
+    void executeAny(const IDataType * from_type, const IColumn * icolumn, ColumnUInt32::Container & vec_to)
+    {
+        if      (checkDataType<DataTypeUInt8>(from_type)) executeIntType<UInt8>(icolumn, vec_to);
+        else if (checkDataType<DataTypeUInt16>(from_type)) executeIntType<UInt16>(icolumn, vec_to);
+        else if (checkDataType<DataTypeUInt32>(from_type)) executeIntType<UInt32>(icolumn, vec_to);
+        else if (checkDataType<DataTypeUInt64>(from_type)) executeIntType<UInt64>(icolumn, vec_to);
+        else if (checkDataType<DataTypeInt8>(from_type)) executeIntType<Int8>(icolumn, vec_to);
+        else if (checkDataType<DataTypeInt16>(from_type)) executeIntType<Int16>(icolumn, vec_to);
+        else if (checkDataType<DataTypeInt32>(from_type)) executeIntType<Int32>(icolumn, vec_to);
+        else if (checkDataType<DataTypeInt64>(from_type)) executeIntType<Int64>(icolumn, vec_to);
+        else if (checkDataType<DataTypeEnum8>(from_type)) executeIntType<Int8>(icolumn, vec_to);
+        else if (checkDataType<DataTypeEnum16>(from_type)) executeIntType<Int16>(icolumn, vec_to);
+        else if (checkDataType<DataTypeDate>(from_type)) executeIntType<UInt16>(icolumn, vec_to);
+        else if (checkDataType<DataTypeDateTime>(from_type)) executeIntType<UInt32>(icolumn, vec_to);
+        else if (checkDataType<DataTypeFloat32>(from_type)) executeIntType<Float32>(icolumn, vec_to);
+        else if (checkDataType<DataTypeFloat64>(from_type)) executeIntType<Float64>(icolumn, vec_to);
+        else if (checkDataType<DataTypeString>(from_type)) executeString(icolumn, vec_to);
+        else if (checkDataType<DataTypeFixedString>(from_type)) executeString(icolumn, vec_to);
+        else
+            throw Exception("Unexpected type " + from_type->getName() + " of argument of function " + getName(),
+                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+    }  
+    /// Hashes a numeric column (plain or constant) row by row; throws ILLEGAL_COLUMN for any other column kind. NOTE(review): this path calls IntHash32Impl, not Impl, so numeric inputs never reach Impl::Hash32 — confirm this is intended.
+    template <typename FromType>
+    void executeIntType(const IColumn * column, ColumnUInt32::Container & vec_to)
+    {
+        if (const ColumnVector<FromType> * col_from = checkAndGetColumn<ColumnVector<FromType>>(column))
+        {
+            const typename ColumnVector<FromType>::Container & vec_from = col_from->getData();
+            size_t size = vec_from.size();
+            for (size_t i = 0; i < size; ++i)
+            {
+                vec_to[i] = IntHash32Impl::apply(toInteger(vec_from[i]));
+            }
+        }
+        else if (auto col_from = checkAndGetColumnConst<ColumnVector<FromType>>(column))
+        {
+            size_t size = vec_to.size();
+            for (size_t i = 0; i < size; ++i)
+                vec_to[i] = IntHash32Impl::apply(toInteger(col_from->template getValue<FromType>()));
+        }
+        else
+            throw Exception("Illegal column " + column->getName()
+                + " of argument of function " + getName(),
+                ErrorCodes::ILLEGAL_COLUMN);
+    }
+    /// Hashes a String, FixedString or constant string column row by row with Impl::Hash32; throws ILLEGAL_COLUMN for any other column kind.
+    void executeString(const IColumn * column, ColumnUInt32::Container & vec_to)
+    {
+        if (const ColumnString * col_from = checkAndGetColumn<ColumnString>(column))
+        {
+            const typename ColumnString::Chars_t & data = col_from->getChars();
+            const typename ColumnString::Offsets & offsets = col_from->getOffsets();
+            size_t size = offsets.size();
+            /// Row i starts at offsets[i - 1] (0 for the first row) and ends before offsets[i]; the extra -1 drops the last byte of each row — presumably a terminating zero, TODO confirm.
+            for (size_t i = 0; i < size; ++i)
+            {
+                vec_to[i] = Impl::Hash32(
+                    reinterpret_cast<const char *>(&data[i == 0 ? 0 : offsets[i - 1]]),
+                    i == 0 ? offsets[i] - 1 : (offsets[i] - 1 - offsets[i - 1]));
+            }
+        }
+        else if (const ColumnFixedString * col_from = checkAndGetColumn<ColumnFixedString>(column))
+        {
+            const typename ColumnString::Chars_t & data = col_from->getChars();
+            size_t n = col_from->getN();
+            size_t size = data.size() / n; // every row is exactly n bytes long
+            for (size_t i = 0; i < size; ++i)
+                vec_to[i] = Impl::Hash32(reinterpret_cast<const char *>(&data[i * n]), n);
+        }
+        else if (const ColumnConst * col_from = checkAndGetColumnConstStringOrFixedString(column))
+        {
+            String value = col_from->getValue<String>().data(); // NOTE(review): if String is std::string, building it from .data() (a C string) stops at the first zero byte, so a constant with embedded NULs would be hashed truncated — confirm.
+            const size_t size = vec_to.size();
+            for (size_t i = 0; i < size; ++i)
+                vec_to[i] = Impl::Hash32(value.data(), value.size());
+        }
+        else
+            throw Exception("Illegal column " + column->getName()
+                    + " of first argument of function " + getName(),
+                ErrorCodes::ILLEGAL_COLUMN);
+    }  
+
+    /// Processes one argument column: tuple columns (plain or constant) are expanded and each element is handled recursively, everything else goes to executeAny.
+    void executeForArgument(const IDataType * type, const IColumn * column, ColumnUInt32::Container & vec_to)
+    {
+        /// Flattening of tuples.
+        if (const ColumnTuple * tuple = typeid_cast<const ColumnTuple *>(column))
+        {
+            const Columns & tuple_columns = tuple->getColumns();
+            const DataTypes & tuple_types = typeid_cast<const DataTypeTuple &>(*type).getElements();
+            size_t tuple_size = tuple_columns.size();
+            for (size_t i = 0; i < tuple_size; ++i)
+                executeForArgument(tuple_types[i].get(), tuple_columns[i].get(), vec_to);
+        }
+        else if (const ColumnTuple * tuple = checkAndGetColumnConstData<ColumnTuple>(column))
+        {
+            const Columns & tuple_columns = tuple->getColumns();
+            const DataTypes & tuple_types = typeid_cast<const DataTypeTuple &>(*type).getElements();
+            size_t tuple_size = tuple_columns.size();
+            for (size_t i = 0; i < tuple_size; ++i)
+            {
+                auto tmp = ColumnConst::create(tuple_columns[i], column->size()); // wrap the element as a constant column with as many rows as the original column
+                executeForArgument(tuple_types[i].get(), tmp.get(), vec_to);
+            }
+        }
+        else
+        {
+            executeAny(type, column, vec_to);
+        }
+    }
+};
+
+
+/** Why do we need MurmurHash2?
+*   MurmurHash2 is an outdated hash function, superseded by MurmurHash3 and subsequently by CityHash, xxHash, HighwayHash. 
+*   Usually there is no reason to use MurmurHash.
+*   It is needed for cases where you already use MurmurHash in some application and want to reproduce it 
+*   in ClickHouse as is. For example, it is needed to reproduce the behaviour 
+*   of the NGINX a/b testing module: https://nginx.ru/en/docs/http/ngx_http_split_clients_module.html
+*   Hash32 forwards to MurmurHash2 with a seed of 0.
+*/
+struct MurmurHash2Impl
+{
+    static constexpr auto name = "murmurHash2";
+    static UInt32 Hash32(const char * data, const size_t size) 
+    {
+        return MurmurHash2(data, size, 0);  // NOTE(review): MurmurHash2 is declared in murmurhash2.h with an `int len` parameter, so `size` is narrowed from size_t here — confirm inputs cannot exceed INT_MAX bytes.
+    }
+};
+
+
 struct URLHashImpl
 {
    static UInt64 apply(const char * data, const size_t size)
@ -848,5 +1022,5 @@ using FunctionSipHash128 = FunctionStringHashFixedString<SipHash128Impl>;
 using FunctionCityHash64 = FunctionNeighbourhoodHash64<ImplCityHash64>;
 using FunctionFarmHash64 = FunctionNeighbourhoodHash64<ImplFarmHash64>;
 using FunctionMetroHash64 = FunctionNeighbourhoodHash64<ImplMetroHash64>;
-
-}
+using MurmurHash2 = FunctionStringHash32<MurmurHash2Impl>; // NOTE(review): the sibling aliases above are all named Function*, and this alias shares its name with the MurmurHash2() function declared in murmurhash2.h — consider FunctionMurmurHash2 (the registerFunction call would need the same rename).
+}
				`@ -0,0 +1 @@`
				`MurmurHash2 was written by Austin Appleby, and is placed in the public domain. The author hereby disclaims copyright to this source code.`