ClickHouse/dbms/Common/HashTable/Hash.h

#pragma once
#include <Core/Types.h>
#include <Common/UInt128.h>
#include <common/unaligned.h>
#include <type_traits>

/** Hash functions that are better than the trivial function std::hash.
  *
  * Example: when we do aggregation by the visitor ID, the performance increase is more than 5 times.
  * This is because of the following reasons:
  * - in Yandex, the visitor identifier is an integer that has a timestamp with seconds resolution in its lower bits;
  * - in a typical implementation of the standard library, the hash function for integers is trivial and just uses the lower bits;
  * - traffic is non-uniformly distributed across the day;
  * - we are using open-addressing linear probing hash tables, which are the most sensitive to hash function quality,
  *   and a trivial hash function gives disastrous results.
  */

/** Taken from MurmurHash. This is the Murmur finalizer.
  * Faster than intHash32 when inserting into the hash table UInt64 -> UInt64, where the key is the visitor ID.
  */
inline DB::UInt64 intHash64(DB::UInt64 x)
{
    x ^= x >> 33;
    x *= 0xff51afd7ed558ccdULL;
    x ^= x >> 33;
    x *= 0xc4ceb9fe1a85ec53ULL;
    x ^= x >> 33;

    return x;
}
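
/// Hypothetical illustration (not part of the original header), assuming a power-of-two table size:
/// adjacent visitor IDs differ only in their low bits, so a trivial hash maps them to neighbouring
/// cells of an open-addressing table, while the Murmur finalizer above spreads them over the whole table.
inline void exampleIntHash64Spread()
{
    const DB::UInt64 num_buckets = 1 << 16;                                  /// hypothetical table size
    for (DB::UInt64 visitor_id = 1000000; visitor_id < 1000004; ++visitor_id)
    {
        DB::UInt64 trivial_cell = visitor_id & (num_buckets - 1);            /// trivial hash: low bits only
        DB::UInt64 murmur_cell = intHash64(visitor_id) & (num_buckets - 1);  /// well-mixed cell index
        (void)trivial_cell;
        (void)murmur_cell;
    }
}
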
/** CRC32C is not a very high-quality hash function,
  * according to the avalanche and bit independence tests (see the SMHasher suite), and it also has a small number of output bits,
  * but it can behave well when used in hash tables,
  * due to its high speed (latency 3 + 1 clock cycles, throughput 1 per clock cycle).
  * The CRC32 instruction is available only with SSE 4.2 (or the ARMv8 CRC32 extension); on other platforms intHash64 is used as a fallback.
  */

#ifdef __SSE4_2__
#include <nmmintrin.h>
#endif
#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)
#include <arm_acle.h>
#include <arm_neon.h>
#endif

inline DB::UInt64 intHashCRC32(DB::UInt64 x)
{
#ifdef __SSE4_2__
    return _mm_crc32_u64(-1ULL, x);
#elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)
    return __crc32cd(-1U, x);
#else
    /// On other platforms we do not have CRC32. NOTE This can be confusing.
    return intHash64(x);
#endif
}

inline DB::UInt64 intHashCRC32(DB::UInt64 x, DB::UInt64 updated_value)
{
#ifdef __SSE4_2__
    return _mm_crc32_u64(updated_value, x);
#elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)
    return __crc32cd(updated_value, x);
#else
    /// On other platforms we do not have CRC32. NOTE This can be confusing.
    return intHash64(x) ^ updated_value;
#endif
}
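
/// Hypothetical usage sketch (not part of the original header): the two-argument overload lets the
/// caller fold several 64-bit fields into one running CRC32C value; the field names are made up.
inline DB::UInt64 exampleCombineFields(DB::UInt64 user_id, DB::UInt64 event_time)
{
    DB::UInt64 hash = intHashCRC32(user_id);       /// hash of the first field, default initial value
    hash = intHashCRC32(event_time, hash);         /// mix the second field into the running value
    return hash;
}
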
template <typename T>
inline typename std::enable_if<(sizeof(T) > sizeof(DB::UInt64)), DB::UInt64>::type
intHashCRC32(const T & x, DB::UInt64 updated_value)
{
    auto * begin = reinterpret_cast<const char *>(&x);
    for (size_t i = 0; i < sizeof(T); i += sizeof(DB::UInt64))
    {
        updated_value = intHashCRC32(unalignedLoad<DB::UInt64>(begin), updated_value);
        begin += sizeof(DB::UInt64);
    }

    return updated_value;
}
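
/// Hypothetical usage sketch (not part of the original header): for keys wider than 64 bits the
/// overload above consumes the value 8 bytes at a time, threading the running CRC through every step.
/// The struct and the initial value 0 below are made up for the example.
struct ExampleWideKey
{
    DB::UInt64 parts[2];
};

inline DB::UInt64 exampleHashWideKey(const ExampleWideKey & key)
{
    return intHashCRC32(key, 0);
}
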
/// Mixes the bytes in [pos, pos + size) into a running 32-bit "weak" hash value using intHashCRC32.
inline UInt32 updateWeakHash32(const DB::UInt8 * pos, size_t size, DB::UInt32 updated_value)
{
    if (size < 8)
    {
        DB::UInt64 value = 0;
        auto * value_ptr = reinterpret_cast<unsigned char *>(&value);

        typedef __attribute__((__aligned__(1))) uint16_t uint16_unaligned_t;
        typedef __attribute__((__aligned__(1))) uint32_t uint32_unaligned_t;

        /// Adapted from FastMemcpy.h (memcpy_tiny).
        switch (size)
        {
            case 0:
                break;
            case 1:
                value_ptr[0] = pos[0];
                break;
            case 2:
                *reinterpret_cast<uint16_t *>(value_ptr) = *reinterpret_cast<const uint16_unaligned_t *>(pos);
                break;
            case 3:
                *reinterpret_cast<uint16_t *>(value_ptr) = *reinterpret_cast<const uint16_unaligned_t *>(pos);
                value_ptr[2] = pos[2];
                break;
            case 4:
                *reinterpret_cast<uint32_t *>(value_ptr) = *reinterpret_cast<const uint32_unaligned_t *>(pos);
                break;
            case 5:
                *reinterpret_cast<uint32_t *>(value_ptr) = *reinterpret_cast<const uint32_unaligned_t *>(pos);
                value_ptr[4] = pos[4];
                break;
            case 6:
                *reinterpret_cast<uint32_t *>(value_ptr) = *reinterpret_cast<const uint32_unaligned_t *>(pos);
                *reinterpret_cast<uint16_unaligned_t *>(value_ptr + 4) =
                    *reinterpret_cast<const uint16_unaligned_t *>(pos + 4);
                break;
            case 7:
                *reinterpret_cast<uint32_t *>(value_ptr) = *reinterpret_cast<const uint32_unaligned_t *>(pos);
                *reinterpret_cast<uint32_unaligned_t *>(value_ptr + 3) =
                    *reinterpret_cast<const uint32_unaligned_t *>(pos + 3);
                break;
            default:
                __builtin_unreachable();
        }

        /// Store the size in the last byte of the value, so that e.g. "a" and "a\0" hash differently.
        value_ptr[7] = size;
        return intHashCRC32(value, updated_value);
    }

    const auto * end = pos + size;
    while (pos + 8 <= end)
    {
        auto word = unalignedLoad<UInt64>(pos);
        updated_value = intHashCRC32(word, updated_value);

        pos += 8;
    }

    if (pos < end)
    {
        /// If the string size is not divisible by 8.
        /// Let's assume the string was 'abcdefghXYZ', so its tail is 'XYZ'.
        DB::UInt8 tail_size = end - pos;
        /// Load the trailing 8 bytes. The word is 'defghXYZ'.
        auto word = unalignedLoad<UInt64>(end - 8);
        /// Prepare a mask that zeroes the other 5 bytes. It is 0xFFFFFFFFFFFFFFFF << (8 * 5) = 0xFFFFFF0000000000.
        /// word & mask = '\0\0\0\0\0XYZ' (the bytes are reversed because of little endianness).
        word &= (~UInt64(0)) << DB::UInt8(8 * (8 - tail_size));
        /// Use the least significant byte to store the tail length.
        word |= tail_size;
        /// Now the word is '\3\0\0\0\0XYZ'.
        updated_value = intHashCRC32(word, updated_value);
    }

    return updated_value;
}
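
/// Hypothetical usage sketch (not part of the original header): updateWeakHash32 can be called
/// repeatedly to fold several byte ranges into one running 32-bit value; here a single short
/// string is hashed. The literal and the initial value are arbitrary.
inline DB::UInt32 exampleWeakHashOfString()
{
    const char data[] = "abcdefghXYZ";             /// 11 bytes: one full 8-byte word plus a 3-byte tail
    DB::UInt32 hash = 0;                           /// arbitrary initial value
    hash = updateWeakHash32(reinterpret_cast<const DB::UInt8 *>(data), sizeof(data) - 1, hash);
    return hash;
}
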
template <typename T>
inline size_t DefaultHash64(T key)
{
    union
    {
        T in;
        DB::UInt64 out;
    } u;
    u.out = 0;
    u.in = key;
    return intHash64(u.out);
}

template <typename T, typename Enable = void>
struct DefaultHash;

template <typename T>
struct DefaultHash<T, std::enable_if_t<is_arithmetic_v<T>>>
{
    size_t operator() (T key) const
    {
        return DefaultHash64<T>(key);
    }
};

template <typename T>
struct DefaultHash<T, std::enable_if_t<DB::IsDecimalNumber<T> && sizeof(T) <= 8>>
{
    size_t operator() (T key) const
    {
        return DefaultHash64<typename T::NativeType>(key);
    }
};

template <typename T>
struct DefaultHash<T, std::enable_if_t<DB::IsDecimalNumber<T> && sizeof(T) == 16>>
{
    size_t operator() (T key) const
    {
        return DefaultHash64<Int64>(key >> 64) ^ DefaultHash64<Int64>(key);
    }
};

template <typename T> struct HashCRC32;

template <typename T>
inline size_t hashCRC32(T key)
{
    union
    {
        T in;
        DB::UInt64 out;
    } u;
    u.out = 0;
    u.in = key;
    return intHashCRC32(u.out);
}

#define DEFINE_HASH(T) \
template <> struct HashCRC32<T>\
{\
    size_t operator() (T key) const\
    {\
        return hashCRC32<T>(key);\
    }\
};

DEFINE_HASH(DB::UInt8)
DEFINE_HASH(DB::UInt16)
DEFINE_HASH(DB::UInt32)
DEFINE_HASH(DB::UInt64)
DEFINE_HASH(DB::UInt128)
DEFINE_HASH(DB::Int8)
DEFINE_HASH(DB::Int16)
DEFINE_HASH(DB::Int32)
DEFINE_HASH(DB::Int64)
DEFINE_HASH(DB::Float32)
DEFINE_HASH(DB::Float64)
#undef DEFINE_HASH
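
/// Hypothetical usage sketch (not part of the original header): DefaultHash and HashCRC32 are plain
/// functor types, so they can be passed wherever a Hash template parameter is expected (e.g. by the
/// hash table classes in this directory). Here they are simply invoked directly.
inline void exampleHashFunctors()
{
    size_t murmur_based = DefaultHash<DB::UInt64>()(12345);   /// Murmur-finalizer based hash
    size_t crc_based = HashCRC32<DB::UInt64>()(12345);        /// CRC32C based hash (or the fallback)
    (void)murmur_based;
    (void)crc_based;
}
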
/// It is reasonable to use for UInt8, UInt16 with a sufficient hash table size.
struct TrivialHash
{
    template <typename T>
    size_t operator() (T key) const
    {
        return key;
    }
};

/** A relatively good non-cryptographic hash function from UInt64 to UInt32.
  * But worse (both in quality and speed) than simply truncating intHash64.
  * Taken from here: http://www.concentric.net/~ttwang/tech/inthash.htm
  *
  * Slightly changed compared to the function by the link: the shifts to the right were accidentally replaced by cyclic shifts to the right.
  * This change did not affect the smhasher test results.
  *
  * It is recommended to use different salts for different tasks.
  * There was a case when, in a database, values were sorted by hash (for a low-quality pseudo-random ordering),
  * and in another place, in an aggregate function, the same hash was used inside a hash table;
  * as a result, that aggregate function slowed down monstrously because of collisions.
  *
  * NOTE Salting is far from perfect, because it commutes with the first steps of the calculation.
  *
  * NOTE As mentioned, this function is slower than intHash64.
  * But occasionally it is faster, when written in a loop and the loop is vectorized.
  */
template <DB::UInt64 salt>
inline DB::UInt32 intHash32(DB::UInt64 key)
{
    key ^= salt;

    key = (~key) + (key << 18);
    key = key ^ ((key >> 31) | (key << 33));
    key = key * 21;
    key = key ^ ((key >> 11) | (key << 53));
    key = key + (key << 6);
    key = key ^ ((key >> 22) | (key << 42));

    return key;
}
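
/// Hypothetical usage sketch (not part of the original header): following the recommendation above,
/// two unrelated tasks use two different compile-time salts; the salt constants are arbitrary.
inline void exampleSaltedIntHash32(DB::UInt64 key)
{
    DB::UInt32 hash_for_sampling = intHash32<0x9E3779B97F4A7C15ULL>(key);   /// salt for one task
    DB::UInt32 hash_for_grouping = intHash32<0xC2B2AE3D27D4EB4FULL>(key);   /// a different salt for another task
    (void)hash_for_sampling;
    (void)hash_for_grouping;
}
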
/// For containers.
template <typename T, DB::UInt64 salt = 0>
struct IntHash32
{
    size_t operator() (const T & key) const
    {
        return intHash32<salt>(key);
    }
};