diff --git a/base/common/StringRef.h b/base/common/StringRef.h index a00ded07b1c..3f5d7beb932 100644 --- a/base/common/StringRef.h +++ b/base/common/StringRef.h @@ -265,35 +265,6 @@ struct CRC32Hash return res; } - - static UInt32 updateWeakHash(StringRef x, UInt32 updated_value) - { - const char * pos = x.data; - size_t size = x.size; - - if (size < 8) - { - auto value = unalignedLoad(pos); - /// 8 bytes were loaded to UInt64 value, but string size is less then 8 bytes. - /// We need to zero excessive bytes to remove the garbage. - /// But instead we move bits to the right, so that we had zeros at left. - /// It helps to have different hash for strings like 'a' and 'a\0' - value >>= UInt8(8 * (8 - size)); - return intHashCRC32(value, updated_value); - } - - const char * end = pos + size; - while (pos + 8 < end) - { - auto word = unalignedLoad(pos); - updated_value = intHashCRC32(word, updated_value); - - pos += 8; - } - - auto word = unalignedLoad(pos - 8); - return intHashCRC32(word, updated_value); - } }; struct StringRefHash : CRC32Hash {}; diff --git a/dbms/src/Columns/ColumnFixedString.cpp b/dbms/src/Columns/ColumnFixedString.cpp index 5c7e1da98dc..9056e0675f7 100644 --- a/dbms/src/Columns/ColumnFixedString.cpp +++ b/dbms/src/Columns/ColumnFixedString.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include @@ -116,7 +116,7 @@ void ColumnFixedString::updateWeakHash32(WeakHash32 & hash) const for (size_t row = 0; row < s; ++row) { - *hash_data = StringRefHash::updateWeakHash(StringRef(pos, n), *hash_data); + *hash_data = ::updateWeakHash32(pos, n, *hash_data); pos += n; ++hash_data; diff --git a/dbms/src/Columns/ColumnString.cpp b/dbms/src/Columns/ColumnString.cpp index 956d900af41..ec868d6ec11 100644 --- a/dbms/src/Columns/ColumnString.cpp +++ b/dbms/src/Columns/ColumnString.cpp @@ -80,7 +80,7 @@ void ColumnString::updateWeakHash32(WeakHash32 & hash) const for (auto & offset : offsets) { auto str_size = offset - prev_offset; - *hash_data = 
StringRefHash::updateWeakHash(StringRef(pos, str_size), *hash_data);
+        *hash_data = ::updateWeakHash32(pos, str_size, *hash_data);
 
         pos += str_size;
         prev_offset = offset;
diff --git a/dbms/src/Common/HashTable/Hash.h b/dbms/src/Common/HashTable/Hash.h
index 7b4a8e0fd7a..cdcad677088 100644
--- a/dbms/src/Common/HashTable/Hash.h
+++ b/dbms/src/Common/HashTable/Hash.h
@@ -70,6 +70,46 @@ inline DB::UInt64 intHashCRC32(DB::UInt64 x, DB::UInt64 updated_value)
 #endif
 }
 
+/// Updates a 32-bit weak hash with the `size` bytes starting at `pos` and returns the new value.
+/// Declared inline because it is defined in a header and called from more than one
+/// translation unit (ColumnString.cpp and ColumnFixedString.cpp).
+/// Only bytes in [pos, pos + size) are read.
+inline DB::UInt32 updateWeakHash32(const DB::UInt8 * pos, size_t size, DB::UInt32 updated_value)
+{
+    if (size < 8)
+    {
+        /// Assemble the value byte by byte instead of loading 8 bytes:
+        /// an 8-byte load would read past the end of the string.
+        DB::UInt64 value = 0;
+        for (size_t i = 0; i < size; ++i)
+            value |= DB::UInt64(pos[i]) << (8 * i);
+
+        /// The top byte is always free here (size <= 7), so keep the length in it.
+        /// It helps to have different hash for strings like 'a' and 'a\0'.
+        value |= DB::UInt64(size) << 56;
+        return static_cast<DB::UInt32>(intHashCRC32(value, updated_value));
+    }
+
+    const auto * end = pos + size;
+    while (pos + 8 <= end)
+    {
+        auto word = unalignedLoad<DB::UInt64>(pos);
+        updated_value = static_cast<DB::UInt32>(intHashCRC32(word, updated_value));
+
+        pos += 8;
+    }
+
+    if (pos < end)
+    {
+        /// The size is not a multiple of 8: hash the last 8 bytes of the string.
+        /// This word overlaps the previous one, which is fine because size >= 8 here,
+        /// so the load stays inside the string.
+        auto word = unalignedLoad<DB::UInt64>(end - 8);
+        updated_value = static_cast<DB::UInt32>(intHashCRC32(word, updated_value));
+    }
+
+    return updated_value;
+}
 
 template <typename T>
 inline size_t DefaultHash64(T key)