Merge pull request #50421 from rschu1ze/fix-const-int-hashing

Fix hashing of const integer values
This commit is contained in:
Robert Schulze 2023-06-02 14:43:34 +02:00 committed by GitHub
commit db76d1f7d7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 70 additions and 53 deletions

View File

@ -1073,55 +1073,72 @@ private:
size_t size = vec_from.size();
for (size_t i = 0; i < size; ++i)
{
ToType h;
ToType hash;
if constexpr (Impl::use_int_hash_for_pods)
{
if constexpr (std::is_same_v<ToType, UInt64>)
h = IntHash64Impl::apply(bit_cast<UInt64>(vec_from[i]));
hash = IntHash64Impl::apply(bit_cast<UInt64>(vec_from[i]));
else
h = IntHash32Impl::apply(bit_cast<UInt32>(vec_from[i]));
hash = IntHash32Impl::apply(bit_cast<UInt32>(vec_from[i]));
}
else
{
if constexpr (std::is_same_v<Impl, JavaHashImpl>)
h = JavaHashImpl::apply(vec_from[i]);
hash = JavaHashImpl::apply(vec_from[i]);
else
{
FromType v = vec_from[i];
FromType value = vec_from[i];
if constexpr (std::endian::native == std::endian::big)
{
FromType tmp_v;
reverseMemcpy(&tmp_v, &v, sizeof(v));
v = tmp_v;
FromType value_reversed;
reverseMemcpy(&value_reversed, &value, sizeof(value));
value = value_reversed;
}
h = apply(key, reinterpret_cast<const char *>(&v), sizeof(v));
}
hash = apply(key, reinterpret_cast<const char *>(&value), sizeof(value));
}
}
if constexpr (first)
vec_to[i] = h;
vec_to[i] = hash;
else
vec_to[i] = combineHashes(key, vec_to[i], h);
vec_to[i] = combineHashes(key, vec_to[i], hash);
}
}
else if (auto col_from_const = checkAndGetColumnConst<ColVecType>(column))
{
auto value = col_from_const->template getValue<FromType>();
ToType hash;
if constexpr (std::is_same_v<ToType, UInt64>)
hash = IntHash64Impl::apply(bit_cast<UInt64>(value));
if constexpr (Impl::use_int_hash_for_pods)
{
if constexpr (std::is_same_v<ToType, UInt64>)
hash = IntHash64Impl::apply(bit_cast<UInt64>(value));
else
hash = IntHash32Impl::apply(bit_cast<UInt32>(value));
}
else
hash = IntHash32Impl::apply(bit_cast<UInt32>(value));
{
if constexpr (std::is_same_v<Impl, JavaHashImpl>)
hash = JavaHashImpl::apply(value);
else
{
if constexpr (std::endian::native == std::endian::big)
{
FromType value_reversed;
reverseMemcpy(&value_reversed, &value, sizeof(value));
value = value_reversed;
}
hash = apply(key, reinterpret_cast<const char *>(&value), sizeof(value));
}
}
size_t size = vec_to.size();
if constexpr (first)
vec_to.assign(size, hash);
else
{
for (size_t i = 0; i < size; ++i)
vec_to[i] = combineHashes(key, vec_to[i], hash);
}
}
else
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
@ -1139,46 +1156,40 @@ private:
size_t size = vec_from.size();
for (size_t i = 0; i < size; ++i)
{
ToType h;
ToType hash;
if constexpr (std::endian::native == std::endian::little)
{
h = apply(key, reinterpret_cast<const char *>(&vec_from[i]), sizeof(vec_from[i]));
}
hash = apply(key, reinterpret_cast<const char *>(&vec_from[i]), sizeof(vec_from[i]));
else
{
char tmp_buffer[sizeof(vec_from[i])];
reverseMemcpy(tmp_buffer, &vec_from[i], sizeof(vec_from[i]));
h = apply(key, reinterpret_cast<const char *>(tmp_buffer), sizeof(vec_from[i]));
hash = apply(key, reinterpret_cast<const char *>(tmp_buffer), sizeof(vec_from[i]));
}
if constexpr (first)
vec_to[i] = h;
vec_to[i] = hash;
else
vec_to[i] = combineHashes(key, vec_to[i], h);
vec_to[i] = combineHashes(key, vec_to[i], hash);
}
}
else if (auto col_from_const = checkAndGetColumnConst<ColVecType>(column))
{
auto value = col_from_const->template getValue<FromType>();
ToType h;
ToType hash;
if constexpr (std::endian::native == std::endian::little)
{
h = apply(key, reinterpret_cast<const char *>(&value), sizeof(value));
}
hash = apply(key, reinterpret_cast<const char *>(&value), sizeof(value));
else
{
char tmp_buffer[sizeof(value)];
reverseMemcpy(tmp_buffer, &value, sizeof(value));
h = apply(key, reinterpret_cast<const char *>(tmp_buffer), sizeof(value));
hash = apply(key, reinterpret_cast<const char *>(tmp_buffer), sizeof(value));
}
size_t size = vec_to.size();
if constexpr (first)
vec_to.assign(size, h);
vec_to.assign(size, hash);
else
{
for (size_t i = 0; i < size; ++i)
vec_to[i] = combineHashes(key, vec_to[i], h);
}
vec_to[i] = combineHashes(key, vec_to[i], hash);
}
else
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
@ -1191,11 +1202,11 @@ private:
for (size_t i = 0, size = column->size(); i < size; ++i)
{
StringRef bytes = column->getDataAt(i);
const ToType h = apply(key, bytes.data, bytes.size);
const ToType hash = apply(key, bytes.data, bytes.size);
if constexpr (first)
vec_to[i] = h;
vec_to[i] = hash;
else
vec_to[i] = combineHashes(key, vec_to[i], h);
vec_to[i] = combineHashes(key, vec_to[i], hash);
}
}
@ -1211,14 +1222,14 @@ private:
ColumnString::Offset current_offset = 0;
for (size_t i = 0; i < size; ++i)
{
const ToType h = apply(key,
const ToType hash = apply(key,
reinterpret_cast<const char *>(&data[current_offset]),
offsets[i] - current_offset - 1);
if constexpr (first)
vec_to[i] = h;
vec_to[i] = hash;
else
vec_to[i] = combineHashes(key, vec_to[i], h);
vec_to[i] = combineHashes(key, vec_to[i], hash);
current_offset = offsets[i];
}
@ -1231,11 +1242,11 @@ private:
for (size_t i = 0; i < size; ++i)
{
const ToType h = apply(key, reinterpret_cast<const char *>(&data[i * n]), n);
const ToType hash = apply(key, reinterpret_cast<const char *>(&data[i * n]), n);
if constexpr (first)
vec_to[i] = h;
vec_to[i] = hash;
else
vec_to[i] = combineHashes(key, vec_to[i], h);
vec_to[i] = combineHashes(key, vec_to[i], hash);
}
}
else if (const ColumnConst * col_from_const = checkAndGetColumnConstStringOrFixedString(column))
@ -1245,16 +1256,10 @@ private:
const size_t size = vec_to.size();
if constexpr (first)
{
vec_to.assign(size, hash);
}
else
{
for (size_t i = 0; i < size; ++i)
{
vec_to[i] = combineHashes(key, vec_to[i], hash);
}
}
}
else
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}",
@ -1283,16 +1288,16 @@ private:
{
ColumnArray::Offset next_offset = offsets[i];
ToType h;
ToType hash;
if constexpr (std::is_same_v<ToType, UInt64>)
h = IntHash64Impl::apply(next_offset - current_offset);
hash = IntHash64Impl::apply(next_offset - current_offset);
else
h = IntHash32Impl::apply(next_offset - current_offset);
hash = IntHash32Impl::apply(next_offset - current_offset);
if constexpr (first)
vec_to[i] = h;
vec_to[i] = hash;
else
vec_to[i] = combineHashes(key, vec_to[i], h);
vec_to[i] = combineHashes(key, vec_to[i], hash);
for (size_t j = current_offset; j < next_offset; ++j)
vec_to[i] = combineHashes(key, vec_to[i], vec_temp[j]);

View File

@ -194,3 +194,6 @@ E28DBDE7FE22E41C
1
E28DBDE7FE22E41C
1
Check bug with hashing of const integer values
11862823756610506724
11862823756610506724

View File

@ -272,3 +272,12 @@ select hex(sipHash64());
SELECT hex(sipHash128()) = hex(reverse(unhex('1CE422FEE7BD8DE20000000000000000'))) or hex(sipHash128()) = '1CE422FEE7BD8DE20000000000000000';
select hex(sipHash64Keyed());
SELECT hex(sipHash128Keyed()) = hex(reverse(unhex('1CE422FEE7BD8DE20000000000000000'))) or hex(sipHash128Keyed()) = '1CE422FEE7BD8DE20000000000000000';
-- Regression test: hashing an integer given as a constant must yield the same
-- result as hashing the same integer read from a table column.
SELECT 'Check bug with hashing of const integer values';
DROP TABLE IF EXISTS tab;
-- `key` is passed as the first argument of sipHash64Keyed below, `val` as the second.
CREATE TABLE tab (key Tuple(UInt64, UInt64), val UInt64) ENGINE=Memory;
-- Single row: key = (2, 2), val = 4.
INSERT INTO tab VALUES ((2, 2), 4);
-- these two statements must produce the same result:
-- the first takes the value 4 from column `val`, the second passes the same
-- value as the constant 4::UInt64.
SELECT sipHash64Keyed(key, val) FROM tab;
SELECT sipHash64Keyed(key, 4::UInt64) FROM tab;
DROP TABLE tab;