mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-26 17:41:59 +00:00
Fix msan issue in keyed siphash
Issue: https://s3.amazonaws.com/clickhouse-test-reports/0/ffdd91669471f4934704f98f0191524496b4e85b/fuzzer_astfuzzermsan/report.html
Repro: SELECT hex(sipHash128ReferenceKeyed((toUInt64(2), toUInt64(-9223372036854775807)))) GROUP BY (toUInt64(506097522914230528), toUInt64(now64(2, NULL + NULL), 1084818905618843912)), toUInt64(2), NULL + NULL, char(-2147483649, 1)
Minimal repro: SELECT sipHash64Keyed((2::UInt64, toUInt64(2)), 4) GROUP BY toUInt64(2)
This commit is contained in:
parent
a9d5b68946
commit
ad4a21034f
@ -51,7 +51,7 @@ Calculates the MD5 from a string and returns the resulting set of bytes as Fixed
|
||||
If you do not need MD5 in particular, but you need a decent cryptographic 128-bit hash, use the ‘sipHash128’ function instead.
|
||||
If you want to get the same result as output by the md5sum utility, use lower(hex(MD5(s))).
|
||||
|
||||
## sipHash64 (#hash_functions-siphash64)
|
||||
## sipHash64 {#hash_functions-siphash64}
|
||||
|
||||
Produces a 64-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value.
|
||||
|
||||
@ -63,9 +63,9 @@ This is a cryptographic hash function. It works at least three times faster than
|
||||
|
||||
The function [interprets](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. It then combines the hashes by the following algorithm:
|
||||
|
||||
1. The first and the second hash value are concatenated to an array which is hashed.
|
||||
2. The previously calculated hash value and the hash of the third input parameter are hashed in a similar way.
|
||||
3. This calculation is repeated for all remaining hash values of the original input.
|
||||
1. The first and the second hash value are concatenated to an array which is hashed.
|
||||
2. The previously calculated hash value and the hash of the third input parameter are hashed in a similar way.
|
||||
3. This calculation is repeated for all remaining hash values of the original input.
|
||||
|
||||
**Arguments**
|
||||
|
||||
|
@ -91,12 +91,18 @@ namespace impl
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "wrong tuple size: key must be a tuple of 2 UInt64");
|
||||
|
||||
if (const auto * key0col = checkAndGetColumn<ColumnUInt64>(&(tuple->getColumn(0))))
|
||||
ret.key0 = key0col->get64(0);
|
||||
{
|
||||
const auto & key0col_data = key0col->getData();
|
||||
ret.key0 = key0col_data[0];
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "first element of the key tuple is not UInt64");
|
||||
|
||||
if (const auto * key1col = checkAndGetColumn<ColumnUInt64>(&(tuple->getColumn(1))))
|
||||
ret.key1 = key1col->get64(0);
|
||||
{
|
||||
const auto & key1col_data = key1col->getData();
|
||||
ret.key1 = key1col_data[0];
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "second element of the key tuple is not UInt64");
|
||||
|
||||
|
@ -194,3 +194,5 @@ E28DBDE7FE22E41C
|
||||
1
|
||||
E28DBDE7FE22E41C
|
||||
1
|
||||
1CE422FEE7BD8DE20000000000000000
|
||||
7766709361750702608
|
||||
|
@ -272,3 +272,7 @@ select hex(sipHash64());
|
||||
SELECT hex(sipHash128()) = hex(reverse(unhex('1CE422FEE7BD8DE20000000000000000'))) or hex(sipHash128()) = '1CE422FEE7BD8DE20000000000000000';
|
||||
select hex(sipHash64Keyed());
|
||||
SELECT hex(sipHash128Keyed()) = hex(reverse(unhex('1CE422FEE7BD8DE20000000000000000'))) or hex(sipHash128Keyed()) = '1CE422FEE7BD8DE20000000000000000';
|
||||
|
||||
-- Crashed with memory sanitizer
|
||||
SELECT hex(sipHash128ReferenceKeyed((toUInt64(2), toUInt64(-9223372036854775807)))) GROUP BY (toUInt64(506097522914230528), toUInt64(now64(2, NULL + NULL), 1084818905618843912)), toUInt64(2), NULL + NULL, char(-2147483649, 1);
|
||||
SELECT sipHash64Keyed((2::UInt64, toUInt64(2)), 4) GROUP BY toUInt64(2);
|
||||
|
Loading…
Reference in New Issue
Block a user