Merge pull request #41131 from JackyWoo/add_function_java_int_hash

Support Java integers hashing in `javaHash`
This commit is contained in:
Alexey Milovidov 2022-10-02 18:10:51 +03:00 committed by GitHub
commit 0d1d177013
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 103 additions and 5 deletions

View File

@ -296,7 +296,14 @@ SELECT farmHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:0
## javaHash
Calculates [JavaHash](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452) from a string. This hash function is neither fast nor having a good quality. The only reason to use it is when this algorithm is already used in another system and you have to calculate exactly the same result.
Calculates JavaHash from a [string](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452),
[Byte](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Byte.java#l405),
[Short](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Short.java#l410),
[Integer](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Integer.java#l959),
[Long](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Long.java#l1060).
This hash function is neither fast nor of good quality. The only reason to use it is when this algorithm is already used in another system and you have to calculate exactly the same result.
Note that Java only supports calculating hashes of signed integers, so if you want to calculate the hash of an unsigned integer, you must cast it to the appropriate signed ClickHouse type.
**Syntax**
@ -312,6 +319,20 @@ A `Int32` data type hash value.
Query:
```sql
SELECT javaHash(toInt32(123));
```
Result:
```response
┌─javaHash(toInt32(123))─┐
│ 123 │
└────────────────────────┘
```
Query:
```sql
SELECT javaHash('Hello, world!');
```

View File

@ -82,6 +82,7 @@ namespace ErrorCodes
*
*/
struct IntHash32Impl
{
using ReturnType = UInt32;
@ -413,7 +414,6 @@ struct MurmurHash3Impl128
static constexpr bool use_int_hash_for_pods = false;
};
/// http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452
/// Care should be taken to do all calculation in unsigned integers (to avoid undefined behaviour on overflow)
/// but obtain the same result as it is done in signed integers with two's complement arithmetic.
struct JavaHashImpl
@ -421,7 +421,34 @@ struct JavaHashImpl
static constexpr auto name = "javaHash";
using ReturnType = Int32;
static Int32 apply(const char * data, const size_t size)
/// 64-bit integer hash: the low and the high 32-bit halves of the value are XORed together.
/// The halves are taken from the unsigned representation, so the shift never drags the sign bit along.
static ReturnType apply(int64_t x)
{
    const auto bits = static_cast<uint64_t>(x);
    const auto low_half = static_cast<uint32_t>(bits);
    const auto high_half = static_cast<uint32_t>(bits >> 32);
    return static_cast<ReturnType>(low_half ^ high_half);
}
/// 8-, 16- and 32-bit signed integers hash to their own value.
/// The overload is enabled only when T is exactly int8_t, int16_t or int32_t.
template <typename T, std::enable_if_t<std::disjunction_v<std::is_same<T, int8_t>,
                                                          std::is_same<T, int16_t>,
                                                          std::is_same<T, int32_t>>, T> * = nullptr>
static ReturnType apply(T x)
{
    return static_cast<ReturnType>(x);
}
/// Fallback overload, enabled for every T that is not int8_t, int16_t, int32_t or int64_t.
/// Types for which std::is_unsigned_v is true are rejected with NOT_IMPLEMENTED;
/// any other type is hashed over the raw bytes of the value by the (data, size) overload.
template <typename T, typename std::enable_if<!std::is_same_v<T, int8_t>
    && !std::is_same_v<T, int16_t>
    && !std::is_same_v<T, int32_t>
    && !std::is_same_v<T, int64_t>, T>::type * = nullptr>
static ReturnType apply(T x)
{
    /// Signedness of T is a compile-time property, so the decision is made at compile time
    /// and only the relevant branch is instantiated.
    if constexpr (std::is_unsigned_v<T>)
    {
        throw Exception("Unsigned types are not supported", ErrorCodes::NOT_IMPLEMENTED);
    }
    else
    {
        /// Hash the object representation of the value, byte by byte.
        return apply(reinterpret_cast<const char *>(&x), sizeof(T));
    }
}
static ReturnType apply(const char * data, const size_t size)
{
UInt32 h = 0;
for (size_t i = 0; i < size; ++i)
@ -429,7 +456,7 @@ struct JavaHashImpl
return static_cast<Int32>(h);
}
static Int32 combineHashes(Int32, Int32)
/// Combining two hashes is not provided by this implementation:
/// the call always throws NOT_IMPLEMENTED and both arguments are ignored.
static ReturnType combineHashes(Int32, Int32)
{
throw Exception("Java hash is not combineable for multiple arguments", ErrorCodes::NOT_IMPLEMENTED);
}
@ -824,7 +851,10 @@ private:
}
else
{
h = Impl::apply(reinterpret_cast<const char *>(&vec_from[i]), sizeof(vec_from[i]));
if (std::is_same_v<Impl, JavaHashImpl>)
h = JavaHashImpl::apply(vec_from[i]);
else
h = Impl::apply(reinterpret_cast<const char *>(&vec_from[i]), sizeof(vec_from[i]));
}
if constexpr (first)

View File

@ -1,3 +1,13 @@
123
-123
123
-123
123
-123
123
122
-539222985
-539222986
96354
-676697544
138768

View File

@ -1,5 +1,15 @@
-- Tags: no-fasttest
-- 8-, 16- and 32-bit signed integers: the expected hash is the value itself.
select javaHash(toInt8(123));
select javaHash(toInt8(-123));
select javaHash(toInt16(123));
select javaHash(toInt16(-123));
select javaHash(toInt32(123));
select javaHash(toInt32(-123));
-- 64-bit signed integers: the expected hash is the XOR of the low and the high 32-bit halves.
select javaHash(toInt64(123));
select javaHash(toInt64(-123));
select javaHash(toInt64(12345678901));
select javaHash(toInt64(-12345678901));
-- String arguments.
select javaHash('abc');
select javaHash('874293087');
-- String converted to UTF-16LE before hashing.
select javaHashUTF16LE(convertCharset('a1가', 'utf-8', 'utf-16le'));

View File

@ -0,0 +1,4 @@
Not supported
Not supported
Not supported
Not supported

View File

@ -0,0 +1,23 @@
#!/usr/bin/env bash
# Tags: no-fasttest
# Absolute path of the directory that contains this script.
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
# Fixed string searched for in the client output by check() below.
exception_pattern='DB::Exception:'
# Runs the query given in $1 through ${CLICKHOUSE_CLIENT} and prints 'Not supported'
# when the combined stdout/stderr of the client contains $exception_pattern.
# Prints nothing otherwise.
function check()
{
    # `|&` sends stderr through the pipe as well, so error text reaches grep.
    ${CLICKHOUSE_CLIENT} -q "$1" |& {
        # -F: fixed-string match; -c: count matching lines (reads the whole input);
        # `--` and the quotes keep the pattern from being split or parsed as an option.
        if [[ $(grep -cF -- "$exception_pattern") -gt 0 ]]
        then
            echo 'Not supported'
        fi
    }
}
# One query per unsigned integer width; each call prints 'Not supported' if the client reports an exception.
check "SELECT javaHash(toUInt8(1))"
check "SELECT javaHash(toUInt16(1))"
check "SELECT javaHash(toUInt32(1))"
check "SELECT javaHash(toUInt64(1))"