mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 16:50:48 +00:00
Merge pull request #41131 from JackyWoo/add_function_java_int_hash
Support Java integers hashing in `javaHash`
This commit is contained in:
commit
0d1d177013
@ -296,7 +296,14 @@ SELECT farmHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:0
|
||||
|
||||
## javaHash
|
||||
|
||||
Calculates [JavaHash](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452) from a string. This hash function is neither fast nor having a good quality. The only reason to use it is when this algorithm is already used in another system and you have to calculate exactly the same result.
|
||||
Calculates JavaHash from a [string](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452),
|
||||
[Byte](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Byte.java#l405),
|
||||
[Short](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Short.java#l410),
|
||||
[Integer](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Integer.java#l959),
|
||||
[Long](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Long.java#l1060).
|
||||
This hash function is neither fast nor of good quality. The only reason to use it is when this algorithm is already used in another system and you need to calculate exactly the same result.
|
||||
|
||||
Note that Java only supports calculating hashes of signed integers, so if you want to calculate the hash of an unsigned integer, you must first cast it to the appropriate signed ClickHouse type.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -312,6 +319,20 @@ A `Int32` data type hash value.
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT javaHash(toInt32(123));
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─javaHash(toInt32(123))─┐
|
||||
│ 123 │
|
||||
└────────────────────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT javaHash('Hello, world!');
|
||||
```
|
||||
|
@ -82,6 +82,7 @@ namespace ErrorCodes
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
struct IntHash32Impl
|
||||
{
|
||||
using ReturnType = UInt32;
|
||||
@ -413,7 +414,6 @@ struct MurmurHash3Impl128
|
||||
static constexpr bool use_int_hash_for_pods = false;
|
||||
};
|
||||
|
||||
/// http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452
|
||||
/// Care should be taken to do all calculation in unsigned integers (to avoid undefined behaviour on overflow)
|
||||
/// but obtain the same result as it is done in signed integers with two's complement arithmetic.
|
||||
struct JavaHashImpl
|
||||
@ -421,7 +421,34 @@ struct JavaHashImpl
|
||||
static constexpr auto name = "javaHash";
|
||||
using ReturnType = Int32;
|
||||
|
||||
static Int32 apply(const char * data, const size_t size)
|
||||
/// Hashes a 64-bit signed integer: XORs the low 32 bits of x with its high 32 bits
/// and converts the result to ReturnType.
/// The shift is performed on the value converted to uint64_t, so it is a logical (zero-filling) shift.
/// NOTE(review): presumably mirrors Java's Long.hashCode, which the documentation links to — confirm.
static ReturnType apply(int64_t x)
|
||||
{
|
||||
return static_cast<ReturnType>(
|
||||
static_cast<uint32_t>(x) ^ static_cast<uint32_t>(static_cast<uint64_t>(x) >> 32));
|
||||
}
|
||||
|
||||
/// Overload enabled only when T is exactly int8_t, int16_t or int32_t.
/// The hash is the value itself, implicitly converted to ReturnType.
template <class T, typename std::enable_if<std::is_same_v<T, int8_t>
|
||||
|| std::is_same_v<T, int16_t>
|
||||
|| std::is_same_v<T, int32_t>, T>::type * = nullptr>
|
||||
static ReturnType apply(T x)
|
||||
{
|
||||
return x;
|
||||
}
|
||||
|
||||
template <typename T, typename std::enable_if<!std::is_same_v<T, int8_t>
|
||||
&& !std::is_same_v<T, int16_t>
|
||||
&& !std::is_same_v<T, int32_t>
|
||||
&& !std::is_same_v<T, int64_t>, T>::type * = nullptr>
|
||||
static ReturnType apply(T x)
|
||||
{
|
||||
if (std::is_unsigned_v<T>)
|
||||
throw Exception("Unsigned types are not supported", ErrorCodes::NOT_IMPLEMENTED);
|
||||
const size_t size = sizeof(T);
|
||||
const char * data = reinterpret_cast<const char *>(&x);
|
||||
return apply(data, size);
|
||||
}
|
||||
|
||||
static ReturnType apply(const char * data, const size_t size)
|
||||
{
|
||||
UInt32 h = 0;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
@ -429,7 +456,7 @@ struct JavaHashImpl
|
||||
return static_cast<Int32>(h);
|
||||
}
|
||||
|
||||
static Int32 combineHashes(Int32, Int32)
|
||||
/// Always throws NOT_IMPLEMENTED; both arguments are ignored and nothing is ever returned.
static ReturnType combineHashes(Int32, Int32)
|
||||
{
|
||||
throw Exception("Java hash is not combineable for multiple arguments", ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
@ -824,7 +851,10 @@ private:
|
||||
}
|
||||
else
|
||||
{
|
||||
h = Impl::apply(reinterpret_cast<const char *>(&vec_from[i]), sizeof(vec_from[i]));
|
||||
if (std::is_same_v<Impl, JavaHashImpl>)
|
||||
h = JavaHashImpl::apply(vec_from[i]);
|
||||
else
|
||||
h = Impl::apply(reinterpret_cast<const char *>(&vec_from[i]), sizeof(vec_from[i]));
|
||||
}
|
||||
|
||||
if constexpr (first)
|
||||
|
@ -1,3 +1,13 @@
|
||||
123
|
||||
-123
|
||||
123
|
||||
-123
|
||||
123
|
||||
-123
|
||||
123
|
||||
122
|
||||
-539222985
|
||||
-539222986
|
||||
96354
|
||||
-676697544
|
||||
138768
|
||||
|
@ -1,5 +1,15 @@
|
||||
-- Tags: no-fasttest
|
||||
|
||||
select javaHash(toInt8(123));
|
||||
select javaHash(toInt8(-123));
|
||||
select javaHash(toInt16(123));
|
||||
select javaHash(toInt16(-123));
|
||||
select javaHash(toInt32(123));
|
||||
select javaHash(toInt32(-123));
|
||||
select javaHash(toInt64(123));
|
||||
select javaHash(toInt64(-123));
|
||||
select javaHash(toInt64(12345678901));
|
||||
select javaHash(toInt64(-12345678901));
|
||||
select javaHash('abc');
|
||||
select javaHash('874293087');
|
||||
select javaHashUTF16LE(convertCharset('a1가', 'utf-8', 'utf-16le'));
|
||||
|
@ -0,0 +1,4 @@
|
||||
Not supported
|
||||
Not supported
|
||||
Not supported
|
||||
Not supported
|
23
tests/queries/0_stateless/00800_function_java_hash_with_unsigined_types.sh
Executable file
23
tests/queries/0_stateless/00800_function_java_hash_with_unsigined_types.sh
Executable file
@ -0,0 +1,23 @@
|
||||
#!/usr/bin/env bash
|
||||
# Tags: no-fasttest
|
||||
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
exception_pattern='DB::Exception:'
|
||||
|
||||
# Runs the query passed as $1 through ${CLICKHOUSE_CLIENT} and prints 'Not supported'
# when the client's combined stdout/stderr contains the literal text in $exception_pattern.
# Prints nothing otherwise.
function check()
{
    ${CLICKHOUSE_CLIENT} -q "$1" |& {
        # grep -c counts matching lines itself; quoting keeps the pattern one literal argument
        # (no word splitting or globbing), and `--` stops it from being parsed as an option.
        if [[ $(grep -cF -- "$exception_pattern") -gt 0 ]]
        then
            echo 'Not supported'
        fi
    }
}
|
||||
|
||||
check "SELECT javaHash(toUInt8(1))"
|
||||
check "SELECT javaHash(toUInt16(1))"
|
||||
check "SELECT javaHash(toUInt32(1))"
|
||||
check "SELECT javaHash(toUInt64(1))"
|
Loading…
Reference in New Issue
Block a user