diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index ad66047a92e..cc66f62f714 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -296,7 +296,14 @@ SELECT farmHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:0 ## javaHash -Calculates [JavaHash](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452) from a string. This hash function is neither fast nor having a good quality. The only reason to use it is when this algorithm is already used in another system and you have to calculate exactly the same result. +Calculates JavaHash from a [string](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452), +[Byte](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Byte.java#l405), +[Short](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Short.java#l410), +[Integer](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Integer.java#l959), +or [Long](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Long.java#l1060). +This hash function is neither fast nor of good quality. The only reason to use it is when this algorithm is already used in another system and you have to calculate exactly the same result. + +Note that Java only supports hashing signed integers, so to hash an unsigned integer you must first cast it to the corresponding signed ClickHouse type. **Syntax** @@ -312,6 +319,20 @@ A `Int32` data type hash value. 
Query: +```sql +SELECT javaHash(toInt32(123)); +``` + +Result: + +```response +┌─javaHash(toInt32(123))─┐ +│ 123 │ +└────────────────────────┘ +``` + +Query: + ```sql SELECT javaHash('Hello, world!'); ``` diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 862592254c1..bbbaa1d40d1 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -82,6 +82,7 @@ namespace ErrorCodes * */ + struct IntHash32Impl { using ReturnType = UInt32; @@ -413,7 +414,6 @@ struct MurmurHash3Impl128 static constexpr bool use_int_hash_for_pods = false; }; -/// http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452 /// Care should be taken to do all calculation in unsigned integers (to avoid undefined behaviour on overflow) /// but obtain the same result as it is done in signed integers with two's complement arithmetic. struct JavaHashImpl @@ -421,7 +421,34 @@ struct JavaHashImpl static constexpr auto name = "javaHash"; using ReturnType = Int32; - static Int32 apply(const char * data, const size_t size) + static ReturnType apply(int64_t x) + { + return static_cast( + static_cast(x) ^ static_cast(static_cast(x) >> 32)); + } + + template + || std::is_same_v + || std::is_same_v, T>::type * = nullptr> + static ReturnType apply(T x) + { + return x; + } + + template + && !std::is_same_v + && !std::is_same_v + && !std::is_same_v, T>::type * = nullptr> + static ReturnType apply(T x) + { + if (std::is_unsigned_v) + throw Exception("Unsigned types are not supported", ErrorCodes::NOT_IMPLEMENTED); + const size_t size = sizeof(T); + const char * data = reinterpret_cast(&x); + return apply(data, size); + } + + static ReturnType apply(const char * data, const size_t size) { UInt32 h = 0; for (size_t i = 0; i < size; ++i) @@ -429,7 +456,7 @@ struct JavaHashImpl return static_cast(h); } - static Int32 combineHashes(Int32, Int32) + static ReturnType combineHashes(Int32, Int32) { throw 
Exception("Java hash is not combineable for multiple arguments", ErrorCodes::NOT_IMPLEMENTED); } @@ -824,7 +851,10 @@ private: } else { - h = Impl::apply(reinterpret_cast(&vec_from[i]), sizeof(vec_from[i])); + if (std::is_same_v) + h = JavaHashImpl::apply(vec_from[i]); + else + h = Impl::apply(reinterpret_cast(&vec_from[i]), sizeof(vec_from[i])); } if constexpr (first) diff --git a/tests/queries/0_stateless/00800_function_java_hash.reference b/tests/queries/0_stateless/00800_function_java_hash.reference index 5e1fde8441f..db651777113 100644 --- a/tests/queries/0_stateless/00800_function_java_hash.reference +++ b/tests/queries/0_stateless/00800_function_java_hash.reference @@ -1,3 +1,13 @@ +123 +-123 +123 +-123 +123 +-123 +123 +122 +-539222985 +-539222986 96354 -676697544 138768 diff --git a/tests/queries/0_stateless/00800_function_java_hash.sql b/tests/queries/0_stateless/00800_function_java_hash.sql index ec26895ed60..fc4a0557599 100644 --- a/tests/queries/0_stateless/00800_function_java_hash.sql +++ b/tests/queries/0_stateless/00800_function_java_hash.sql @@ -1,5 +1,15 @@ -- Tags: no-fasttest +select javaHash(toInt8(123)); +select javaHash(toInt8(-123)); +select javaHash(toInt16(123)); +select javaHash(toInt16(-123)); +select javaHash(toInt32(123)); +select javaHash(toInt32(-123)); +select javaHash(toInt64(123)); +select javaHash(toInt64(-123)); +select javaHash(toInt64(12345678901)); +select javaHash(toInt64(-12345678901)); select javaHash('abc'); select javaHash('874293087'); select javaHashUTF16LE(convertCharset('a1가', 'utf-8', 'utf-16le')); diff --git a/tests/queries/0_stateless/00800_function_java_hash_with_unsigined_types.reference b/tests/queries/0_stateless/00800_function_java_hash_with_unsigined_types.reference new file mode 100644 index 00000000000..fd8cdc5d5bb --- /dev/null +++ b/tests/queries/0_stateless/00800_function_java_hash_with_unsigined_types.reference @@ -0,0 +1,4 @@ +Not supported +Not supported +Not supported +Not supported diff --git 
a/tests/queries/0_stateless/00800_function_java_hash_with_unsigined_types.sh b/tests/queries/0_stateless/00800_function_java_hash_with_unsigined_types.sh new file mode 100755 index 00000000000..03e87a90a95 --- /dev/null +++ b/tests/queries/0_stateless/00800_function_java_hash_with_unsigined_types.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +exception_pattern='DB::Exception:' + +function check() +{ + ${CLICKHOUSE_CLIENT} -q "$1" |& { + if [[ `grep -F $exception_pattern | wc -l` -gt 0 ]] + then + echo 'Not supported' + fi + } +} + +check "SELECT javaHash(toUInt8(1))" +check "SELECT javaHash(toUInt16(1))" +check "SELECT javaHash(toUInt32(1))" +check "SELECT javaHash(toUInt64(1))"