2012-07-15 21:43:04 +00:00
# pragma once
# include <city.h>
2015-07-17 15:56:08 +00:00
# include <farmhash.h>
2015-07-20 14:59:20 +00:00
# include <metrohash.h>
2021-06-24 09:04:40 +00:00
# include <MurmurHash2.h>
# include <MurmurHash3.h>
2022-04-20 23:26:37 +00:00
# include <wyhash.h>
2021-06-24 09:04:40 +00:00
2022-09-28 13:29:29 +00:00
# include "config.h"
2018-12-23 19:25:40 +00:00
2022-01-09 19:52:03 +00:00
# if USE_BLAKE3
# include <blake3.h>
# endif
2022-01-07 09:41:23 +00:00
2019-02-10 17:40:52 +00:00
# include <Common/SipHash.h>
# include <Common/typeid_cast.h>
2022-10-16 14:21:10 +00:00
# include <Common/safe_cast.h>
2019-02-10 17:40:52 +00:00
# include <Common/HashTable/Hash.h>
2022-01-18 06:51:13 +00:00
# include <xxhash.h>
2019-03-22 11:18:24 +00:00
# if USE_SSL
2021-10-01 01:19:03 +00:00
# include <openssl / md4.h>
2020-04-16 12:31:57 +00:00
# include <openssl / md5.h>
# include <openssl / sha.h>
2018-12-23 19:25:40 +00:00
# endif
2012-07-15 21:43:04 +00:00
# include <Poco/ByteOrder.h>
2017-04-01 09:19:00 +00:00
# include <DataTypes/DataTypesNumber.h>
2020-08-19 11:52:17 +00:00
# include <DataTypes/DataTypesDecimal.h>
2017-04-01 09:19:00 +00:00
# include <DataTypes/DataTypeString.h>
# include <DataTypes/DataTypeDate.h>
# include <DataTypes/DataTypeDateTime.h>
# include <DataTypes/DataTypeArray.h>
# include <DataTypes/DataTypeFixedString.h>
# include <DataTypes/DataTypeEnum.h>
2017-12-08 00:50:25 +00:00
# include <DataTypes/DataTypeTuple.h>
2022-07-28 19:12:00 +00:00
# include <DataTypes/DataTypeMap.h>
2017-04-01 09:19:00 +00:00
# include <Columns/ColumnsNumber.h>
# include <Columns/ColumnString.h>
# include <Columns/ColumnConst.h>
# include <Columns/ColumnFixedString.h>
# include <Columns/ColumnArray.h>
# include <Columns/ColumnTuple.h>
2022-07-28 19:12:00 +00:00
# include <Columns/ColumnMap.h>
2021-05-17 07:30:42 +00:00
# include <Functions/IFunction.h>
2017-07-21 06:35:58 +00:00
# include <Functions/FunctionHelpers.h>
2020-05-25 15:16:19 +00:00
# include <Functions/PerformanceAdaptors.h>
2022-04-04 12:07:05 +00:00
# include <Common/TargetSpecific.h>
2021-10-02 07:13:14 +00:00
# include <base/range.h>
# include <base/bit_cast.h>
2015-04-15 15:00:28 +00:00
2012-07-15 21:43:04 +00:00
namespace DB
{
2017-06-13 02:06:53 +00:00
/// Error codes referenced by the hashing functions in this file.
/// They are only declared here (extern); the definitions live elsewhere.
namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
    extern const int ILLEGAL_COLUMN;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int LOGICAL_ERROR;
    extern const int NOT_IMPLEMENTED;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    extern const int SUPPORT_IS_DISABLED;
}
2017-05-27 15:45:25 +00:00
/** Hashing functions.
2012-07-15 21:43:04 +00:00
*
2017-10-26 18:36:23 +00:00
* halfMD5 : String - > UInt64
2012-07-15 21:43:04 +00:00
*
2017-05-27 15:45:25 +00:00
* A faster cryptographic hash function :
2017-10-26 18:36:23 +00:00
* sipHash64 : String - > UInt64
2013-10-21 14:35:12 +00:00
*
2017-05-27 15:45:25 +00:00
* Fast non - cryptographic hash function for strings :
2012-07-15 21:43:04 +00:00
* cityHash64 : String - > UInt64
*
2018-10-23 17:43:09 +00:00
  * Non-cryptographic hashes from a tuple of values of any types (uses the respective function for strings and intHash64 for numbers):
2017-10-26 18:36:23 +00:00
* cityHash64 : any * - > UInt64
2018-10-23 17:43:09 +00:00
* sipHash64 : any * - > UInt64
* halfMD5 : any * - > UInt64
2014-07-04 09:42:56 +00:00
*
2017-05-27 15:45:25 +00:00
* Fast non - cryptographic hash function from any integer :
2017-10-26 18:36:23 +00:00
* intHash32 : number - > UInt32
* intHash64 : number - > UInt64
2014-07-03 11:24:01 +00:00
*
2012-07-15 21:43:04 +00:00
*/
2022-09-09 10:27:29 +00:00
2018-10-23 17:43:09 +00:00
struct IntHash32Impl
{
using ReturnType = UInt32 ;
static UInt32 apply ( UInt64 x )
{
/// seed is taken from /dev/urandom. It allows you to avoid undesirable dependencies with hashes in different data structures.
return intHash32 < 0x75D9543DE018BF45ULL > ( x ) ;
}
} ;
struct IntHash64Impl
{
using ReturnType = UInt64 ;
static UInt64 apply ( UInt64 x )
{
return intHash64 ( x ^ 0x4CF2D2BAAE6DA887ULL ) ;
}
} ;
2021-09-13 08:20:20 +00:00
/// Combines two hash values by hashing their in-memory representation:
/// both values are placed next to each other in an array and the bytes of
/// that array (2 * sizeof(T)) are passed to HashFunction::apply.
template <typename T, typename HashFunction>
T combineHashesFunc(T t1, T t2)
{
    const T pair[2] = {t1, t2};
    const char * raw = reinterpret_cast<const char *>(pair);
    return HashFunction::apply(raw, sizeof(pair));
}
2019-03-22 11:18:24 +00:00
# if USE_SSL
2012-07-15 21:43:04 +00:00
struct HalfMD5Impl
{
2018-10-23 17:43:09 +00:00
static constexpr auto name = " halfMD5 " ;
2018-08-02 09:21:26 +00:00
using ReturnType = UInt64 ;
2012-07-15 21:43:04 +00:00
static UInt64 apply ( const char * begin , size_t size )
{
union
{
unsigned char char_data [ 16 ] ;
2018-08-21 15:56:50 +00:00
uint64_t uint64_data ;
2012-07-15 21:43:04 +00:00
} buf ;
2017-04-01 07:20:54 +00:00
2012-07-15 21:43:04 +00:00
MD5_CTX ctx ;
MD5_Init ( & ctx ) ;
MD5_Update ( & ctx , reinterpret_cast < const unsigned char * > ( begin ) , size ) ;
MD5_Final ( buf . char_data , & ctx ) ;
2017-04-01 07:20:54 +00:00
2018-11-01 17:07:20 +00:00
return Poco : : ByteOrder : : flipBytes ( static_cast < Poco : : UInt64 > ( buf . uint64_data ) ) ; /// Compatibility with existing code. Cast need for old poco AND macos where UInt64 != uint64_t
2012-07-15 21:43:04 +00:00
}
2018-10-23 17:43:09 +00:00
2018-10-24 13:12:59 +00:00
static UInt64 combineHashes ( UInt64 h1 , UInt64 h2 )
2018-10-23 17:43:09 +00:00
{
2018-10-24 13:12:59 +00:00
UInt64 hashes [ ] = { h1 , h2 } ;
return apply ( reinterpret_cast < const char * > ( hashes ) , 16 ) ;
2018-10-23 17:43:09 +00:00
}
2018-11-01 15:47:08 +00:00
/// If true, it will use intHash32 or intHash64 to hash POD types. This behaviour is intended for better performance of some functions.
/// Otherwise it will hash bytes in memory as a string using corresponding hash function.
2018-12-21 17:53:16 +00:00
2018-11-01 15:47:08 +00:00
static constexpr bool use_int_hash_for_pods = false ;
2012-07-15 21:43:04 +00:00
} ;
2021-10-01 01:19:03 +00:00
struct MD4Impl
{
static constexpr auto name = " MD4 " ;
enum { length = MD4_DIGEST_LENGTH } ;
static void apply ( const char * begin , const size_t size , unsigned char * out_char_data )
{
MD4_CTX ctx ;
MD4_Init ( & ctx ) ;
MD4_Update ( & ctx , reinterpret_cast < const unsigned char * > ( begin ) , size ) ;
MD4_Final ( out_char_data , & ctx ) ;
}
} ;
2014-10-29 12:25:33 +00:00
struct MD5Impl
{
static constexpr auto name = " MD5 " ;
2021-10-01 01:19:03 +00:00
enum { length = MD5_DIGEST_LENGTH } ;
2017-04-01 07:20:54 +00:00
2014-10-29 12:25:33 +00:00
static void apply ( const char * begin , const size_t size , unsigned char * out_char_data )
{
MD5_CTX ctx ;
MD5_Init ( & ctx ) ;
MD5_Update ( & ctx , reinterpret_cast < const unsigned char * > ( begin ) , size ) ;
MD5_Final ( out_char_data , & ctx ) ;
}
} ;
struct SHA1Impl
{
static constexpr auto name = " SHA1 " ;
2021-10-01 01:19:03 +00:00
enum { length = SHA_DIGEST_LENGTH } ;
2017-04-01 07:20:54 +00:00
2014-10-29 12:25:33 +00:00
static void apply ( const char * begin , const size_t size , unsigned char * out_char_data )
{
SHA_CTX ctx ;
SHA1_Init ( & ctx ) ;
SHA1_Update ( & ctx , reinterpret_cast < const unsigned char * > ( begin ) , size ) ;
SHA1_Final ( out_char_data , & ctx ) ;
}
} ;
struct SHA224Impl
{
static constexpr auto name = " SHA224 " ;
2021-10-01 01:19:03 +00:00
enum { length = SHA224_DIGEST_LENGTH } ;
2017-04-01 07:20:54 +00:00
2014-10-29 12:25:33 +00:00
static void apply ( const char * begin , const size_t size , unsigned char * out_char_data )
{
SHA256_CTX ctx ;
SHA224_Init ( & ctx ) ;
SHA224_Update ( & ctx , reinterpret_cast < const unsigned char * > ( begin ) , size ) ;
SHA224_Final ( out_char_data , & ctx ) ;
}
} ;
struct SHA256Impl
{
static constexpr auto name = " SHA256 " ;
2021-10-01 01:19:03 +00:00
enum { length = SHA256_DIGEST_LENGTH } ;
2017-04-01 07:20:54 +00:00
2014-10-29 12:25:33 +00:00
static void apply ( const char * begin , const size_t size , unsigned char * out_char_data )
{
SHA256_CTX ctx ;
SHA256_Init ( & ctx ) ;
SHA256_Update ( & ctx , reinterpret_cast < const unsigned char * > ( begin ) , size ) ;
SHA256_Final ( out_char_data , & ctx ) ;
}
} ;
2021-08-19 02:57:20 +00:00
2021-10-01 01:19:03 +00:00
struct SHA384Impl
{
static constexpr auto name = " SHA384 " ;
enum { length = SHA384_DIGEST_LENGTH } ;
static void apply ( const char * begin , const size_t size , unsigned char * out_char_data )
{
SHA512_CTX ctx ;
SHA384_Init ( & ctx ) ;
SHA384_Update ( & ctx , reinterpret_cast < const unsigned char * > ( begin ) , size ) ;
SHA384_Final ( out_char_data , & ctx ) ;
}
} ;
2021-08-19 02:57:20 +00:00
struct SHA512Impl
{
static constexpr auto name = " SHA512 " ;
2021-08-19 07:56:47 +00:00
enum { length = 64 } ;
2021-08-19 02:57:20 +00:00
static void apply ( const char * begin , const size_t size , unsigned char * out_char_data )
{
SHA512_CTX ctx ;
SHA512_Init ( & ctx ) ;
SHA512_Update ( & ctx , reinterpret_cast < const unsigned char * > ( begin ) , size ) ;
SHA512_Final ( out_char_data , & ctx ) ;
}
} ;
2019-03-22 11:18:24 +00:00
# endif
2014-10-29 12:25:33 +00:00
2013-10-21 14:35:12 +00:00
struct SipHash64Impl
{
2018-10-23 17:43:09 +00:00
static constexpr auto name = " sipHash64 " ;
2018-08-02 09:21:26 +00:00
using ReturnType = UInt64 ;
2013-10-21 14:35:12 +00:00
static UInt64 apply ( const char * begin , size_t size )
{
2013-10-21 16:32:49 +00:00
return sipHash64 ( begin , size ) ;
2013-10-21 14:35:12 +00:00
}
2018-10-23 17:43:09 +00:00
2018-10-24 13:12:59 +00:00
static UInt64 combineHashes ( UInt64 h1 , UInt64 h2 )
2018-10-23 18:07:20 +00:00
{
2021-09-13 08:20:20 +00:00
return combineHashesFunc < UInt64 , SipHash64Impl > ( h1 , h2 ) ;
2018-10-23 17:43:09 +00:00
}
2013-10-21 14:35:12 +00:00
2018-11-01 15:47:08 +00:00
static constexpr bool use_int_hash_for_pods = false ;
} ;
2018-07-30 12:19:22 +00:00
2014-10-29 12:25:33 +00:00
/// sipHash128 backend: thin wrapper over the sipHash128 function.
struct SipHash128Impl
{
    static constexpr auto name = "sipHash128";
    using ReturnType = UInt128;

    /// POD values are hashed as raw bytes, not via intHash32/intHash64.
    static constexpr bool use_int_hash_for_pods = false;

    /// Hashes `size` bytes starting at `data`.
    static UInt128 apply(const char * data, const size_t size) { return sipHash128(data, size); }

    /// Combines two hashes by hashing the bytes of the pair {h1, h2} with this same function.
    static UInt128 combineHashes(UInt128 h1, UInt128 h2) { return combineHashesFunc<UInt128, SipHash128Impl>(h1, h2); }
};
2018-11-01 15:47:08 +00:00
/** Why do we need MurmurHash2?
* MurmurHash2 is an outdated hash function , superseded by MurmurHash3 and subsequently by CityHash , xxHash , HighwayHash .
* Usually there is no reason to use MurmurHash .
* It is needed for the cases when you already have MurmurHash in some applications and you want to reproduce it
* in ClickHouse as is . For example , it is needed to reproduce the behaviour
* for NGINX a / b testing module : https : //nginx.ru/en/docs/http/ngx_http_split_clients_module.html
*/
struct MurmurHash2Impl32
{
    static constexpr auto name = "murmurHash2_32";
    using ReturnType = UInt32;

    /// POD values are hashed as raw bytes, not via intHash32/intHash64.
    static constexpr bool use_int_hash_for_pods = false;

    /// 32-bit MurmurHash2 of `size` bytes starting at `data`, with seed 0.
    static UInt32 apply(const char * data, const size_t size)
    {
        return MurmurHash2(data, size, 0);
    }

    /// Combines two hashes: h1 is passed through IntHash32Impl and XORed with h2.
    static UInt32 combineHashes(UInt32 h1, UInt32 h2)
    {
        const UInt32 mixed = IntHash32Impl::apply(h1);
        return mixed ^ h2;
    }
};
struct MurmurHash2Impl64
{
    static constexpr auto name = "murmurHash2_64";
    using ReturnType = UInt64;

    /// POD values are hashed as raw bytes, not via intHash32/intHash64.
    static constexpr bool use_int_hash_for_pods = false;

    /// MurmurHash64A of `size` bytes starting at `data`, with seed 0.
    static UInt64 apply(const char * data, const size_t size)
    {
        return MurmurHash64A(data, size, 0);
    }

    /// Combines two hashes: h1 is passed through IntHash64Impl and XORed with h2.
    static UInt64 combineHashes(UInt64 h1, UInt64 h2)
    {
        const UInt64 mixed = IntHash64Impl::apply(h1);
        return mixed ^ h2;
    }
};
2019-01-08 05:12:52 +00:00
/// To be compatible with gcc: https://github.com/gcc-mirror/gcc/blob/41d6b10e96a1de98e90a7c0378437c3255814b16/libstdc%2B%2B-v3/include/bits/functional_hash.h#L191
2019-01-09 02:03:50 +00:00
struct GccMurmurHashImpl
2019-01-08 05:12:52 +00:00
{
2019-01-09 02:03:50 +00:00
static constexpr auto name = " gccMurmurHash " ;
2019-01-08 05:12:52 +00:00
using ReturnType = UInt64 ;
static UInt64 apply ( const char * data , const size_t size )
{
return MurmurHash64A ( data , size , 0xc70f6907UL ) ;
}
static UInt64 combineHashes ( UInt64 h1 , UInt64 h2 )
{
return IntHash64Impl : : apply ( h1 ) ^ h2 ;
}
static constexpr bool use_int_hash_for_pods = false ;
} ;
2018-11-01 15:47:08 +00:00
struct MurmurHash3Impl32
{
    static constexpr auto name = "murmurHash3_32";
    using ReturnType = UInt32;

    /// POD values are hashed as raw bytes, not via intHash32/intHash64.
    static constexpr bool use_int_hash_for_pods = false;

    /// MurmurHash3_x86_32 of `size` bytes starting at `data`, with seed 0.
    static UInt32 apply(const char * data, const size_t size)
    {
        /// The library writes its result into a byte buffer; the union exposes it as an integer.
        union
        {
            UInt32 value;
            char raw[sizeof(value)];
        };

        MurmurHash3_x86_32(data, size, 0, raw);
        return value;
    }

    /// Combines two hashes: h1 is passed through IntHash32Impl and XORed with h2.
    static UInt32 combineHashes(UInt32 h1, UInt32 h2)
    {
        const UInt32 mixed = IntHash32Impl::apply(h1);
        return mixed ^ h2;
    }
};
struct MurmurHash3Impl64
{
    static constexpr auto name = "murmurHash3_64";
    using ReturnType = UInt64;

    /// POD values are hashed as raw bytes, not via intHash32/intHash64.
    static constexpr bool use_int_hash_for_pods = false;

    /// Computes MurmurHash3_x64_128 (seed 0) and folds the 128-bit result
    /// into 64 bits by XORing its two 64-bit halves.
    static UInt64 apply(const char * data, const size_t size)
    {
        union
        {
            UInt64 halves[2];
            char raw[16];
        };

        MurmurHash3_x64_128(data, size, 0, raw);
        return halves[0] ^ halves[1];
    }

    /// Combines two hashes: h1 is passed through IntHash64Impl and XORed with h2.
    static UInt64 combineHashes(UInt64 h1, UInt64 h2)
    {
        const UInt64 mixed = IntHash64Impl::apply(h1);
        return mixed ^ h2;
    }
};
2020-04-16 12:31:57 +00:00
struct MurmurHash3Impl128
{
    static constexpr auto name = "murmurHash3_128";
    using ReturnType = UInt128;

    /// POD values are hashed as raw bytes, not via intHash32/intHash64.
    static constexpr bool use_int_hash_for_pods = false;

    /// MurmurHash3_x64_128 of `size` bytes starting at `data`, with seed 0.
    /// The 16 result bytes are reinterpreted as a UInt128.
    static UInt128 apply(const char * data, const size_t size)
    {
        char digest[16];
        MurmurHash3_x64_128(data, size, 0, digest);

        auto * as_uint128 = reinterpret_cast<UInt128 *>(digest);
        return *as_uint128;
    }

    /// Combines two hashes by hashing the bytes of the pair {h1, h2} with this same function.
    static UInt128 combineHashes(UInt128 h1, UInt128 h2)
    {
        return combineHashesFunc<UInt128, MurmurHash3Impl128>(h1, h2);
    }
};
2018-12-18 20:24:16 +00:00
/// Care should be taken to do all calculation in unsigned integers (to avoid undefined behaviour on overflow)
2020-06-27 19:05:00 +00:00
/// but obtain the same result as it is done in signed integers with two's complement arithmetic.
2018-12-18 20:24:16 +00:00
struct JavaHashImpl
{
    static constexpr auto name = "javaHash";
    using ReturnType = Int32;

    /// POD values are hashed as raw bytes, not via intHash32/intHash64.
    static constexpr bool use_int_hash_for_pods = false;

    /// 64-bit signed integer: XOR of the low and the high 32 bits.
    static ReturnType apply(int64_t x)
    {
        const auto low = static_cast<uint32_t>(x);
        const auto high = static_cast<uint32_t>(static_cast<uint64_t>(x) >> 32);
        return static_cast<ReturnType>(low ^ high);
    }

    /// int8_t / int16_t / int32_t: the hash is the value itself.
    template <class T, std::enable_if_t<std::is_same_v<T, int8_t>
                                        || std::is_same_v<T, int16_t>
                                        || std::is_same_v<T, int32_t>, T> * = nullptr>
    static ReturnType apply(T x)
    {
        return static_cast<ReturnType>(x);
    }

    /// Any other type: unsigned types are rejected at run time,
    /// the rest are hashed through their object representation.
    template <typename T, std::enable_if_t<!std::is_same_v<T, int8_t>
                                           && !std::is_same_v<T, int16_t>
                                           && !std::is_same_v<T, int32_t>
                                           && !std::is_same_v<T, int64_t>, T> * = nullptr>
    static ReturnType apply(T x)
    {
        if (std::is_unsigned_v<T>)
            throw Exception("Unsigned types are not supported", ErrorCodes::NOT_IMPLEMENTED);

        return apply(reinterpret_cast<const char *>(&x), sizeof(T));
    }

    /// Byte sequence: h = 31 * h + byte, with every byte treated as a signed 8-bit value.
    /// The accumulation is done in UInt32 and converted to Int32 at the end.
    static ReturnType apply(const char * data, const size_t size)
    {
        UInt32 acc = 0;
        for (const char * pos = data, * end = data + size; pos != end; ++pos)
            acc = 31 * acc + static_cast<UInt32>(static_cast<Int8>(*pos));
        return static_cast<Int32>(acc);
    }

    /// Not supported: always throws NOT_IMPLEMENTED.
    static ReturnType combineHashes(Int32, Int32)
    {
        throw Exception("Java hash is not combineable for multiple arguments", ErrorCodes::NOT_IMPLEMENTED);
    }
};
2019-11-06 10:46:37 +00:00
/// Java-style hash over UTF-16LE input: h = 31 * h + code_unit for every 16-bit code unit.
struct JavaHashUTF16LEImpl
{
    static constexpr auto name = "javaHashUTF16LE";
    using ReturnType = Int32;

    /// POD values are hashed as raw bytes, not via intHash32/intHash64.
    static constexpr bool use_int_hash_for_pods = false;

    /// Hashes `raw_size` bytes starting at `raw_data`.
    /// Throws BAD_ARGUMENTS if the number of bytes (after the optional BOM) is odd.
    static Int32 apply(const char * raw_data, const size_t raw_size)
    {
        const char * data = raw_data;
        size_t size = raw_size;

        /// Skip the byte order mark (bytes 0xFF 0xFE) for UTF-16LE.
        const bool has_bom = size >= 2 && data[0] == '\xFF' && data[1] == '\xFE';
        if (has_bom)
        {
            data += 2;
            size -= 2;
        }

        if (size % 2 != 0)
            throw Exception("Arguments for javaHashUTF16LE must be in the form of UTF-16", ErrorCodes::BAD_ARGUMENTS);

        UInt32 acc = 0;
        for (size_t pos = 0; pos < size; pos += 2)
        {
            /// Assemble one little-endian 16-bit code unit from two bytes.
            const auto low = static_cast<UInt8>(data[pos]);
            const auto high = static_cast<UInt8>(data[pos + 1]);
            acc = 31 * acc + static_cast<UInt16>(low | (high << 8));
        }

        return static_cast<Int32>(acc);
    }

    /// Not supported: always throws NOT_IMPLEMENTED.
    static Int32 combineHashes(Int32, Int32)
    {
        throw Exception("Java hash is not combineable for multiple arguments", ErrorCodes::NOT_IMPLEMENTED);
    }
};
2018-12-18 20:24:16 +00:00
/// This is just JavaHash with zeroed out sign bit.
/// This function is used in Hive for versions before 3.0,
/// after 3.0, Hive uses murmur-hash3.
struct HiveHashImpl
{
    static constexpr auto name = "hiveHash";
    using ReturnType = Int32;

    /// POD values are hashed as raw bytes, not via intHash32/intHash64.
    static constexpr bool use_int_hash_for_pods = false;

    /// JavaHashImpl over the same bytes with the top (sign) bit cleared.
    static Int32 apply(const char * data, const size_t size)
    {
        const auto java_hash = static_cast<UInt32>(JavaHashImpl::apply(data, size));
        return static_cast<Int32>(0x7FFFFFFF & java_hash);
    }

    /// Not supported: always throws NOT_IMPLEMENTED.
    static Int32 combineHashes(Int32, Int32)
    {
        throw Exception("Hive hash is not combineable for multiple arguments", ErrorCodes::NOT_IMPLEMENTED);
    }
};
2018-11-01 15:47:08 +00:00
struct ImplCityHash64
{
static constexpr auto name = " cityHash64 " ;
using ReturnType = UInt64 ;
using uint128_t = CityHash_v1_0_2 : : uint128 ;
static auto combineHashes ( UInt64 h1 , UInt64 h2 ) { return CityHash_v1_0_2 : : Hash128to64 ( uint128_t ( h1 , h2 ) ) ; }
static auto apply ( const char * s , const size_t len ) { return CityHash_v1_0_2 : : CityHash64 ( s , len ) ; }
static constexpr bool use_int_hash_for_pods = true ;
} ;
2020-10-31 12:45:53 +00:00
// see farmhash.h for definition of NAMESPACE_FOR_HASH_FUNCTIONS
struct ImplFarmFingerprint64
{
static constexpr auto name = " farmFingerprint64 " ;
using ReturnType = UInt64 ;
using uint128_t = NAMESPACE_FOR_HASH_FUNCTIONS : : uint128_t ;
static auto combineHashes ( UInt64 h1 , UInt64 h2 ) { return NAMESPACE_FOR_HASH_FUNCTIONS : : Fingerprint ( uint128_t ( h1 , h2 ) ) ; }
static auto apply ( const char * s , const size_t len ) { return NAMESPACE_FOR_HASH_FUNCTIONS : : Fingerprint64 ( s , len ) ; }
static constexpr bool use_int_hash_for_pods = true ;
} ;
2018-11-01 15:47:08 +00:00
// see farmhash.h for definition of NAMESPACE_FOR_HASH_FUNCTIONS
struct ImplFarmHash64
{
static constexpr auto name = " farmHash64 " ;
using ReturnType = UInt64 ;
using uint128_t = NAMESPACE_FOR_HASH_FUNCTIONS : : uint128_t ;
static auto combineHashes ( UInt64 h1 , UInt64 h2 ) { return NAMESPACE_FOR_HASH_FUNCTIONS : : Hash128to64 ( uint128_t ( h1 , h2 ) ) ; }
static auto apply ( const char * s , const size_t len ) { return NAMESPACE_FOR_HASH_FUNCTIONS : : Hash64 ( s , len ) ; }
static constexpr bool use_int_hash_for_pods = true ;
} ;
struct ImplMetroHash64
{
static constexpr auto name = " metroHash64 " ;
using ReturnType = UInt64 ;
using uint128_t = CityHash_v1_0_2 : : uint128 ;
static auto combineHashes ( UInt64 h1 , UInt64 h2 ) { return CityHash_v1_0_2 : : Hash128to64 ( uint128_t ( h1 , h2 ) ) ; }
static auto apply ( const char * s , const size_t len )
{
union
{
UInt64 u64 ;
2020-02-22 05:46:35 +00:00
uint8_t u8 [ sizeof ( u64 ) ] ;
2018-11-01 15:47:08 +00:00
} ;
2020-02-22 05:46:35 +00:00
metrohash64_1 ( reinterpret_cast < const uint8_t * > ( s ) , len , 0 , u8 ) ;
2018-11-01 15:47:08 +00:00
return u64 ;
}
static constexpr bool use_int_hash_for_pods = true ;
} ;
2018-12-21 17:53:16 +00:00
struct ImplXxHash32
{
static constexpr auto name = " xxHash32 " ;
using ReturnType = UInt32 ;
static auto apply ( const char * s , const size_t len ) { return XXH32 ( s , len , 0 ) ; }
/**
* With current implementation with more than 1 arguments it will give the results
2020-08-08 00:47:03 +00:00
* non - reproducible from outside of CH .
2018-12-21 17:53:16 +00:00
*
* Proper way of combining several input is to use streaming mode of hash function
* https : //github.com/Cyan4973/xxHash/issues/114#issuecomment-334908566
*
* In common case doable by init_state / update_state / finalize_state
*/
static auto combineHashes ( UInt32 h1 , UInt32 h2 ) { return IntHash32Impl : : apply ( h1 ) ^ h2 ; }
static constexpr bool use_int_hash_for_pods = false ;
} ;
struct ImplXxHash64
{
static constexpr auto name = " xxHash64 " ;
using ReturnType = UInt64 ;
using uint128_t = CityHash_v1_0_2 : : uint128 ;
static auto apply ( const char * s , const size_t len ) { return XXH64 ( s , len , 0 ) ; }
/*
With current implementation with more than 1 arguments it will give the results
2020-08-08 00:47:03 +00:00
non - reproducible from outside of CH . ( see comment on ImplXxHash32 ) .
2018-12-21 17:53:16 +00:00
*/
static auto combineHashes ( UInt64 h1 , UInt64 h2 ) { return CityHash_v1_0_2 : : Hash128to64 ( uint128_t ( h1 , h2 ) ) ; }
static constexpr bool use_int_hash_for_pods = false ;
} ;
2022-01-07 09:41:23 +00:00
struct ImplBLAKE3
{
2022-10-04 21:11:41 +00:00
static constexpr auto name = " BLAKE3 " ;
2022-08-05 18:27:26 +00:00
enum { length = 32 } ;
2022-01-07 09:41:23 +00:00
2022-10-04 21:11:41 +00:00
# if !USE_BLAKE3
2022-10-06 18:41:26 +00:00
[[noreturn]] static void apply ( const char * begin , const size_t size , unsigned char * out_char_data )
{
UNUSED ( begin ) ;
UNUSED ( size ) ;
UNUSED ( out_char_data ) ;
2022-10-05 20:38:44 +00:00
throw Exception ( ErrorCodes : : SUPPORT_IS_DISABLED , " BLAKE3 is not available. Rust code or BLAKE3 itself may be disabled. " ) ;
}
# else
2022-01-08 15:56:20 +00:00
static void apply ( const char * begin , const size_t size , unsigned char * out_char_data )
2022-03-17 17:41:17 +00:00
{
2022-02-13 10:12:45 +00:00
# if defined(MEMORY_SANITIZER)
2022-10-16 14:21:10 +00:00
auto err_msg = blake3_apply_shim_msan_compat ( begin , safe_cast < uint32_t > ( size ) , out_char_data ) ;
2022-09-08 19:25:46 +00:00
__msan_unpoison ( out_char_data , length ) ;
2022-02-13 09:46:20 +00:00
# else
2022-10-16 14:21:10 +00:00
auto err_msg = blake3_apply_shim ( begin , safe_cast < uint32_t > ( size ) , out_char_data ) ;
2022-02-13 09:46:20 +00:00
# endif
2022-01-27 16:51:11 +00:00
if ( err_msg ! = nullptr )
{
2022-09-12 15:43:50 +00:00
auto err_st = std : : string ( err_msg ) ;
2022-01-16 10:28:07 +00:00
blake3_free_char_pointer ( err_msg ) ;
2022-09-12 15:43:50 +00:00
throw Exception ( " Function returned error message: " + std : : string ( err_msg ) , ErrorCodes : : ILLEGAL_TYPE_OF_ARGUMENT ) ;
2022-01-12 15:28:30 +00:00
}
2022-01-07 09:41:23 +00:00
}
2022-10-05 20:38:44 +00:00
# endif
2022-01-07 09:41:23 +00:00
} ;
2014-10-29 12:25:33 +00:00
template < typename Impl >
class FunctionStringHashFixedString : public IFunction
{
public :
2014-11-12 17:23:26 +00:00
static constexpr auto name = Impl : : name ;
2021-04-10 23:33:54 +00:00
static FunctionPtr create ( ContextPtr ) { return std : : make_shared < FunctionStringHashFixedString > ( ) ; }
2017-04-01 07:20:54 +00:00
2015-10-11 23:36:45 +00:00
String getName ( ) const override
2014-10-29 12:25:33 +00:00
{
2014-11-12 17:23:26 +00:00
return name ;
2014-10-29 12:25:33 +00:00
}
2017-04-01 07:20:54 +00:00
2016-12-29 19:38:10 +00:00
size_t getNumberOfArguments ( ) const override { return 1 ; }
2017-04-01 07:20:54 +00:00
2016-07-06 09:47:55 +00:00
DataTypePtr getReturnTypeImpl ( const DataTypes & arguments ) const override
2014-10-29 12:25:33 +00:00
{
2020-06-23 05:35:35 +00:00
if ( ! isStringOrFixedString ( arguments [ 0 ] ) )
2014-10-29 12:25:33 +00:00
throw Exception ( " Illegal type " + arguments [ 0 ] - > getName ( ) + " of argument of function " + getName ( ) ,
ErrorCodes : : ILLEGAL_TYPE_OF_ARGUMENT ) ;
2017-04-01 07:20:54 +00:00
2016-05-28 07:48:40 +00:00
return std : : make_shared < DataTypeFixedString > ( Impl : : length ) ;
2014-10-29 12:25:33 +00:00
}
2017-04-01 07:20:54 +00:00
2017-07-23 08:40:43 +00:00
bool useDefaultImplementationForConstants ( ) const override { return true ; }
2021-06-22 16:21:23 +00:00
bool isSuitableForShortCircuitArgumentsExecution ( const DataTypesWithConstInfo & /*arguments*/ ) const override { return true ; }
2021-04-29 14:48:26 +00:00
2020-11-17 13:24:45 +00:00
ColumnPtr executeImpl ( const ColumnsWithTypeAndName & arguments , const DataTypePtr & , size_t /*input_rows_count*/ ) const override
2014-10-29 12:25:33 +00:00
{
2020-10-18 19:00:13 +00:00
if ( const ColumnString * col_from = checkAndGetColumn < ColumnString > ( arguments [ 0 ] . column . get ( ) ) )
2014-10-29 12:25:33 +00:00
{
2017-12-14 01:43:19 +00:00
auto col_to = ColumnFixedString : : create ( Impl : : length ) ;
2017-04-01 07:20:54 +00:00
2018-11-25 00:08:50 +00:00
const typename ColumnString : : Chars & data = col_from - > getChars ( ) ;
2017-12-15 21:32:25 +00:00
const typename ColumnString : : Offsets & offsets = col_from - > getOffsets ( ) ;
2014-10-29 12:25:33 +00:00
auto & chars_to = col_to - > getChars ( ) ;
const auto size = offsets . size ( ) ;
chars_to . resize ( size * Impl : : length ) ;
2017-04-01 07:20:54 +00:00
2018-07-30 18:00:16 +00:00
ColumnString : : Offset current_offset = 0 ;
2014-10-29 12:25:33 +00:00
for ( size_t i = 0 ; i < size ; + + i )
2018-07-30 18:00:16 +00:00
{
2014-10-29 12:25:33 +00:00
Impl : : apply (
2018-07-30 18:00:16 +00:00
reinterpret_cast < const char * > ( & data [ current_offset ] ) ,
offsets [ i ] - current_offset - 1 ,
2020-02-22 05:46:35 +00:00
reinterpret_cast < uint8_t * > ( & chars_to [ i * Impl : : length ] ) ) ;
2017-12-16 05:21:04 +00:00
2018-07-30 18:00:16 +00:00
current_offset = offsets [ i ] ;
}
2020-10-18 19:00:13 +00:00
return col_to ;
2014-10-29 12:25:33 +00:00
}
2020-06-23 05:35:35 +00:00
else if (
2020-10-18 19:00:13 +00:00
const ColumnFixedString * col_from_fix = checkAndGetColumn < ColumnFixedString > ( arguments [ 0 ] . column . get ( ) ) )
2020-06-23 05:35:35 +00:00
{
auto col_to = ColumnFixedString : : create ( Impl : : length ) ;
const typename ColumnFixedString : : Chars & data = col_from_fix - > getChars ( ) ;
const auto size = col_from_fix - > size ( ) ;
auto & chars_to = col_to - > getChars ( ) ;
const auto length = col_from_fix - > getN ( ) ;
chars_to . resize ( size * Impl : : length ) ;
for ( size_t i = 0 ; i < size ; + + i )
{
Impl : : apply (
reinterpret_cast < const char * > ( & data [ i * length ] ) , length , reinterpret_cast < uint8_t * > ( & chars_to [ i * Impl : : length ] ) ) ;
}
2020-10-18 19:00:13 +00:00
return col_to ;
2020-06-23 05:35:35 +00:00
}
2014-10-29 12:25:33 +00:00
else
2020-10-18 19:00:13 +00:00
throw Exception ( " Illegal column " + arguments [ 0 ] . column - > getName ( )
2014-10-29 12:25:33 +00:00
+ " of first argument of function " + getName ( ) ,
ErrorCodes : : ILLEGAL_COLUMN ) ;
}
} ;
2020-05-25 15:16:19 +00:00
DECLARE_MULTITARGET_CODE (
2012-07-15 21:43:04 +00:00
template < typename Impl , typename Name >
class FunctionIntHash : public IFunction
{
2014-11-12 17:23:26 +00:00
public :
static constexpr auto name = Name : : name ;
2012-07-15 21:43:04 +00:00
private :
2016-05-28 10:35:44 +00:00
using ToType = typename Impl : : ReturnType ;
2017-04-01 07:20:54 +00:00
2012-07-15 21:43:04 +00:00
template < typename FromType >
2020-11-17 13:24:45 +00:00
ColumnPtr executeType ( const ColumnsWithTypeAndName & arguments ) const
2012-07-15 21:43:04 +00:00
{
2021-09-10 11:49:22 +00:00
using ColVecType = ColumnVectorOrDecimal < FromType > ;
2020-08-19 11:52:17 +00:00
2020-10-18 19:00:13 +00:00
if ( const ColVecType * col_from = checkAndGetColumn < ColVecType > ( arguments [ 0 ] . column . get ( ) ) )
2012-07-15 21:43:04 +00:00
{
2017-12-14 01:43:19 +00:00
auto col_to = ColumnVector < ToType > : : create ( ) ;
2017-04-01 07:20:54 +00:00
2020-08-19 11:52:17 +00:00
const typename ColVecType : : Container & vec_from = col_from - > getData ( ) ;
2017-12-15 21:32:25 +00:00
typename ColumnVector < ToType > : : Container & vec_to = col_to - > getData ( ) ;
2017-04-01 07:20:54 +00:00
2012-07-15 21:43:04 +00:00
size_t size = vec_from . size ( ) ;
vec_to . resize ( size ) ;
for ( size_t i = 0 ; i < size ; + + i )
vec_to [ i ] = Impl : : apply ( vec_from [ i ] ) ;
2017-12-16 05:21:04 +00:00
2020-10-18 19:00:13 +00:00
return col_to ;
2012-07-15 21:43:04 +00:00
}
else
2020-10-18 19:00:13 +00:00
throw Exception ( " Illegal column " + arguments [ 0 ] . column - > getName ( )
2014-11-12 17:23:26 +00:00
+ " of first argument of function " + Name : : name ,
2012-07-15 21:43:04 +00:00
ErrorCodes : : ILLEGAL_COLUMN ) ;
}
public :
2015-10-11 23:36:45 +00:00
String getName ( ) const override
2012-07-15 21:43:04 +00:00
{
2014-11-12 17:23:26 +00:00
return name ;
2012-07-15 21:43:04 +00:00
}
2017-04-01 07:20:54 +00:00
2016-12-29 19:38:10 +00:00
size_t getNumberOfArguments ( ) const override { return 1 ; }
2017-04-01 07:20:54 +00:00
2016-07-06 09:47:55 +00:00
DataTypePtr getReturnTypeImpl ( const DataTypes & arguments ) const override
2012-07-15 21:43:04 +00:00
{
2018-11-01 15:47:08 +00:00
if ( ! arguments [ 0 ] - > isValueRepresentedByNumber ( ) )
2012-07-15 21:43:04 +00:00
throw Exception ( " Illegal type " + arguments [ 0 ] - > getName ( ) + " of argument of function " + getName ( ) ,
ErrorCodes : : ILLEGAL_TYPE_OF_ARGUMENT ) ;
2017-04-01 07:20:54 +00:00
2017-03-12 10:13:45 +00:00
return std : : make_shared < DataTypeNumber < typename Impl : : ReturnType > > ( ) ;
2012-07-15 21:43:04 +00:00
}
2017-04-01 07:20:54 +00:00
2017-07-23 08:40:43 +00:00
bool useDefaultImplementationForConstants ( ) const override { return true ; }
2021-06-22 16:21:23 +00:00
bool isSuitableForShortCircuitArgumentsExecution ( const DataTypesWithConstInfo & /*arguments*/ ) const override { return false ; }
2021-04-29 14:48:26 +00:00
2020-11-17 13:24:45 +00:00
ColumnPtr executeImpl ( const ColumnsWithTypeAndName & arguments , const DataTypePtr & , size_t /*input_rows_count*/ ) const override
2012-07-15 21:43:04 +00:00
{
2020-10-18 19:00:13 +00:00
const IDataType * from_type = arguments [ 0 ] . type . get ( ) ;
2018-09-07 14:37:26 +00:00
WhichDataType which ( from_type ) ;
2020-10-18 19:00:13 +00:00
if ( which . isUInt8 ( ) )
return executeType < UInt8 > ( arguments ) ;
else if ( which . isUInt16 ( ) )
return executeType < UInt16 > ( arguments ) ;
else if ( which . isUInt32 ( ) )
return executeType < UInt32 > ( arguments ) ;
else if ( which . isUInt64 ( ) )
return executeType < UInt64 > ( arguments ) ;
else if ( which . isInt8 ( ) )
return executeType < Int8 > ( arguments ) ;
else if ( which . isInt16 ( ) )
return executeType < Int16 > ( arguments ) ;
else if ( which . isInt32 ( ) )
return executeType < Int32 > ( arguments ) ;
else if ( which . isInt64 ( ) )
return executeType < Int64 > ( arguments ) ;
else if ( which . isDate ( ) )
return executeType < UInt16 > ( arguments ) ;
2021-07-15 11:41:52 +00:00
else if ( which . isDate32 ( ) )
return executeType < Int32 > ( arguments ) ;
2020-10-18 19:00:13 +00:00
else if ( which . isDateTime ( ) )
return executeType < UInt32 > ( arguments ) ;
else if ( which . isDecimal32 ( ) )
return executeType < Decimal32 > ( arguments ) ;
else if ( which . isDecimal64 ( ) )
return executeType < Decimal64 > ( arguments ) ;
2012-07-15 21:43:04 +00:00
else
2020-10-18 19:00:13 +00:00
throw Exception ( " Illegal type " + arguments [ 0 ] . type - > getName ( ) + " of argument of function " + getName ( ) ,
2012-07-15 21:43:04 +00:00
ErrorCodes : : ILLEGAL_TYPE_OF_ARGUMENT ) ;
}
} ;
2020-05-25 15:16:19 +00:00
) // DECLARE_MULTITARGET_CODE
template < typename Impl , typename Name >
class FunctionIntHash : public TargetSpecific : : Default : : FunctionIntHash < Impl , Name >
{
public :
2021-04-10 23:33:54 +00:00
explicit FunctionIntHash ( ContextPtr context ) : selector ( context )
2020-05-25 15:16:19 +00:00
{
selector . registerImplementation < TargetArch : : Default ,
TargetSpecific : : Default : : FunctionIntHash < Impl , Name > > ( ) ;
2020-05-28 11:48:56 +00:00
2020-05-26 15:56:46 +00:00
# if USE_MULTITARGET_CODE
2020-05-25 15:16:19 +00:00
selector . registerImplementation < TargetArch : : AVX2 ,
TargetSpecific : : AVX2 : : FunctionIntHash < Impl , Name > > ( ) ;
selector . registerImplementation < TargetArch : : AVX512F ,
TargetSpecific : : AVX512F : : FunctionIntHash < Impl , Name > > ( ) ;
2020-05-26 15:56:46 +00:00
# endif
2020-05-25 15:16:19 +00:00
}
2020-11-17 13:24:45 +00:00
ColumnPtr executeImpl ( const ColumnsWithTypeAndName & arguments , const DataTypePtr & result_type , size_t input_rows_count ) const override
2020-05-25 15:16:19 +00:00
{
2020-10-18 19:00:13 +00:00
return selector . selectAndExecute ( arguments , result_type , input_rows_count ) ;
2020-05-25 15:16:19 +00:00
}
2020-05-28 11:48:56 +00:00
2021-04-10 23:33:54 +00:00
static FunctionPtr create ( ContextPtr context )
2020-05-25 15:16:19 +00:00
{
return std : : make_shared < FunctionIntHash > ( context ) ;
}
private :
ImplementationSelector < IFunction > selector ;
} ;
2012-07-15 21:43:04 +00:00
2020-05-26 15:56:46 +00:00
DECLARE_MULTITARGET_CODE (
2015-07-17 15:56:08 +00:00
template < typename Impl >
2018-10-23 17:43:09 +00:00
class FunctionAnyHash : public IFunction
2014-07-03 11:24:01 +00:00
{
2014-11-12 17:23:26 +00:00
public :
2015-07-17 15:56:08 +00:00
static constexpr auto name = Impl : : name ;
2014-11-12 17:23:26 +00:00
2014-07-03 11:24:01 +00:00
private :
2018-10-23 17:43:09 +00:00
using ToType = typename Impl : : ReturnType ;
2019-11-13 14:20:23 +00:00
template < typename FromType , bool first >
2020-07-21 13:58:07 +00:00
void executeIntType ( const IColumn * column , typename ColumnVector < ToType > : : Container & vec_to ) const
2014-07-03 11:24:01 +00:00
{
2021-09-10 11:49:22 +00:00
using ColVecType = ColumnVectorOrDecimal < FromType > ;
2020-08-19 11:52:17 +00:00
if ( const ColVecType * col_from = checkAndGetColumn < ColVecType > ( column ) )
2014-07-03 11:24:01 +00:00
{
2020-08-19 11:52:17 +00:00
const typename ColVecType : : Container & vec_from = col_from - > getData ( ) ;
2014-07-03 11:24:01 +00:00
size_t size = vec_from . size ( ) ;
for ( size_t i = 0 ; i < size ; + + i )
2014-07-04 09:42:56 +00:00
{
2018-10-23 17:43:09 +00:00
ToType h ;
2018-11-01 15:47:08 +00:00
if constexpr ( Impl : : use_int_hash_for_pods )
{
if constexpr ( std : : is_same_v < ToType , UInt64 > )
2021-06-15 19:55:21 +00:00
h = IntHash64Impl : : apply ( bit_cast < UInt64 > ( vec_from [ i ] ) ) ;
2018-11-01 15:47:08 +00:00
else
2021-06-15 19:55:21 +00:00
h = IntHash32Impl : : apply ( bit_cast < UInt32 > ( vec_from [ i ] ) ) ;
2018-11-01 15:47:08 +00:00
}
2018-10-23 17:43:09 +00:00
else
2018-11-01 15:47:08 +00:00
{
2022-09-13 09:59:11 +00:00
if ( std : : is_same_v < Impl , JavaHashImpl > )
h = JavaHashImpl : : apply ( vec_from [ i ] ) ;
else
h = Impl : : apply ( reinterpret_cast < const char * > ( & vec_from [ i ] ) , sizeof ( vec_from [ i ] ) ) ;
2018-11-01 15:47:08 +00:00
}
2020-08-19 11:52:17 +00:00
if constexpr ( first )
2014-07-04 09:42:56 +00:00
vec_to [ i ] = h ;
else
2018-10-24 13:12:59 +00:00
vec_to [ i ] = Impl : : combineHashes ( vec_to [ i ] , h ) ;
2014-07-04 09:42:56 +00:00
}
2014-07-03 11:24:01 +00:00
}
2020-08-19 11:52:17 +00:00
else if ( auto col_from_const = checkAndGetColumnConst < ColVecType > ( column ) )
2014-07-03 11:24:01 +00:00
{
2019-01-04 12:10:00 +00:00
auto value = col_from_const - > template getValue < FromType > ( ) ;
2018-10-23 17:43:09 +00:00
ToType hash ;
if constexpr ( std : : is_same_v < ToType , UInt64 > )
2021-06-15 19:55:21 +00:00
hash = IntHash64Impl : : apply ( bit_cast < UInt64 > ( value ) ) ;
2018-10-23 17:43:09 +00:00
else
2021-06-15 19:55:21 +00:00
hash = IntHash32Impl : : apply ( bit_cast < UInt32 > ( value ) ) ;
2018-10-23 17:43:09 +00:00
2014-07-03 11:24:01 +00:00
size_t size = vec_to . size ( ) ;
2020-08-19 11:52:17 +00:00
if constexpr ( first )
2014-07-04 09:42:56 +00:00
{
vec_to . assign ( size , hash ) ;
}
else
{
for ( size_t i = 0 ; i < size ; + + i )
2018-10-24 13:12:59 +00:00
vec_to [ i ] = Impl : : combineHashes ( vec_to [ i ] , hash ) ;
2014-07-04 09:42:56 +00:00
}
2014-07-03 11:24:01 +00:00
}
else
throw Exception ( " Illegal column " + column - > getName ( )
+ " of argument of function " + getName ( ) ,
ErrorCodes : : ILLEGAL_COLUMN ) ;
}
2017-04-01 07:20:54 +00:00
2020-08-19 11:52:17 +00:00
template < typename FromType , bool first >
void executeBigIntType ( const IColumn * column , typename ColumnVector < ToType > : : Container & vec_to ) const
{
2021-09-10 11:49:22 +00:00
using ColVecType = ColumnVectorOrDecimal < FromType > ;
2020-08-19 11:52:17 +00:00
if ( const ColVecType * col_from = checkAndGetColumn < ColVecType > ( column ) )
{
const typename ColVecType : : Container & vec_from = col_from - > getData ( ) ;
size_t size = vec_from . size ( ) ;
for ( size_t i = 0 ; i < size ; + + i )
{
2021-01-26 18:22:40 +00:00
ToType h = Impl : : apply ( reinterpret_cast < const char * > ( & vec_from [ i ] ) , sizeof ( vec_from [ i ] ) ) ;
2020-08-19 11:52:17 +00:00
if constexpr ( first )
vec_to [ i ] = h ;
else
vec_to [ i ] = Impl : : combineHashes ( vec_to [ i ] , h ) ;
}
}
else if ( auto col_from_const = checkAndGetColumnConst < ColVecType > ( column ) )
{
auto value = col_from_const - > template getValue < FromType > ( ) ;
2021-01-26 18:22:40 +00:00
ToType h = Impl : : apply ( reinterpret_cast < const char * > ( & value ) , sizeof ( value ) ) ;
2020-08-19 11:52:17 +00:00
size_t size = vec_to . size ( ) ;
if constexpr ( first )
{
vec_to . assign ( size , h ) ;
}
else
{
for ( size_t i = 0 ; i < size ; + + i )
vec_to [ i ] = Impl : : combineHashes ( vec_to [ i ] , h ) ;
}
}
else
throw Exception ( " Illegal column " + column - > getName ( )
+ " of argument of function " + getName ( ) ,
ErrorCodes : : ILLEGAL_COLUMN ) ;
}
2019-11-12 16:37:36 +00:00
template < bool first >
2020-07-21 13:58:07 +00:00
void executeGeneric ( const IColumn * column , typename ColumnVector < ToType > : : Container & vec_to ) const
2019-12-06 15:21:58 +00:00
{
2019-12-06 19:21:22 +00:00
for ( size_t i = 0 , size = column - > size ( ) ; i < size ; + + i )
2019-12-06 15:21:58 +00:00
{
2019-12-06 16:27:45 +00:00
StringRef bytes = column - > getDataAt ( i ) ;
const ToType h = Impl : : apply ( bytes . data , bytes . size ) ;
2020-08-19 11:52:17 +00:00
if constexpr ( first )
2019-12-06 16:27:45 +00:00
vec_to [ i ] = h ;
2019-12-06 15:21:58 +00:00
else
2019-12-06 16:27:45 +00:00
vec_to [ i ] = Impl : : combineHashes ( vec_to [ i ] , h ) ;
2019-12-06 15:21:58 +00:00
}
}
2014-07-04 09:42:56 +00:00
template < bool first >
2020-07-21 13:58:07 +00:00
void executeString ( const IColumn * column , typename ColumnVector < ToType > : : Container & vec_to ) const
2014-07-03 11:24:01 +00:00
{
2017-07-21 06:35:58 +00:00
if ( const ColumnString * col_from = checkAndGetColumn < ColumnString > ( column ) )
2014-07-03 11:24:01 +00:00
{
2018-11-25 00:08:50 +00:00
const typename ColumnString : : Chars & data = col_from - > getChars ( ) ;
2017-12-15 21:32:25 +00:00
const typename ColumnString : : Offsets & offsets = col_from - > getOffsets ( ) ;
2014-07-03 11:24:01 +00:00
size_t size = offsets . size ( ) ;
2017-04-01 07:20:54 +00:00
2018-07-30 18:00:16 +00:00
ColumnString : : Offset current_offset = 0 ;
2014-07-03 11:24:01 +00:00
for ( size_t i = 0 ; i < size ; + + i )
2014-07-04 09:42:56 +00:00
{
2018-10-23 17:43:09 +00:00
const ToType h = Impl : : apply (
2018-07-30 18:00:16 +00:00
reinterpret_cast < const char * > ( & data [ current_offset ] ) ,
offsets [ i ] - current_offset - 1 ) ;
2020-08-19 11:52:17 +00:00
if constexpr ( first )
2014-07-04 09:42:56 +00:00
vec_to [ i ] = h ;
else
2018-10-24 13:12:59 +00:00
vec_to [ i ] = Impl : : combineHashes ( vec_to [ i ] , h ) ;
2018-07-30 18:00:16 +00:00
current_offset = offsets [ i ] ;
2014-07-04 09:42:56 +00:00
}
2014-07-03 11:24:01 +00:00
}
2019-01-04 12:10:00 +00:00
else if ( const ColumnFixedString * col_from_fixed = checkAndGetColumn < ColumnFixedString > ( column ) )
2014-07-03 11:24:01 +00:00
{
2019-01-04 12:10:00 +00:00
const typename ColumnString : : Chars & data = col_from_fixed - > getChars ( ) ;
size_t n = col_from_fixed - > getN ( ) ;
2014-07-03 11:24:01 +00:00
size_t size = data . size ( ) / n ;
2018-07-30 18:00:16 +00:00
2014-07-03 11:24:01 +00:00
for ( size_t i = 0 ; i < size ; + + i )
2014-07-04 09:42:56 +00:00
{
2018-10-23 17:43:09 +00:00
const ToType h = Impl : : apply ( reinterpret_cast < const char * > ( & data [ i * n ] ) , n ) ;
2020-08-19 11:52:17 +00:00
if constexpr ( first )
2014-07-04 09:42:56 +00:00
vec_to [ i ] = h ;
else
2018-10-24 13:12:59 +00:00
vec_to [ i ] = Impl : : combineHashes ( vec_to [ i ] , h ) ;
2014-07-04 09:42:56 +00:00
}
2014-07-03 11:24:01 +00:00
}
2019-01-04 12:10:00 +00:00
else if ( const ColumnConst * col_from_const = checkAndGetColumnConstStringOrFixedString ( column ) )
2014-07-03 11:24:01 +00:00
{
2020-10-18 19:00:13 +00:00
String value = col_from_const - > getValue < String > ( ) ;
2018-10-23 17:43:09 +00:00
const ToType hash = Impl : : apply ( value . data ( ) , value . size ( ) ) ;
2015-07-17 15:56:08 +00:00
const size_t size = vec_to . size ( ) ;
2018-07-30 18:00:16 +00:00
2020-08-19 11:52:17 +00:00
if constexpr ( first )
2014-07-04 09:42:56 +00:00
{
vec_to . assign ( size , hash ) ;
}
else
{
for ( size_t i = 0 ; i < size ; + + i )
{
2018-10-24 13:12:59 +00:00
vec_to [ i ] = Impl : : combineHashes ( vec_to [ i ] , hash ) ;
2014-07-04 09:42:56 +00:00
}
}
2014-07-03 11:24:01 +00:00
}
else
throw Exception ( " Illegal column " + column - > getName ( )
+ " of first argument of function " + getName ( ) ,
ErrorCodes : : ILLEGAL_COLUMN ) ;
}
2017-04-01 07:20:54 +00:00
2014-07-04 09:42:56 +00:00
template < bool first >
2020-07-21 13:58:07 +00:00
void executeArray ( const IDataType * type , const IColumn * column , typename ColumnVector < ToType > : : Container & vec_to ) const
2014-07-03 11:24:01 +00:00
{
2017-07-21 06:35:58 +00:00
const IDataType * nested_type = typeid_cast < const DataTypeArray * > ( type ) - > getNestedType ( ) . get ( ) ;
2017-04-01 07:20:54 +00:00
2017-07-21 06:35:58 +00:00
if ( const ColumnArray * col_from = checkAndGetColumn < ColumnArray > ( column ) )
2014-07-03 11:24:01 +00:00
{
const IColumn * nested_column = & col_from - > getData ( ) ;
2017-12-15 21:32:25 +00:00
const ColumnArray : : Offsets & offsets = col_from - > getOffsets ( ) ;
2015-07-17 15:56:08 +00:00
const size_t nested_size = nested_column - > size ( ) ;
2017-04-01 07:20:54 +00:00
2018-10-23 17:43:09 +00:00
typename ColumnVector < ToType > : : Container vec_temp ( nested_size ) ;
2022-04-29 18:48:19 +00:00
bool nested_is_first = true ;
executeForArgument ( nested_type , nested_column , vec_temp , nested_is_first ) ;
2017-04-01 07:20:54 +00:00
2015-07-17 15:56:08 +00:00
const size_t size = offsets . size ( ) ;
2017-04-01 07:20:54 +00:00
2018-07-30 18:00:16 +00:00
ColumnArray : : Offset current_offset = 0 ;
2014-07-03 11:24:01 +00:00
for ( size_t i = 0 ; i < size ; + + i )
{
2018-07-30 18:00:16 +00:00
ColumnArray : : Offset next_offset = offsets [ i ] ;
2017-04-01 07:20:54 +00:00
2018-10-23 17:43:09 +00:00
ToType h ;
if constexpr ( std : : is_same_v < ToType , UInt64 > )
h = IntHash64Impl : : apply ( next_offset - current_offset ) ;
else
h = IntHash32Impl : : apply ( next_offset - current_offset ) ;
2020-08-19 11:52:17 +00:00
if constexpr ( first )
2014-07-04 09:42:56 +00:00
vec_to [ i ] = h ;
else
2018-10-24 13:12:59 +00:00
vec_to [ i ] = Impl : : combineHashes ( vec_to [ i ] , h ) ;
2017-04-01 07:20:54 +00:00
2018-07-30 18:00:16 +00:00
for ( size_t j = current_offset ; j < next_offset ; + + j )
2018-10-24 13:12:59 +00:00
vec_to [ i ] = Impl : : combineHashes ( vec_to [ i ] , vec_temp [ j ] ) ;
2018-07-30 18:00:16 +00:00
current_offset = offsets [ i ] ;
2014-07-03 11:24:01 +00:00
}
}
2019-01-04 12:10:00 +00:00
else if ( const ColumnConst * col_from_const = checkAndGetColumnConst < ColumnArray > ( column ) )
2014-07-03 11:24:01 +00:00
{
2017-05-27 15:45:25 +00:00
/// NOTE: here, of course, you can do without the materialization of the column.
2019-01-04 12:10:00 +00:00
ColumnPtr full_column = col_from_const - > convertToFullColumn ( ) ;
2014-07-04 09:42:56 +00:00
executeArray < first > ( type , & * full_column , vec_to ) ;
2014-07-03 11:24:01 +00:00
}
else
throw Exception ( " Illegal column " + column - > getName ( )
+ " of first argument of function " + getName ( ) ,
ErrorCodes : : ILLEGAL_COLUMN ) ;
}
2017-04-01 07:20:54 +00:00
2014-07-04 09:42:56 +00:00
template < bool first >
2020-07-21 13:58:07 +00:00
void executeAny ( const IDataType * from_type , const IColumn * icolumn , typename ColumnVector < ToType > : : Container & vec_to ) const
2014-07-03 11:24:01 +00:00
{
2018-09-07 14:37:26 +00:00
WhichDataType which ( from_type ) ;
2019-11-13 14:20:23 +00:00
if ( which . isUInt8 ( ) ) executeIntType < UInt8 , first > ( icolumn , vec_to ) ;
else if ( which . isUInt16 ( ) ) executeIntType < UInt16 , first > ( icolumn , vec_to ) ;
else if ( which . isUInt32 ( ) ) executeIntType < UInt32 , first > ( icolumn , vec_to ) ;
else if ( which . isUInt64 ( ) ) executeIntType < UInt64 , first > ( icolumn , vec_to ) ;
2021-05-05 22:20:49 +00:00
else if ( which . isUInt128 ( ) ) executeBigIntType < UInt128 , first > ( icolumn , vec_to ) ;
2020-09-01 09:54:50 +00:00
else if ( which . isUInt256 ( ) ) executeBigIntType < UInt256 , first > ( icolumn , vec_to ) ;
2019-11-13 14:20:23 +00:00
else if ( which . isInt8 ( ) ) executeIntType < Int8 , first > ( icolumn , vec_to ) ;
else if ( which . isInt16 ( ) ) executeIntType < Int16 , first > ( icolumn , vec_to ) ;
else if ( which . isInt32 ( ) ) executeIntType < Int32 , first > ( icolumn , vec_to ) ;
else if ( which . isInt64 ( ) ) executeIntType < Int64 , first > ( icolumn , vec_to ) ;
2020-08-19 11:52:17 +00:00
else if ( which . isInt128 ( ) ) executeBigIntType < Int128 , first > ( icolumn , vec_to ) ;
2020-09-01 09:54:50 +00:00
else if ( which . isInt256 ( ) ) executeBigIntType < Int256 , first > ( icolumn , vec_to ) ;
2021-05-04 11:53:29 +00:00
else if ( which . isUUID ( ) ) executeBigIntType < UUID , first > ( icolumn , vec_to ) ;
2019-11-13 14:20:23 +00:00
else if ( which . isEnum8 ( ) ) executeIntType < Int8 , first > ( icolumn , vec_to ) ;
else if ( which . isEnum16 ( ) ) executeIntType < Int16 , first > ( icolumn , vec_to ) ;
else if ( which . isDate ( ) ) executeIntType < UInt16 , first > ( icolumn , vec_to ) ;
2021-07-15 11:41:52 +00:00
else if ( which . isDate32 ( ) ) executeIntType < Int32 , first > ( icolumn , vec_to ) ;
2019-11-13 14:20:23 +00:00
else if ( which . isDateTime ( ) ) executeIntType < UInt32 , first > ( icolumn , vec_to ) ;
2020-08-19 11:52:17 +00:00
/// TODO: executeIntType() for Decimal32/64 leads to incompatible result
else if ( which . isDecimal32 ( ) ) executeBigIntType < Decimal32 , first > ( icolumn , vec_to ) ;
else if ( which . isDecimal64 ( ) ) executeBigIntType < Decimal64 , first > ( icolumn , vec_to ) ;
else if ( which . isDecimal128 ( ) ) executeBigIntType < Decimal128 , first > ( icolumn , vec_to ) ;
else if ( which . isDecimal256 ( ) ) executeBigIntType < Decimal256 , first > ( icolumn , vec_to ) ;
2019-11-13 14:20:23 +00:00
else if ( which . isFloat32 ( ) ) executeIntType < Float32 , first > ( icolumn , vec_to ) ;
else if ( which . isFloat64 ( ) ) executeIntType < Float64 , first > ( icolumn , vec_to ) ;
2018-09-07 14:37:26 +00:00
else if ( which . isString ( ) ) executeString < first > ( icolumn , vec_to ) ;
else if ( which . isFixedString ( ) ) executeString < first > ( icolumn , vec_to ) ;
else if ( which . isArray ( ) ) executeArray < first > ( from_type , icolumn , vec_to ) ;
2022-04-29 18:48:19 +00:00
else executeGeneric < first > ( icolumn , vec_to ) ;
2014-07-03 11:24:01 +00:00
}
2019-11-13 15:18:24 +00:00
2020-07-21 13:58:07 +00:00
void executeForArgument ( const IDataType * type , const IColumn * column , typename ColumnVector < ToType > : : Container & vec_to , bool & is_first ) const
2015-10-29 00:12:04 +00:00
{
2017-02-08 20:33:50 +00:00
/// Flattening of tuples.
2015-10-29 00:12:04 +00:00
if ( const ColumnTuple * tuple = typeid_cast < const ColumnTuple * > ( column ) )
{
2019-03-25 01:43:54 +00:00
const auto & tuple_columns = tuple - > getColumns ( ) ;
2017-12-08 00:50:25 +00:00
const DataTypes & tuple_types = typeid_cast < const DataTypeTuple & > ( * type ) . getElements ( ) ;
size_t tuple_size = tuple_columns . size ( ) ;
for ( size_t i = 0 ; i < tuple_size ; + + i )
executeForArgument ( tuple_types [ i ] . get ( ) , tuple_columns [ i ] . get ( ) , vec_to , is_first ) ;
2015-10-29 00:12:04 +00:00
}
2019-01-04 12:10:00 +00:00
else if ( const ColumnTuple * tuple_const = checkAndGetColumnConstData < ColumnTuple > ( column ) )
2016-07-10 07:24:24 +00:00
{
2019-03-25 01:43:54 +00:00
const auto & tuple_columns = tuple_const - > getColumns ( ) ;
2017-12-09 12:23:09 +00:00
const DataTypes & tuple_types = typeid_cast < const DataTypeTuple & > ( * type ) . getElements ( ) ;
size_t tuple_size = tuple_columns . size ( ) ;
for ( size_t i = 0 ; i < tuple_size ; + + i )
{
2017-12-16 05:46:46 +00:00
auto tmp = ColumnConst : : create ( tuple_columns [ i ] , column - > size ( ) ) ;
executeForArgument ( tuple_types [ i ] . get ( ) , tmp . get ( ) , vec_to , is_first ) ;
2017-12-09 12:23:09 +00:00
}
2016-07-10 07:24:24 +00:00
}
2022-07-28 19:12:00 +00:00
else if ( const auto * map = checkAndGetColumn < ColumnMap > ( column ) )
{
const auto & type_map = assert_cast < const DataTypeMap & > ( * type ) ;
executeForArgument ( type_map . getNestedType ( ) . get ( ) , map - > getNestedColumnPtr ( ) . get ( ) , vec_to , is_first ) ;
}
else if ( const auto * const_map = checkAndGetColumnConstData < ColumnMap > ( column ) )
{
const auto & type_map = assert_cast < const DataTypeMap & > ( * type ) ;
executeForArgument ( type_map . getNestedType ( ) . get ( ) , const_map - > getNestedColumnPtr ( ) . get ( ) , vec_to , is_first ) ;
}
2015-10-29 00:12:04 +00:00
else
{
if ( is_first )
executeAny < true > ( type , column , vec_to ) ;
else
executeAny < false > ( type , column , vec_to ) ;
}
2017-04-01 07:20:54 +00:00
2015-10-29 00:12:04 +00:00
is_first = false ;
}
2014-07-03 11:24:01 +00:00
public :
2015-10-11 23:36:45 +00:00
/// Returns the function name, i.e. the class-level `name` constant (which is Impl::name).
String getName ( ) const override
2014-07-03 11:24:01 +00:00
{
2014-11-12 17:23:26 +00:00
return name ;
2014-07-03 11:24:01 +00:00
}
2017-04-01 07:20:54 +00:00
2016-12-29 19:38:10 +00:00
/// The function is declared variadic; executeImpl in this class also handles an empty argument list.
bool isVariadic ( ) const override { return true ; }
/// Reported as 0 together with isVariadic() == true.
/// NOTE(review): presumably ignored by the framework for variadic functions - confirm in IFunction.
size_t getNumberOfArguments ( ) const override { return 0 ; }
2017-12-17 10:51:19 +00:00
/// Always true: opts in to the framework's default handling of constant arguments.
bool useDefaultImplementationForConstants ( ) const override { return true ; }
2021-06-22 16:21:23 +00:00
/// Always true, regardless of the argument types (the parameter is intentionally unused).
bool isSuitableForShortCircuitArgumentsExecution ( const DataTypesWithConstInfo & /*arguments*/ ) const override { return true ; }
2017-04-01 07:20:54 +00:00
2017-12-02 02:47:12 +00:00
/** Result type of the function; the argument types do not influence it.
  * A 128-bit hash (ToType == UInt128) is exposed as FixedString(sizeof(UInt128)) for backward
  * compatibility; every other ToType is returned as the matching numeric type.
  */
DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override
{
    if constexpr (!std::is_same_v<ToType, UInt128>)
        return std::make_shared<DataTypeNumber<ToType>>();
    else
        return std::make_shared<DataTypeFixedString>(sizeof(UInt128)); /// backward-compatible
}
2017-04-01 07:20:54 +00:00
2020-11-17 13:24:45 +00:00
/** Computes one hash per row over all arguments.
  * Arguments are folded left to right with executeForArgument: the first one initializes the
  * hashes, the following ones are combined into them. With no arguments at all every row gets
  * the same fixed constant.
  * For ToType == UInt128 the buffer of hashes is moved into a FixedString column (backward
  * compatibility of the result type); otherwise the numeric column is returned as is.
  */
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
    auto hashes_column = ColumnVector<ToType>::create(input_rows_count);
    typename ColumnVector<ToType>::Container & hashes = hashes_column->getData();

    /// Constant random number from /dev/urandom is used as a hash value of empty list of arguments.
    if (arguments.empty())
        hashes.assign(input_rows_count, static_cast<ToType>(0xe28dbde7fe22e41c));

    /// The function supports arbitrary number of arguments of arbitrary types.
    bool nothing_hashed_yet = true;
    for (const auto & argument : arguments)
        executeForArgument(argument.type.get(), argument.column.get(), hashes, nothing_hashed_yet);

    if constexpr (std::is_same_v<ToType, UInt128>) /// backward-compatible
    {
        /// Hand the already computed bytes over to a FixedString column instead of copying them.
        auto fixed_string_column = ColumnFixedString::create(sizeof(UInt128));
        fixed_string_column->getChars() = std::move(*reinterpret_cast<ColumnFixedString::Chars *>(&hashes));
        return fixed_string_column;
    }

    return hashes_column;
}
} ;
2020-05-26 15:56:46 +00:00
) // DECLARE_MULTITARGET_CODE
template < typename Impl >
class FunctionAnyHash : public TargetSpecific : : Default : : FunctionAnyHash < Impl >
{
public :
2021-04-10 23:33:54 +00:00
explicit FunctionAnyHash ( ContextPtr context ) : selector ( context )
2020-05-26 15:56:46 +00:00
{
selector . registerImplementation < TargetArch : : Default ,
TargetSpecific : : Default : : FunctionAnyHash < Impl > > ( ) ;
# if USE_MULTITARGET_CODE
selector . registerImplementation < TargetArch : : AVX2 ,
TargetSpecific : : AVX2 : : FunctionAnyHash < Impl > > ( ) ;
selector . registerImplementation < TargetArch : : AVX512F ,
TargetSpecific : : AVX512F : : FunctionAnyHash < Impl > > ( ) ;
# endif
}
2020-11-17 13:24:45 +00:00
ColumnPtr executeImpl ( const ColumnsWithTypeAndName & arguments , const DataTypePtr & result_type , size_t input_rows_count ) const override
2020-05-26 15:56:46 +00:00
{
2020-10-18 19:00:13 +00:00
return selector . selectAndExecute ( arguments , result_type , input_rows_count ) ;
2020-05-26 15:56:46 +00:00
}
2021-04-10 23:33:54 +00:00
static FunctionPtr create ( ContextPtr context )
2020-05-26 15:56:46 +00:00
{
return std : : make_shared < FunctionAnyHash > ( context ) ;
}
private :
ImplementationSelector < IFunction > selector ;
} ;
2014-07-03 11:24:01 +00:00
2015-04-15 15:00:28 +00:00
/// Hash of a URL given as a byte range: CityHash64 (v1.0.2) of the bytes, with one trailing
/// separator character dropped first.
struct URLHashImpl
{
2017-07-21 06:35:58 +00:00
/// data/size describe the URL bytes; size may be 0, in which case nothing is stripped.
/// At most one trailing character is removed - the check is not repeated.
static UInt64 apply ( const char * data , const size_t size )
2015-04-15 15:00:28 +00:00
{
/// do not take last slash, '?' or '#' character into account
if ( size > 0 & & ( data [ size - 1 ] = = ' / ' | | data [ size - 1 ] = = ' ? ' | | data [ size - 1 ] = = ' # ' ) )
2017-06-21 08:35:38 +00:00
return CityHash_v1_0_2 : : CityHash64 ( data , size - 1 ) ;
2017-04-01 07:20:54 +00:00
2017-06-21 08:35:38 +00:00
return CityHash_v1_0_2 : : CityHash64 ( data , size ) ;
2015-04-15 15:00:28 +00:00
}
} ;
/// Hash of a prefix of a URL: the prefix is selected by a hierarchy level and then hashed
/// with URLHashImpl::apply.
struct URLHierarchyHashImpl
{
2017-07-21 06:35:58 +00:00
/// Returns the length in bytes of the prefix of [begin, end) that corresponds to `level`,
/// or 0 when the URL has no such level.
/// - URLs that do not look like "<scheme>://<something>": level 0 is the whole string, other levels give 0.
/// - Otherwise level 0 ends after the domain and the one separator that follows it (if any);
///   every further level adds one more path segment plus the one separator that follows it (if any).
static size_t findLevelLength ( const UInt64 level , const char * begin , const char * end )
2015-04-15 15:00:28 +00:00
{
2020-10-18 19:00:13 +00:00
const auto * pos = begin ;
2017-04-01 07:20:54 +00:00
2017-05-27 15:45:25 +00:00
/// Let's parse everything that goes before the path
2017-04-01 07:20:54 +00:00
2017-05-27 15:45:25 +00:00
/// Suppose that the protocol has already been changed to lowercase.
2015-04-15 15:00:28 +00:00
/// NOTE(review): the range comparisons below are strict, so the boundary characters of both
/// ranges ('a', 'z', '0', '9') stop the scan. This may be unintended, but changing it would
/// change the resulting hash values - confirm before touching.
while ( pos < end & & ( ( * pos > ' a ' & & * pos < ' z ' ) | | ( * pos > ' 0 ' & & * pos < ' 9 ' ) ) )
+ + pos ;
2017-04-01 07:20:54 +00:00
2017-05-27 15:45:25 +00:00
/** We will calculate the hierarchy only for URLs in which there is a protocol, and after it there are two slashes.
* ( http , file - fit , mailto , magnet - do not fit ) , and after two slashes there is still something
* For the rest , simply return the full URL as the only element of the hierarchy .
2015-04-15 15:00:28 +00:00
*/
/// The condition consumes ':' and the two slashes with post-increments, left to right; the
/// bounds checks in between keep every dereference inside [begin, end).
if ( pos = = begin | | pos = = end | | ! ( * pos + + = = ' : ' & & pos < end & & * pos + + = = ' / ' & & pos < end & & * pos + + = = ' / ' & & pos < end ) )
{
pos = end ;
return 0 = = level ? pos - begin : 0 ;
}
2017-04-01 07:20:54 +00:00
2017-05-27 15:45:25 +00:00
/// The domain for simplicity is everything that after the protocol and the two slashes, until the next slash or before `?` or `#`
2015-04-15 15:00:28 +00:00
while ( pos < end & & ! ( * pos = = ' / ' | | * pos = = ' ? ' | | * pos = = ' # ' ) )
+ + pos ;
2017-04-01 07:20:54 +00:00
2015-04-15 15:00:28 +00:00
/// Include the separator that terminated the domain, if there is one.
if ( pos ! = end )
+ + pos ;
2017-04-01 07:20:54 +00:00
2015-04-15 15:00:28 +00:00
if ( 0 = = level )
return pos - begin ;
2017-04-01 07:20:54 +00:00
2015-04-15 15:00:28 +00:00
UInt64 current_level = 0 ;
2017-04-01 07:20:54 +00:00
2015-04-15 15:00:28 +00:00
/// Each iteration consumes one path segment; the loop stops early when the input runs out.
while ( current_level ! = level & & pos < end )
{
2017-05-27 15:45:25 +00:00
/// We go to the next `/` or `?` or `#`, skipping all at the beginning.
2015-04-15 15:00:28 +00:00
while ( pos < end & & ( * pos = = ' / ' | | * pos = = ' ? ' | | * pos = = ' # ' ) )
+ + pos ;
if ( pos = = end )
break ;
while ( pos < end & & ! ( * pos = = ' / ' | | * pos = = ' ? ' | | * pos = = ' # ' ) )
+ + pos ;
2017-04-01 07:20:54 +00:00
2015-04-15 15:00:28 +00:00
if ( pos ! = end )
+ + pos ;
2017-04-01 07:20:54 +00:00
2015-04-15 15:00:28 +00:00
+ + current_level ;
}
2017-04-01 07:20:54 +00:00
2015-04-15 15:00:28 +00:00
/// 0 means the requested level does not exist in this URL.
return current_level = = level ? pos - begin : 0 ;
}
2017-04-01 07:20:54 +00:00
2017-07-21 06:35:58 +00:00
/// Hashes the first findLevelLength(level, ...) bytes of the URL with URLHashImpl::apply.
/// A missing level yields length 0, i.e. the hash of an empty range.
static UInt64 apply ( const UInt64 level , const char * data , const size_t size )
2015-04-15 15:00:28 +00:00
{
return URLHashImpl : : apply ( data , findLevelLength ( level , data , data + size ) ) ;
}
} ;
class FunctionURLHash : public IFunction
{
public :
static constexpr auto name = " URLHash " ;
2021-04-10 23:33:54 +00:00
static FunctionPtr create ( ContextPtr ) { return std : : make_shared < FunctionURLHash > ( ) ; }
2017-04-01 07:20:54 +00:00
2015-04-15 15:00:28 +00:00
String getName ( ) const override { return name ; }
2017-04-01 07:20:54 +00:00
2016-12-29 19:38:10 +00:00
bool isVariadic ( ) const override { return true ; }
size_t getNumberOfArguments ( ) const override { return 0 ; }
2021-06-22 16:21:23 +00:00
bool isSuitableForShortCircuitArgumentsExecution ( const DataTypesWithConstInfo & /*arguments*/ ) const override { return true ; }
2017-04-01 07:20:54 +00:00
2016-07-06 09:47:55 +00:00
DataTypePtr getReturnTypeImpl ( const DataTypes & arguments ) const override
2015-04-15 15:00:28 +00:00
{
const auto arg_count = arguments . size ( ) ;
if ( arg_count ! = 1 & & arg_count ! = 2 )
2018-05-07 02:01:11 +00:00
throw Exception { " Number of arguments for function " + getName ( ) + " doesn't match: passed " +
toString ( arg_count ) + " , should be 1 or 2. " , ErrorCodes : : NUMBER_OF_ARGUMENTS_DOESNT_MATCH } ;
2017-04-01 07:20:54 +00:00
2020-10-18 19:00:13 +00:00
const auto * first_arg = arguments . front ( ) . get ( ) ;
2018-09-07 14:37:26 +00:00
if ( ! WhichDataType ( first_arg ) . isString ( ) )
2018-05-07 02:01:11 +00:00
throw Exception { " Illegal type " + first_arg - > getName ( ) + " of argument of function " + getName ( ) , ErrorCodes : : ILLEGAL_TYPE_OF_ARGUMENT } ;
2017-04-01 07:20:54 +00:00
2015-04-15 15:00:28 +00:00
if ( arg_count = = 2 )
{
2018-09-07 14:37:26 +00:00
const auto & second_arg = arguments . back ( ) ;
if ( ! isInteger ( second_arg ) )
2018-05-07 02:01:11 +00:00
throw Exception { " Illegal type " + second_arg - > getName ( ) + " of argument of function " + getName ( ) , ErrorCodes : : ILLEGAL_TYPE_OF_ARGUMENT } ;
2015-04-15 15:00:28 +00:00
}
2017-04-01 07:20:54 +00:00
2016-05-28 07:48:40 +00:00
return std : : make_shared < DataTypeUInt64 > ( ) ;
2015-04-15 15:00:28 +00:00
}
2017-04-01 07:20:54 +00:00
2017-07-23 08:40:43 +00:00
bool useDefaultImplementationForConstants ( ) const override { return true ; }
ColumnNumbers getArgumentsThatAreAlwaysConstant ( ) const override { return { 1 } ; }
2020-11-17 13:24:45 +00:00
ColumnPtr executeImpl ( const ColumnsWithTypeAndName & arguments , const DataTypePtr & , size_t /*input_rows_count*/ ) const override
2015-04-15 15:00:28 +00:00
{
const auto arg_count = arguments . size ( ) ;
2017-04-01 07:20:54 +00:00
2015-04-15 15:00:28 +00:00
if ( arg_count = = 1 )
2020-10-18 19:00:13 +00:00
return executeSingleArg ( arguments ) ;
2015-04-15 15:00:28 +00:00
else if ( arg_count = = 2 )
2020-10-18 19:00:13 +00:00
return executeTwoArgs ( arguments ) ;
2015-04-15 15:00:28 +00:00
else
2017-06-13 02:06:53 +00:00
throw Exception { " got into IFunction::execute with unexpected number of arguments " , ErrorCodes : : LOGICAL_ERROR } ;
2015-04-15 15:00:28 +00:00
}
private :
2020-11-17 13:24:45 +00:00
ColumnPtr executeSingleArg ( const ColumnsWithTypeAndName & arguments ) const
2015-04-15 15:00:28 +00:00
{
2020-10-18 19:00:13 +00:00
const auto * col_untyped = arguments . front ( ) . column . get ( ) ;
2017-04-01 07:20:54 +00:00
2020-10-18 19:00:13 +00:00
if ( const auto * col_from = checkAndGetColumn < ColumnString > ( col_untyped ) )
2015-04-15 15:00:28 +00:00
{
const auto size = col_from - > size ( ) ;
2017-12-16 05:46:46 +00:00
auto col_to = ColumnUInt64 : : create ( size ) ;
2017-04-01 07:20:54 +00:00
2015-04-15 15:00:28 +00:00
const auto & chars = col_from - > getChars ( ) ;
const auto & offsets = col_from - > getOffsets ( ) ;
auto & out = col_to - > getData ( ) ;
2017-04-01 07:20:54 +00:00
2018-07-30 18:00:16 +00:00
ColumnString : : Offset current_offset = 0 ;
for ( size_t i = 0 ; i < size ; + + i )
{
2015-04-15 15:00:28 +00:00
out [ i ] = URLHashImpl : : apply (
2018-07-30 18:00:16 +00:00
reinterpret_cast < const char * > ( & chars [ current_offset ] ) ,
offsets [ i ] - current_offset - 1 ) ;
current_offset = offsets [ i ] ;
}
2017-12-16 05:21:04 +00:00
2020-10-18 19:00:13 +00:00
return col_to ;
2015-04-15 15:00:28 +00:00
}
else
2020-10-18 19:00:13 +00:00
throw Exception { " Illegal column " + arguments [ 0 ] . column - > getName ( ) +
2018-05-07 02:01:11 +00:00
" of argument of function " + getName ( ) , ErrorCodes : : ILLEGAL_COLUMN } ;
2015-04-15 15:00:28 +00:00
}
2017-04-01 07:20:54 +00:00
2020-11-17 13:24:45 +00:00
ColumnPtr executeTwoArgs ( const ColumnsWithTypeAndName & arguments ) const
2015-04-15 15:00:28 +00:00
{
2020-10-18 19:00:13 +00:00
const auto * level_col = arguments . back ( ) . column . get ( ) ;
2019-06-27 19:28:52 +00:00
if ( ! isColumnConst ( * level_col ) )
2018-05-07 02:01:11 +00:00
throw Exception { " Second argument of function " + getName ( ) + " must be an integral constant " , ErrorCodes : : ILLEGAL_COLUMN } ;
2017-04-01 07:20:54 +00:00
2015-04-15 15:00:28 +00:00
const auto level = level_col - > get64 ( 0 ) ;
2017-04-01 07:20:54 +00:00
2020-10-18 19:00:13 +00:00
const auto * col_untyped = arguments . front ( ) . column . get ( ) ;
if ( const auto * col_from = checkAndGetColumn < ColumnString > ( col_untyped ) )
2015-04-15 15:00:28 +00:00
{
const auto size = col_from - > size ( ) ;
2017-12-16 05:46:46 +00:00
auto col_to = ColumnUInt64 : : create ( size ) ;
2017-04-01 07:20:54 +00:00
2015-04-15 15:00:28 +00:00
const auto & chars = col_from - > getChars ( ) ;
const auto & offsets = col_from - > getOffsets ( ) ;
auto & out = col_to - > getData ( ) ;
2017-04-01 07:20:54 +00:00
2018-07-30 18:00:16 +00:00
ColumnString : : Offset current_offset = 0 ;
for ( size_t i = 0 ; i < size ; + + i )
{
out [ i ] = URLHierarchyHashImpl : : apply (
level ,
reinterpret_cast < const char * > ( & chars [ current_offset ] ) ,
offsets [ i ] - current_offset - 1 ) ;
current_offset = offsets [ i ] ;
}
2017-12-16 05:21:04 +00:00
2020-10-18 19:00:13 +00:00
return col_to ;
2015-04-15 15:00:28 +00:00
}
else
2020-10-18 19:00:13 +00:00
throw Exception { " Illegal column " + arguments [ 0 ] . column - > getName ( ) +
2018-05-07 02:01:11 +00:00
" of argument of function " + getName ( ) , ErrorCodes : : ILLEGAL_COLUMN } ;
2015-04-15 15:00:28 +00:00
}
} ;
2022-04-20 23:26:37 +00:00
/// Hash implementation backed by wyhash; plugged into FunctionAnyHash to form FunctionWyHash64.
struct ImplWyHash64
{
    static constexpr auto name = " wyHash64 ";
    using ReturnType = UInt64;

    /// Hashes `len` bytes starting at `s` by calling wyhash with seed 0 and `_wyp` as the secret argument.
    static UInt64 apply(const char * s, const size_t len)
    {
        return wyhash(s, len, 0, _wyp);
    }

    /// Combines two hashes by hashing their 16-byte concatenation: the bytes of `h1`
    /// followed by the bytes of `h2`, in the machine's native representation.
    static UInt64 combineHashes(UInt64 h1, UInt64 h2)
    {
        /// A plain array viewed through `const char *` is used instead of a union:
        /// reading a union member other than the one last written is undefined behaviour in C++,
        /// whereas inspecting any object's bytes through a char pointer is well-defined.
        const UInt64 pair[2] = {h1, h2};
        static_assert(sizeof(pair) == 16, "two 64-bit hashes must occupy exactly 16 bytes");
        return apply(reinterpret_cast<const char *>(pair), sizeof(pair));
    }

    static constexpr bool use_int_hash_for_pods = false;
};
2015-04-15 15:00:28 +00:00
2017-06-13 02:06:53 +00:00
/// Name tag type; supplied as the second template argument of FunctionIntHash for the 32-bit variant.
struct NameIntHash32
{
    static constexpr auto name = " intHash32 ";
};
/// Name tag type; supplied as the second template argument of FunctionIntHash for the 64-bit variant.
struct NameIntHash64
{
    static constexpr auto name = " intHash64 ";
};
2018-08-02 09:21:26 +00:00
2018-10-23 17:43:09 +00:00
/// Concrete function types assembled from the implementation structs.
/// The integer hashes go through FunctionIntHash and receive their name from a separate tag type.
using FunctionIntHash32 = FunctionIntHash<IntHash32Impl, NameIntHash32>;
using FunctionIntHash64 = FunctionIntHash<IntHash64Impl, NameIntHash64>;

/// SipHash64 goes through the generic FunctionAnyHash wrapper.
using FunctionSipHash64 = FunctionAnyHash<SipHash64Impl>;
2019-03-22 11:18:24 +00:00
#if USE_SSL
/// These aliases exist only when USE_SSL is enabled (the OpenSSL digest headers are included under the same guard).

/// HalfMD5 is the only one of this group wrapped in FunctionAnyHash.
using FunctionHalfMD5 = FunctionAnyHash<HalfMD5Impl>;

/// MD and SHA family digests, all wrapped in FunctionStringHashFixedString.
using FunctionMD4    = FunctionStringHashFixedString<MD4Impl>;
using FunctionMD5    = FunctionStringHashFixedString<MD5Impl>;
using FunctionSHA1   = FunctionStringHashFixedString<SHA1Impl>;
using FunctionSHA224 = FunctionStringHashFixedString<SHA224Impl>;
using FunctionSHA256 = FunctionStringHashFixedString<SHA256Impl>;
using FunctionSHA384 = FunctionStringHashFixedString<SHA384Impl>;
using FunctionSHA512 = FunctionStringHashFixedString<SHA512Impl>;
#endif
2021-09-13 08:20:20 +00:00
using FunctionSipHash128 = FunctionAnyHash < SipHash128Impl > ;
2018-10-23 17:43:09 +00:00
using FunctionCityHash64 = FunctionAnyHash < ImplCityHash64 > ;
2020-10-31 12:45:53 +00:00
using FunctionFarmFingerprint64 = FunctionAnyHash < ImplFarmFingerprint64 > ;
2018-10-23 17:43:09 +00:00
using FunctionFarmHash64 = FunctionAnyHash < ImplFarmHash64 > ;
using FunctionMetroHash64 = FunctionAnyHash < ImplMetroHash64 > ;
2020-04-16 12:31:57 +00:00
2018-10-23 17:43:09 +00:00
using FunctionMurmurHash2_32 = FunctionAnyHash < MurmurHash2Impl32 > ;
using FunctionMurmurHash2_64 = FunctionAnyHash < MurmurHash2Impl64 > ;
2019-01-09 02:03:50 +00:00
using FunctionGccMurmurHash = FunctionAnyHash < GccMurmurHashImpl > ;
2018-10-23 17:43:09 +00:00
using FunctionMurmurHash3_32 = FunctionAnyHash < MurmurHash3Impl32 > ;
using FunctionMurmurHash3_64 = FunctionAnyHash < MurmurHash3Impl64 > ;
2021-09-13 08:20:20 +00:00
using FunctionMurmurHash3_128 = FunctionAnyHash < MurmurHash3Impl128 > ;
2020-04-16 12:31:57 +00:00
2018-12-18 20:24:16 +00:00
using FunctionJavaHash = FunctionAnyHash < JavaHashImpl > ;
2019-11-06 10:46:37 +00:00
using FunctionJavaHashUTF16LE = FunctionAnyHash < JavaHashUTF16LEImpl > ;
2018-12-18 20:24:16 +00:00
using FunctionHiveHash = FunctionAnyHash < HiveHashImpl > ;
2018-12-21 17:53:16 +00:00
2022-01-18 06:51:13 +00:00
using FunctionXxHash32 = FunctionAnyHash < ImplXxHash32 > ;
using FunctionXxHash64 = FunctionAnyHash < ImplXxHash64 > ;
2018-12-23 19:25:40 +00:00
2022-04-20 23:26:37 +00:00
using FunctionWyHash64 = FunctionAnyHash < ImplWyHash64 > ;
2022-10-05 20:38:44 +00:00
using FunctionBLAKE3 = FunctionStringHashFixedString < ImplBLAKE3 > ;
2018-07-30 13:50:26 +00:00
}