2019-01-21 10:39:53 +00:00
# pragma once
# include <Columns/IColumn.h>
2020-12-10 23:56:57 +00:00
# include <Columns/ColumnNullable.h>
2019-08-21 02:28:04 +00:00
# include <Common/assert_cast.h>
2019-07-31 15:44:03 +00:00
# include <Common/HashTable/HashTableKeyHolder.h>
2019-01-21 10:39:53 +00:00
# include <Interpreters/AggregationCommon.h>
2019-08-21 02:28:04 +00:00
2019-01-21 10:39:53 +00:00
namespace DB
{
2020-02-25 18:10:48 +00:00
/// Error codes referenced by this header. The constant is only declared here
/// (`extern`); its definition is not part of this file.
namespace ErrorCodes
{
extern const int LOGICAL_ERROR ;
}
2019-01-21 10:39:53 +00:00
namespace ColumnsHashing
{
2019-02-01 08:23:38 +00:00
/// Base class for the state a HashMethod may cache between calls.
/// One context object is shared between multiple threads, therefore every
/// method of a derived context must be thread-safe.
class HashMethodContext
{
public:
    /// Parameters accepted by `createContext` of a hash method.
    struct Settings
    {
        size_t max_threads;
    };

    virtual ~HashMethodContext() = default;
};

using HashMethodContextPtr = std::shared_ptr<HashMethodContext>;
2019-01-21 10:39:53 +00:00
namespace columns_hashing_impl
{
/// Remembers the last element that went through emplace/find, so that a run of
/// identical consecutive keys can be answered without looking into the hash table.
///
/// `Value` is either the key itself or a pair-like object whose `first` member is
/// the key. `value` intentionally has no initializer here; HashMethodBase assigns
/// it in its constructor.
template <typename Value, bool consecutive_keys_optimization_>
struct LastElementCache
{
    static constexpr bool consecutive_keys_optimization = consecutive_keys_optimization_;

    Value value;
    bool empty = true;   /// Nothing has been stored yet.
    bool found = false;  /// Outcome of the lookup that filled the cache.

    /// True if the cache is filled and holds exactly `value_`.
    /// Read-only, hence callable through a const reference.
    bool check(const Value & value_) const { return !empty && value == value_; }

    /// True if the cache is filled and its key part (`value.first`) equals `key`.
    /// Read-only, hence callable through a const reference.
    template <typename Key>
    bool check(const Key & key) const { return !empty && value.first == key; }
};

/// With the optimization switched off the cache carries no state at all.
template <typename Data>
struct LastElementCache<Data, false>
{
    static constexpr bool consecutive_keys_optimization = false;
};
/// Result of an emplace into a hash table that has a mapped value.
/// Holds two references - to the mapped value and to a cached copy of it -
/// and setMapped() writes to both of them.
template <typename Mapped>
class EmplaceResultImpl
{
public:
    EmplaceResultImpl(Mapped & value_, Mapped & cached_value_, bool inserted_)
        : mapped_ref(value_)
        , cached_ref(cached_value_)
        , was_inserted(inserted_)
    {
    }

    /// Whether the flag passed to the constructor was `true`.
    bool isInserted() const { return was_inserted; }

    /// Reference to the first (non-cached) value.
    Mapped & getMapped() const { return mapped_ref; }

    /// Assigns `mapped` to the cached copy first, then to the value itself.
    void setMapped(const Mapped & mapped)
    {
        cached_ref = mapped;
        mapped_ref = mapped;
    }

private:
    Mapped & mapped_ref;
    Mapped & cached_ref;
    bool was_inserted;
};

/// Without a mapped value only the "inserted" flag remains.
template <>
class EmplaceResultImpl<void>
{
public:
    explicit EmplaceResultImpl(bool inserted_) : was_inserted(inserted_) {}

    bool isInserted() const { return was_inserted; }

private:
    bool was_inserted;
};
2021-02-08 11:38:31 +00:00
/// A find result always tells whether the key was found. In addition it may
/// carry a pointer to the value and an offset in the hashtable buffer.
/// Each optional part is either present or absent, which gives
/// 4 different specializations of FindResultImpl.
class FindResultImplBase
{
public:
    explicit FindResultImplBase(bool found_) : is_found(found_) {}

    bool isFound() const { return is_found; }

private:
    bool is_found;
};
template < bool need_offset = false >
class FindResultImplOffsetBase
{
public :
constexpr static bool has_offset = need_offset ;
explicit FindResultImplOffsetBase ( size_t /* off */ ) { }
2019-01-21 10:39:53 +00:00
} ;
template < >
2021-02-04 14:46:36 +00:00
class FindResultImplOffsetBase < true >
2019-01-21 10:39:53 +00:00
{
2021-02-04 14:46:36 +00:00
size_t offset ;
public :
constexpr static bool has_offset = true ;
explicit FindResultImplOffsetBase ( size_t off ) : offset ( off ) { }
ALWAYS_INLINE size_t getOffset ( ) const { return offset ; }
} ;
/// Find result for hash tables with a mapped value: found flag + optional offset
/// (both inherited) + pointer to the mapped value.
template <typename Mapped, bool need_offset = false>
class FindResultImpl : public FindResultImplBase, public FindResultImplOffsetBase<need_offset>
{
    using OffsetBase = FindResultImplOffsetBase<need_offset>;

public:
    /// "Not found" result with offset 0. The value pointer is left uninitialized on purpose.
    FindResultImpl()
        : FindResultImplBase(false), OffsetBase(0) // NOLINT(clang-analyzer-optin.cplusplus.UninitializedObject) intentionally allow uninitialized value here
    {
    }

    FindResultImpl(Mapped * value_, bool found_, size_t off)
        : FindResultImplBase(found_)
        , OffsetBase(off)
        , value(value_)
    {
    }

    /// Dereferences the stored pointer without any check.
    Mapped & getMapped() const { return *value; }

private:
    Mapped * value;
};

/// Find result for hash tables without a mapped value: found flag + optional offset only.
template <bool need_offset>
class FindResultImpl<void, need_offset> : public FindResultImplBase, public FindResultImplOffsetBase<need_offset>
{
public:
    FindResultImpl(bool found_, size_t off)
        : FindResultImplBase(found_)
        , FindResultImplOffsetBase<need_offset>(off)
    {
    }
};
template < typename Derived , typename Value , typename Mapped , bool consecutive_keys_optimization , bool need_offset = false >
2019-02-01 08:23:38 +00:00
class HashMethodBase
2019-01-21 10:39:53 +00:00
{
2019-02-01 08:23:38 +00:00
public :
2019-01-21 10:39:53 +00:00
using EmplaceResult = EmplaceResultImpl < Mapped > ;
2021-02-04 14:46:36 +00:00
using FindResult = FindResultImpl < Mapped , need_offset > ;
2022-03-11 13:34:58 +00:00
static constexpr bool has_mapped = ! std : : is_same_v < Mapped , void > ;
2019-01-21 10:39:53 +00:00
using Cache = LastElementCache < Value , consecutive_keys_optimization > ;
2019-02-01 08:23:38 +00:00
static HashMethodContextPtr createContext ( const HashMethodContext : : Settings & ) { return nullptr ; }
template < typename Data >
ALWAYS_INLINE EmplaceResult emplaceKey ( Data & data , size_t row , Arena & pool )
{
2019-07-31 15:44:03 +00:00
auto key_holder = static_cast < Derived & > ( * this ) . getKeyHolder ( row , pool ) ;
return emplaceImpl ( key_holder , data ) ;
2019-02-01 08:23:38 +00:00
}
template < typename Data >
ALWAYS_INLINE FindResult findKey ( Data & data , size_t row , Arena & pool )
{
2019-07-31 15:44:03 +00:00
auto key_holder = static_cast < Derived & > ( * this ) . getKeyHolder ( row , pool ) ;
return findKeyImpl ( keyHolderGetKey ( key_holder ) , data ) ;
2019-02-01 08:23:38 +00:00
}
template < typename Data >
ALWAYS_INLINE size_t getHash ( const Data & data , size_t row , Arena & pool )
{
2019-07-31 15:44:03 +00:00
auto key_holder = static_cast < Derived & > ( * this ) . getKeyHolder ( row , pool ) ;
return data . hash ( keyHolderGetKey ( key_holder ) ) ;
2019-02-01 08:23:38 +00:00
}
2019-01-21 10:39:53 +00:00
protected :
Cache cache ;
HashMethodBase ( )
{
2019-02-04 14:36:15 +00:00
if constexpr ( consecutive_keys_optimization )
2019-01-21 10:39:53 +00:00
{
2019-02-04 14:36:15 +00:00
if constexpr ( has_mapped )
{
/// Init PairNoInit elements.
2019-08-01 15:57:02 +00:00
cache . value . second = Mapped ( ) ;
cache . value . first = { } ;
2019-02-04 14:36:15 +00:00
}
else
cache . value = Value ( ) ;
2019-01-21 10:39:53 +00:00
}
}
2019-07-31 15:44:03 +00:00
template < typename Data , typename KeyHolder >
ALWAYS_INLINE EmplaceResult emplaceImpl ( KeyHolder & key_holder , Data & data )
2019-01-21 10:39:53 +00:00
{
if constexpr ( Cache : : consecutive_keys_optimization )
{
2019-07-31 15:44:03 +00:00
if ( cache . found & & cache . check ( keyHolderGetKey ( key_holder ) ) )
2019-01-21 10:39:53 +00:00
{
if constexpr ( has_mapped )
2019-08-01 15:57:02 +00:00
return EmplaceResult ( cache . value . second , cache . value . second , false ) ;
2019-01-21 10:39:53 +00:00
else
return EmplaceResult ( false ) ;
}
}
2019-08-20 09:58:44 +00:00
typename Data : : LookupResult it ;
2019-01-21 10:39:53 +00:00
bool inserted = false ;
2019-07-31 15:44:03 +00:00
data . emplace ( key_holder , it , inserted ) ;
2019-02-01 08:23:38 +00:00
2019-02-04 14:36:15 +00:00
[[maybe_unused]] Mapped * cached = nullptr ;
if constexpr ( has_mapped )
2019-10-29 15:16:51 +00:00
cached = & it - > getMapped ( ) ;
2019-02-01 08:23:38 +00:00
if ( inserted )
{
if constexpr ( has_mapped )
2019-02-05 09:43:14 +00:00
{
2019-10-29 15:16:51 +00:00
new ( & it - > getMapped ( ) ) Mapped ( ) ;
2019-02-05 09:43:14 +00:00
}
2019-02-01 08:23:38 +00:00
}
2019-01-21 10:39:53 +00:00
if constexpr ( consecutive_keys_optimization )
{
cache . found = true ;
cache . empty = false ;
2019-02-04 14:36:15 +00:00
if constexpr ( has_mapped )
2019-08-20 09:58:44 +00:00
{
2019-10-29 15:16:51 +00:00
cache . value . first = it - > getKey ( ) ;
cache . value . second = it - > getMapped ( ) ;
2019-08-01 15:57:02 +00:00
cached = & cache . value . second ;
2019-08-20 09:58:44 +00:00
}
else
{
2019-10-29 15:16:51 +00:00
cache . value = it - > getKey ( ) ;
2019-08-20 09:58:44 +00:00
}
2019-01-21 10:39:53 +00:00
}
if constexpr ( has_mapped )
2019-10-29 15:16:51 +00:00
return EmplaceResult ( it - > getMapped ( ) , * cached , inserted ) ;
2019-01-21 10:39:53 +00:00
else
return EmplaceResult ( inserted ) ;
}
template < typename Data , typename Key >
ALWAYS_INLINE FindResult findKeyImpl ( Key key , Data & data )
{
if constexpr ( Cache : : consecutive_keys_optimization )
{
2021-02-08 11:38:31 +00:00
/// It's possible to support such combination, but code will became more complex.
/// Now there's not place where we need this options enabled together
2021-02-04 14:46:36 +00:00
static_assert ( ! FindResult : : has_offset , " `consecutive_keys_optimization` and `has_offset` are conflicting options " ) ;
2019-01-21 10:39:53 +00:00
if ( cache . check ( key ) )
{
if constexpr ( has_mapped )
2021-02-04 14:46:36 +00:00
return FindResult ( & cache . value . second , cache . found , 0 ) ;
2019-01-21 10:39:53 +00:00
else
2021-02-04 14:46:36 +00:00
return FindResult ( cache . found , 0 ) ;
2019-01-21 10:39:53 +00:00
}
}
auto it = data . find ( key ) ;
if constexpr ( consecutive_keys_optimization )
{
2019-08-20 09:58:44 +00:00
cache . found = it ! = nullptr ;
2019-01-21 10:39:53 +00:00
cache . empty = false ;
2019-08-20 09:58:44 +00:00
if constexpr ( has_mapped )
{
cache . value . first = key ;
if ( it )
{
2019-10-29 15:16:51 +00:00
cache . value . second = it - > getMapped ( ) ;
2019-08-20 09:58:44 +00:00
}
}
2019-01-21 10:39:53 +00:00
else
{
2019-08-20 09:58:44 +00:00
cache . value = key ;
2019-01-21 10:39:53 +00:00
}
}
2021-02-04 14:46:36 +00:00
size_t offset = 0 ;
if constexpr ( FindResult : : has_offset )
{
offset = it ? data . offsetInternal ( it ) : 0 ;
}
2019-01-21 10:39:53 +00:00
if constexpr ( has_mapped )
2021-02-04 14:46:36 +00:00
return FindResult ( it ? & it - > getMapped ( ) : nullptr , it ! = nullptr , offset ) ;
2019-01-21 10:39:53 +00:00
else
2021-02-04 14:46:36 +00:00
return FindResult ( it ! = nullptr , offset ) ;
2019-01-21 10:39:53 +00:00
}
} ;
/// Inherits everything from PaddedPODArray<T>.
template <typename T>
struct MappedCache : PaddedPODArray<T>
{
};

/// For `void` there is nothing to store, so the specialization is empty.
template <>
struct MappedCache<void>
{
};
/// This class is designed to provide the functionality that is required for
/// supporting nullable keys in HashMethodKeysFixed. If there are
/// no nullable keys, this class is merely implemented as an empty shell.
template < typename Key , bool has_nullable_keys >
class BaseStateKeysFixed ;
/// Case where nullable keys are supported.
template < typename Key >
class BaseStateKeysFixed < Key , true >
{
protected :
2022-03-11 21:47:28 +00:00
explicit BaseStateKeysFixed ( const ColumnRawPtrs & key_columns )
2019-01-21 10:39:53 +00:00
{
null_maps . reserve ( key_columns . size ( ) ) ;
actual_columns . reserve ( key_columns . size ( ) ) ;
for ( const auto & col : key_columns )
{
2022-03-11 21:47:28 +00:00
if ( const auto * nullable_col = checkAndGetColumn < ColumnNullable > ( col ) )
2019-01-21 10:39:53 +00:00
{
2019-06-26 17:20:33 +00:00
actual_columns . push_back ( & nullable_col - > getNestedColumn ( ) ) ;
null_maps . push_back ( & nullable_col - > getNullMapColumn ( ) ) ;
2019-01-21 10:39:53 +00:00
}
else
{
actual_columns . push_back ( col ) ;
null_maps . push_back ( nullptr ) ;
}
}
}
/// Return the columns which actually contain the values of the keys.
/// For a given key column, if it is nullable, we return its nested
/// column. Otherwise we return the key column itself.
inline const ColumnRawPtrs & getActualColumns ( ) const
{
return actual_columns ;
}
/// Create a bitmap that indicates whether, for a particular row,
/// a key column bears a null value or not.
KeysNullMap < Key > createBitmap ( size_t row ) const
{
KeysNullMap < Key > bitmap { } ;
for ( size_t k = 0 ; k < null_maps . size ( ) ; + + k )
{
if ( null_maps [ k ] ! = nullptr )
{
2019-08-21 02:28:04 +00:00
const auto & null_map = assert_cast < const ColumnUInt8 & > ( * null_maps [ k ] ) . getData ( ) ;
2019-01-21 10:39:53 +00:00
if ( null_map [ row ] = = 1 )
{
size_t bucket = k / 8 ;
size_t offset = k % 8 ;
bitmap [ bucket ] | = UInt8 ( 1 ) < < offset ;
}
}
}
return bitmap ;
}
private :
ColumnRawPtrs actual_columns ;
ColumnRawPtrs null_maps ;
} ;
/// Case where nullable keys are not supported.
template < typename Key >
class BaseStateKeysFixed < Key , false >
{
protected :
2022-03-11 21:47:28 +00:00
explicit BaseStateKeysFixed ( const ColumnRawPtrs & columns ) : actual_columns ( columns ) { }
2019-01-21 10:39:53 +00:00
const ColumnRawPtrs & getActualColumns ( ) const { return actual_columns ; }
KeysNullMap < Key > createBitmap ( size_t ) const
{
throw Exception { " Internal error: calling createBitmap() for non-nullable keys "
" is forbidden " , ErrorCodes : : LOGICAL_ERROR } ;
}
private :
ColumnRawPtrs actual_columns ;
} ;
}
}
}