diff --git a/src/AggregateFunctions/AggregateFunctionEntropy.h b/src/AggregateFunctions/AggregateFunctionEntropy.h index 7586cebd8ec..59cacd0e9ca 100644 --- a/src/AggregateFunctions/AggregateFunctionEntropy.h +++ b/src/AggregateFunctions/AggregateFunctionEntropy.h @@ -23,18 +23,12 @@ struct EntropyData { using Weight = UInt64; - using HashingMap = HashMap< - Value, Weight, - HashCRC32, - HashTableGrower<4>, - HashTableAllocatorWithStackMemory) * (1 << 3)>>; + using HashingMap = HASH_TABLE_WITH_STACK_MEMORY(HashMap, Value, Weight, + HashCRC32, HashTableGrower<4>); /// For the case of pre-hashed values. - using TrivialMap = HashMap< - Value, Weight, - UInt128TrivialHash, - HashTableGrower<4>, - HashTableAllocatorWithStackMemory) * (1 << 3)>>; + using TrivialMap = HASH_TABLE_WITH_STACK_MEMORY(HashMap, Value, Weight, + UInt128TrivialHash, HashTableGrower<4>); using Map = std::conditional_t, TrivialMap, HashingMap>; diff --git a/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h b/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h index 9dbf2c921c2..af97df209f6 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h +++ b/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h @@ -28,12 +28,8 @@ template struct AggregateFunctionGroupUniqArrayData { /// When creating, the hash table must be small. - using Set = HashSet< - T, - DefaultHash, - HashTableGrower<4>, - HashTableAllocatorWithStackMemory - >; + using Set = HASH_TABLE_WITH_STACK_MEMORY(HashSet, T, DefaultHash, + HashTableGrower<4>); Set value; }; @@ -126,9 +122,10 @@ public: /// Generic implementation, it uses serialized representation as object descriptor. struct AggregateFunctionGroupUniqArrayGenericData { - static constexpr size_t INIT_ELEMS = 2; /// adjustable - static constexpr size_t ELEM_SIZE = sizeof(HashSetCellWithSavedHash); - using Set = HashSetWithSavedHash, HashTableAllocatorWithStackMemory>; + static constexpr size_t INITIAL_SIZE_DEGREE = 1; /// adjustable + + using Set = HASH_TABLE_WITH_STACK_MEMORY(HashSetWithSavedHash, + StringRef, StringRefHash, HashTableGrower); Set value; }; diff --git a/src/AggregateFunctions/AggregateFunctionTopK.h b/src/AggregateFunctions/AggregateFunctionTopK.h index 9c5e62bb6d7..23eb0e7ff09 100644 --- a/src/AggregateFunctions/AggregateFunctionTopK.h +++ b/src/AggregateFunctions/AggregateFunctionTopK.h @@ -23,13 +23,8 @@ namespace DB template struct AggregateFunctionTopKData { - using Set = SpaceSaving - < - T, - HashCRC32, - HashTableGrower<4>, - HashTableAllocatorWithStackMemory - >; + using Set = SpaceSaving>; + Set value; }; @@ -109,13 +104,7 @@ public: /// Generic implementation, it uses serialized representation as object descriptor. struct AggregateFunctionTopKGenericData { - using Set = SpaceSaving - < - StringRef, - StringRefHash, - HashTableGrower<4>, - HashTableAllocatorWithStackMemory - >; + using Set = SpaceSaving; Set value; }; diff --git a/src/AggregateFunctions/QuantileExactWeighted.h b/src/AggregateFunctions/QuantileExactWeighted.h index 6053bddc947..7731ff980fe 100644 --- a/src/AggregateFunctions/QuantileExactWeighted.h +++ b/src/AggregateFunctions/QuantileExactWeighted.h @@ -33,12 +33,8 @@ struct QuantileExactWeighted using Hasher = std::conditional_t, Int128Hash, HashCRC32>; /// When creating, the hash table must be small. - using Map = HashMap< - UnderlyingType, Weight, - Hasher, - HashTableGrower<4>, - HashTableAllocatorWithStackMemory) * (1 << 3)> - >; + using Map = HASH_TABLE_WITH_STACK_MEMORY(HashMap, UnderlyingType, Weight, + Hasher, HashTableGrower<4>); Map map; diff --git a/src/Common/Allocator.h b/src/Common/Allocator.h index 9add9299430..43d7e67c4bb 100644 --- a/src/Common/Allocator.h +++ b/src/Common/Allocator.h @@ -278,13 +278,15 @@ private: /** Allocator with optimization to place small memory ranges in automatic memory. */ -template +template class AllocatorWithStackMemory : private Base { private: - alignas(Alignment) char stack_memory[N]; + alignas(Alignment) char stack_memory[_initial_bytes]; public: + static constexpr size_t initial_bytes = _initial_bytes; + /// Do not use boost::noncopyable to avoid the warning about direct base /// being inaccessible due to ambiguity, when derived classes are also /// noncopiable (-Winaccessible-base). @@ -295,10 +297,10 @@ public: void * alloc(size_t size) { - if (size <= N) + if (size <= initial_bytes) { if constexpr (Base::clear_memory) - memset(stack_memory, 0, N); + memset(stack_memory, 0, initial_bytes); return stack_memory; } @@ -307,18 +309,18 @@ public: void free(void * buf, size_t size) { - if (size > N) + if (size > initial_bytes) Base::free(buf, size); } void * realloc(void * buf, size_t old_size, size_t new_size) { /// Was in stack_memory, will remain there. - if (new_size <= N) + if (new_size <= initial_bytes) return buf; /// Already was big enough to not fit in stack_memory. - if (old_size > N) + if (old_size > initial_bytes) return Base::realloc(buf, old_size, new_size, Alignment); /// Was in stack memory, but now will not fit there. @@ -330,10 +332,20 @@ public: protected: static constexpr size_t getStackThreshold() { - return N; + return initial_bytes; } }; +// A constant that gives the number of initially available bytes in +// the allocator. Used to check that this number is in sync with the +// initial size of array or hash table that uses the allocator. +template +constexpr size_t allocatorInitialBytes = 0; + +template +constexpr size_t allocatorInitialBytes> = initial_bytes; + #if !__clang__ #pragma GCC diagnostic pop diff --git a/src/Common/HashTable/HashTable.h b/src/Common/HashTable/HashTable.h index 58b7cd81901..2f7272ca8ec 100644 --- a/src/Common/HashTable/HashTable.h +++ b/src/Common/HashTable/HashTable.h @@ -308,7 +308,16 @@ struct ZeroValueStorage const Cell * zeroValue() const { return nullptr; } }; +// These templates give the initial hash table size, so that we can check +// that it is in sync with initial allocator size. +template +constexpr size_t growerInitialCount = 0; +template +constexpr size_t growerInitialCount> + = 1ULL << initial_size_degree; + +// The HashTable template < typename Key, @@ -324,6 +333,17 @@ class HashTable : protected Cell::State, protected ZeroValueStorage /// empty base optimization { +public: + // Export the initial buffer sizes for the ease of using allocators with + // inline memory. + static constexpr size_t initial_buffer_bytes + = growerInitialCount * sizeof(Cell); + + // If we use an allocator with inline memory, check that the initial + // size of the hash table is in sync with the amount of this memory. + static_assert(allocatorInitialBytes == 0 + || allocatorInitialBytes == initial_buffer_bytes); + protected: friend class const_iterator; friend class iterator; @@ -1075,3 +1095,13 @@ public: } #endif }; + +// A helper macro that declares hash table with allocator with stack memory, +// and the initial size of the allocator is in sync with initial size of the +// hash table. +#define HASH_TABLE_WITH_STACK_MEMORY(HASH_TABLE_VARIANT, ...) \ + HASH_TABLE_VARIANT<__VA_ARGS__, \ + HashTableAllocatorWithStackMemory< \ + HASH_TABLE_VARIANT<__VA_ARGS__>::initial_buffer_bytes \ + > \ + > diff --git a/src/Common/HashTable/HashTableAllocator.h b/src/Common/HashTable/HashTableAllocator.h index 99f9c979685..47e3fdfc4b6 100644 --- a/src/Common/HashTable/HashTableAllocator.h +++ b/src/Common/HashTable/HashTableAllocator.h @@ -10,5 +10,5 @@ */ using HashTableAllocator = Allocator; -template -using HashTableAllocatorWithStackMemory = AllocatorWithStackMemory; +template +using HashTableAllocatorWithStackMemory = AllocatorWithStackMemory; diff --git a/src/Common/PODArray.h b/src/Common/PODArray.h index 8fe1f74484e..8b30b314e91 100644 --- a/src/Common/PODArray.h +++ b/src/Common/PODArray.h @@ -85,6 +85,11 @@ protected: static_assert(pad_left <= EmptyPODArraySize && "Left Padding exceeds EmptyPODArraySize. Is the element size too large?"); + // If we are using allocator with inline memory, the minimal size of + // array must be in sync with the size of this memory. + static_assert(allocatorInitialBytes == 0 + || allocatorInitialBytes == initial_bytes); + char * c_start = null; /// Does not include pad_left. char * c_end = null; char * c_end_of_storage = null; /// Does not include pad_right. diff --git a/src/Common/SpaceSaving.h b/src/Common/SpaceSaving.h index 9ad7f6275d6..47b49add060 100644 --- a/src/Common/SpaceSaving.h +++ b/src/Common/SpaceSaving.h @@ -67,9 +67,7 @@ private: template < typename TKey, - typename Hash = DefaultHash, - typename Grower = HashTableGrower<>, - typename Allocator = HashTableAllocator + typename Hash = DefaultHash > class SpaceSaving { @@ -380,7 +378,8 @@ private: counter_map[counter->key] = counter; } - using CounterMap = HashMap; + using CounterMap = HASH_TABLE_WITH_STACK_MEMORY(HashMap, + TKey, Counter *, Hash, HashTableGrower<4>); CounterMap counter_map; std::vector counter_list; diff --git a/src/Common/tests/pod_array.cpp b/src/Common/tests/pod_array.cpp index de15b485411..6e9634ba3cf 100644 --- a/src/Common/tests/pod_array.cpp +++ b/src/Common/tests/pod_array.cpp @@ -18,9 +18,9 @@ static void test1() { using namespace DB; - static constexpr size_t initial_size = 8; - static constexpr size_t stack_threshold = 32; - using Array = PODArray, stack_threshold>>; + static constexpr size_t initial_bytes = 32; + using Array = PODArray, initial_bytes>>; bool res = true; @@ -139,9 +139,9 @@ static void test2() { using namespace DB; - static constexpr size_t initial_size = 8; - static constexpr size_t stack_threshold = 32; - using Array = PODArray, stack_threshold>>; + static constexpr size_t initial_bytes = 32; + using Array = PODArray, initial_bytes>>; bool res = true; @@ -389,9 +389,9 @@ static void test3() { using namespace DB; - static constexpr size_t initial_size = 8; - static constexpr size_t stack_threshold = 32; - using Array = PODArray, stack_threshold>>; + static constexpr size_t initial_bytes = 32; + using Array = PODArray, initial_bytes>>; bool res = true; diff --git a/src/Functions/array/arrayDistinct.cpp b/src/Functions/array/arrayDistinct.cpp index d24de638865..d3da77e43d2 100644 --- a/src/Functions/array/arrayDistinct.cpp +++ b/src/Functions/array/arrayDistinct.cpp @@ -153,10 +153,8 @@ bool FunctionArrayDistinct::executeNumber( if (nullable_col) src_null_map = &nullable_col->getNullMapData(); - using Set = ClearableHashSet, - HashTableGrower, - HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(T)>>; + using Set = HASH_TABLE_WITH_STACK_MEMORY(ClearableHashSet, + T, DefaultHash, HashTableGrower); Set set; @@ -201,10 +199,8 @@ bool FunctionArrayDistinct::executeString( ColumnString & res_data_column_string = typeid_cast(res_data_col); - using Set = ClearableHashSet, - HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(StringRef)>>; + using Set = HASH_TABLE_WITH_STACK_MEMORY(ClearableHashSet, + StringRef, StringRefHash, HashTableGrower); const PaddedPODArray * src_null_map = nullptr; @@ -249,8 +245,8 @@ void FunctionArrayDistinct::executeHashed( ColumnArray::Offsets & res_offsets, const ColumnNullable * nullable_col) { - using Set = ClearableHashSet, - HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>; + using Set = HASH_TABLE_WITH_STACK_MEMORY(ClearableHashSet, + UInt128, UInt128TrivialHash, HashTableGrower); const PaddedPODArray * src_null_map = nullptr; diff --git a/src/Functions/array/arrayEnumerateExtended.h b/src/Functions/array/arrayEnumerateExtended.h index 69db59e698e..e97766c5827 100644 --- a/src/Functions/array/arrayEnumerateExtended.h +++ b/src/Functions/array/arrayEnumerateExtended.h @@ -64,36 +64,46 @@ private: template struct MethodOneNumber { - using Set = ClearableHashMap, HashTableGrower, - HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(T)>>; + using Set = HASH_TABLE_WITH_STACK_MEMORY(ClearableHashMap, + T, UInt32, DefaultHash, + HashTableGrower); + using Method = ColumnsHashing::HashMethodOneNumber; }; struct MethodString { - using Set = ClearableHashMap, - HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(StringRef)>>; + using Set = HASH_TABLE_WITH_STACK_MEMORY(ClearableHashMap, + StringRef, UInt32, StringRefHash, + HashTableGrower); + using Method = ColumnsHashing::HashMethodString; }; struct MethodFixedString { - using Set = ClearableHashMap, - HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(StringRef)>>; + using Set = HASH_TABLE_WITH_STACK_MEMORY(ClearableHashMap, + StringRef, UInt32, StringRefHash, + HashTableGrower); + using Method = ColumnsHashing::HashMethodFixedString; }; struct MethodFixed { - using Set = ClearableHashMap, - HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>; + using Set = HASH_TABLE_WITH_STACK_MEMORY(ClearableHashMap, + UInt128, UInt32, UInt128HashCRC32, + HashTableGrower); + using Method = ColumnsHashing::HashMethodKeysFixed; }; struct MethodHashed { - using Set = ClearableHashMap, - HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>; + using Set = HASH_TABLE_WITH_STACK_MEMORY(ClearableHashMap, + UInt128, UInt32, UInt128TrivialHash, + HashTableGrower); + using Method = ColumnsHashing::HashMethodHashed; }; diff --git a/src/Functions/array/arrayEnumerateRanked.h b/src/Functions/array/arrayEnumerateRanked.h index 133f5d7cb81..1700260dd9f 100644 --- a/src/Functions/array/arrayEnumerateRanked.h +++ b/src/Functions/array/arrayEnumerateRanked.h @@ -308,12 +308,10 @@ void FunctionArrayEnumerateRankedExtended::executeMethodImpl( const size_t depth_to_look = arrays_depths.max_array_depth; const auto & offsets = *offsets_by_depth[depth_to_look - 1]; - using Map = ClearableHashMap< - UInt128, - UInt32, - UInt128TrivialHash, - HashTableGrower, - HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>; + using Map = HASH_TABLE_WITH_STACK_MEMORY(ClearableHashMap, + UInt128, UInt32, UInt128TrivialHash, + HashTableGrower); + Map indices; std::vector indices_by_depth(depth_to_look); diff --git a/src/Functions/array/arrayIntersect.cpp b/src/Functions/array/arrayIntersect.cpp index cccad7c7a03..acd162974c9 100644 --- a/src/Functions/array/arrayIntersect.cpp +++ b/src/Functions/array/arrayIntersect.cpp @@ -418,16 +418,18 @@ void FunctionArrayIntersect::executeImpl(Block & block, const ColumnNumbers & ar TypeListNativeNumbers::forEach(NumberExecutor(arrays, not_nullable_nested_return_type, result_column)); TypeListDecimalNumbers::forEach(DecimalExecutor(arrays, not_nullable_nested_return_type, result_column)); - using DateMap = ClearableHashMap, - HashTableGrower, - HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(DataTypeDate::FieldType)>>; + using DateMap = HASH_TABLE_WITH_STACK_MEMORY(ClearableHashMap, + DataTypeDate::FieldType, size_t, DefaultHash, + HashTableGrower); - using DateTimeMap = ClearableHashMap, - HashTableGrower, - HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(DataTypeDateTime::FieldType)>>; + using DateTimeMap = HASH_TABLE_WITH_STACK_MEMORY(ClearableHashMap, + DataTypeDateTime::FieldType, size_t, + DefaultHash, + HashTableGrower); - using StringMap = ClearableHashMap, - HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(StringRef)>>; + using StringMap = HASH_TABLE_WITH_STACK_MEMORY(ClearableHashMap, + StringRef, size_t, StringRefHash, + HashTableGrower); if (!result_column) { @@ -455,8 +457,8 @@ void FunctionArrayIntersect::executeImpl(Block & block, const ColumnNumbers & ar template void FunctionArrayIntersect::NumberExecutor::operator()() { - using Map = ClearableHashMap, HashTableGrower, - HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(T)>>; + using Map = HASH_TABLE_WITH_STACK_MEMORY(ClearableHashMap, + T, size_t, DefaultHash, HashTableGrower); if (!result && typeid_cast *>(data_type.get())) result = execute, true>(arrays, ColumnVector::create()); @@ -465,8 +467,8 @@ void FunctionArrayIntersect::NumberExecutor::operator()() template void FunctionArrayIntersect::DecimalExecutor::operator()() { - using Map = ClearableHashMap, HashTableGrower, - HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(T)>>; + using Map = HASH_TABLE_WITH_STACK_MEMORY(ClearableHashMap, + T, size_t, DefaultHash, HashTableGrower); if (!result) if (auto * decimal = typeid_cast *>(data_type.get())) diff --git a/src/Functions/array/arrayUniq.cpp b/src/Functions/array/arrayUniq.cpp index 02129781c13..a83c535d88d 100644 --- a/src/Functions/array/arrayUniq.cpp +++ b/src/Functions/array/arrayUniq.cpp @@ -66,36 +66,41 @@ private: template struct MethodOneNumber { - using Set = ClearableHashSet, HashTableGrower, - HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(T)>>; + using Set = HASH_TABLE_WITH_STACK_MEMORY(ClearableHashSet, + T, DefaultHash, HashTableGrower); + using Method = ColumnsHashing::HashMethodOneNumber; }; struct MethodString { - using Set = ClearableHashSet, - HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(StringRef)>>; + using Set = HASH_TABLE_WITH_STACK_MEMORY(ClearableHashSet, + StringRef, StringRefHash, HashTableGrower); + using Method = ColumnsHashing::HashMethodString; }; struct MethodFixedString { - using Set = ClearableHashSet, - HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(StringRef)>>; + using Set = HASH_TABLE_WITH_STACK_MEMORY(ClearableHashSet, + StringRef, StringRefHash, HashTableGrower); + using Method = ColumnsHashing::HashMethodFixedString; }; struct MethodFixed { - using Set = ClearableHashSet, - HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>; + using Set = HASH_TABLE_WITH_STACK_MEMORY(ClearableHashSet, + UInt128, UInt128HashCRC32, HashTableGrower); + using Method = ColumnsHashing::HashMethodKeysFixed; }; struct MethodHashed { - using Set = ClearableHashSet, - HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>; + using Set = HASH_TABLE_WITH_STACK_MEMORY(ClearableHashSet, + UInt128, UInt128TrivialHash, HashTableGrower); + using Method = ColumnsHashing::HashMethodHashed; }; diff --git a/src/Interpreters/tests/hash_map3.cpp b/src/Interpreters/tests/hash_map3.cpp index 2207edc6cc1..804b084b2a1 100644 --- a/src/Interpreters/tests/hash_map3.cpp +++ b/src/Interpreters/tests/hash_map3.cpp @@ -61,12 +61,11 @@ struct Grower : public HashTableGrower<2> int main(int, char **) { - using Map = HashMapWithDump< + using Map = HASH_TABLE_WITH_STACK_MEMORY(HashMapWithDump, StringRef, UInt64, SimpleHash, - Grower, - HashTableAllocatorWithStackMemory<4 * 24>>; + Grower); Map map;