mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 01:25:21 +00:00
Optimize one nullable key aggregate performance (#45772)
This commit is contained in:
parent
81b30021db
commit
d4c5ab9dcd
@ -16,7 +16,6 @@
|
||||
#include <memory>
|
||||
#include <cassert>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
@ -29,26 +28,42 @@ namespace ColumnsHashing
|
||||
|
||||
/// For the case when there is one numeric key.
|
||||
/// UInt8/16/32/64 for any type with corresponding bit width.
|
||||
template <typename Value, typename Mapped, typename FieldType, bool use_cache = true, bool need_offset = false>
|
||||
template <typename Value, typename Mapped, typename FieldType, bool use_cache = true, bool need_offset = false, bool nullable = false>
|
||||
struct HashMethodOneNumber
|
||||
: public columns_hashing_impl::HashMethodBase<HashMethodOneNumber<Value, Mapped, FieldType, use_cache, need_offset>, Value, Mapped, use_cache, need_offset>
|
||||
: public columns_hashing_impl::HashMethodBase<HashMethodOneNumber<Value, Mapped, FieldType, use_cache, need_offset, nullable>, Value, Mapped, use_cache, need_offset, nullable>
|
||||
{
|
||||
using Self = HashMethodOneNumber<Value, Mapped, FieldType, use_cache, need_offset>;
|
||||
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset>;
|
||||
using Self = HashMethodOneNumber<Value, Mapped, FieldType, use_cache, need_offset, nullable>;
|
||||
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset, nullable>;
|
||||
|
||||
static constexpr bool has_cheap_key_calculation = true;
|
||||
|
||||
const char * vec;
|
||||
|
||||
/// If the keys are of a fixed length then key_sizes contains their lengths, empty otherwise.
|
||||
HashMethodOneNumber(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &)
|
||||
HashMethodOneNumber(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &) : Base(key_columns[0])
|
||||
{
|
||||
vec = key_columns[0]->getRawData().data();
|
||||
if constexpr (nullable)
|
||||
{
|
||||
const auto * null_column = checkAndGetColumn<ColumnNullable>(key_columns[0]);
|
||||
vec = null_column->getNestedColumnPtr()->getRawData().data();
|
||||
}
|
||||
else
|
||||
{
|
||||
vec = key_columns[0]->getRawData().data();
|
||||
}
|
||||
}
|
||||
|
||||
explicit HashMethodOneNumber(const IColumn * column)
|
||||
explicit HashMethodOneNumber(const IColumn * column) : Base(column)
|
||||
{
|
||||
vec = column->getRawData().data();
|
||||
if constexpr (nullable)
|
||||
{
|
||||
const auto * null_column = checkAndGetColumn<ColumnNullable>(column);
|
||||
vec = null_column->getNestedColumnPtr()->getRawData().data();
|
||||
}
|
||||
else
|
||||
{
|
||||
vec = column->getRawData().data();
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates context. Method is called once and result context is used in all threads.
|
||||
@ -73,22 +88,30 @@ struct HashMethodOneNumber
|
||||
|
||||
|
||||
/// For the case when there is one string key.
|
||||
template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true, bool need_offset = false>
|
||||
template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true, bool need_offset = false, bool nullable = false>
|
||||
struct HashMethodString
|
||||
: public columns_hashing_impl::HashMethodBase<HashMethodString<Value, Mapped, place_string_to_arena, use_cache, need_offset>, Value, Mapped, use_cache, need_offset>
|
||||
: public columns_hashing_impl::HashMethodBase<HashMethodString<Value, Mapped, place_string_to_arena, use_cache, need_offset, nullable>, Value, Mapped, use_cache, need_offset, nullable>
|
||||
{
|
||||
using Self = HashMethodString<Value, Mapped, place_string_to_arena, use_cache, need_offset>;
|
||||
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset>;
|
||||
using Self = HashMethodString<Value, Mapped, place_string_to_arena, use_cache, need_offset, nullable>;
|
||||
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset, nullable>;
|
||||
|
||||
static constexpr bool has_cheap_key_calculation = false;
|
||||
|
||||
const IColumn::Offset * offsets;
|
||||
const UInt8 * chars;
|
||||
|
||||
HashMethodString(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &)
|
||||
HashMethodString(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &) : Base(key_columns[0])
|
||||
{
|
||||
const IColumn & column = *key_columns[0];
|
||||
const ColumnString & column_string = assert_cast<const ColumnString &>(column);
|
||||
const IColumn * column;
|
||||
if constexpr (nullable)
|
||||
{
|
||||
column = checkAndGetColumn<ColumnNullable>(key_columns[0])->getNestedColumnPtr().get();
|
||||
}
|
||||
else
|
||||
{
|
||||
column = key_columns[0];
|
||||
}
|
||||
const ColumnString & column_string = assert_cast<const ColumnString &>(*column);
|
||||
offsets = column_string.getOffsets().data();
|
||||
chars = column_string.getChars().data();
|
||||
}
|
||||
@ -108,28 +131,35 @@ struct HashMethodString
|
||||
}
|
||||
|
||||
protected:
|
||||
friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
|
||||
friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset, nullable>;
|
||||
};
|
||||
|
||||
|
||||
/// For the case when there is one fixed-length string key.
|
||||
template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true, bool need_offset = false>
|
||||
template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true, bool need_offset = false, bool nullable = false>
|
||||
struct HashMethodFixedString
|
||||
: public columns_hashing_impl::
|
||||
HashMethodBase<HashMethodFixedString<Value, Mapped, place_string_to_arena, use_cache, need_offset>, Value, Mapped, use_cache, need_offset>
|
||||
: public columns_hashing_impl::HashMethodBase<HashMethodFixedString<Value, Mapped, place_string_to_arena, use_cache, need_offset, nullable>, Value, Mapped, use_cache, need_offset, nullable>
|
||||
{
|
||||
using Self = HashMethodFixedString<Value, Mapped, place_string_to_arena, use_cache, need_offset>;
|
||||
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset>;
|
||||
using Self = HashMethodFixedString<Value, Mapped, place_string_to_arena, use_cache, need_offset, nullable>;
|
||||
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset, nullable>;
|
||||
|
||||
static constexpr bool has_cheap_key_calculation = false;
|
||||
|
||||
size_t n;
|
||||
const ColumnFixedString::Chars * chars;
|
||||
|
||||
HashMethodFixedString(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &)
|
||||
HashMethodFixedString(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &) : Base(key_columns[0])
|
||||
{
|
||||
const IColumn & column = *key_columns[0];
|
||||
const ColumnFixedString & column_string = assert_cast<const ColumnFixedString &>(column);
|
||||
const IColumn * column;
|
||||
if constexpr (nullable)
|
||||
{
|
||||
column = checkAndGetColumn<ColumnNullable>(key_columns[0])->getNestedColumnPtr().get();
|
||||
}
|
||||
else
|
||||
{
|
||||
column = key_columns[0];
|
||||
}
|
||||
const ColumnFixedString & column_string = assert_cast<const ColumnFixedString &>(*column);
|
||||
n = column_string.getN();
|
||||
chars = &column_string.getChars();
|
||||
}
|
||||
@ -149,7 +179,7 @@ struct HashMethodFixedString
|
||||
}
|
||||
|
||||
protected:
|
||||
friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
|
||||
friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset, nullable>;
|
||||
};
|
||||
|
||||
|
||||
|
@ -6,7 +6,6 @@
|
||||
#include <Common/HashTable/HashTableKeyHolder.h>
|
||||
#include <Interpreters/AggregationCommon.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
@ -140,7 +139,7 @@ public:
|
||||
FindResultImpl(bool found_, size_t off) : FindResultImplBase(found_), FindResultImplOffsetBase<need_offset>(off) {}
|
||||
};
|
||||
|
||||
template <typename Derived, typename Value, typename Mapped, bool consecutive_keys_optimization, bool need_offset = false>
|
||||
template <typename Derived, typename Value, typename Mapped, bool consecutive_keys_optimization, bool need_offset = false, bool nullable = false>
|
||||
class HashMethodBase
|
||||
{
|
||||
public:
|
||||
@ -154,6 +153,19 @@ public:
|
||||
template <typename Data>
|
||||
ALWAYS_INLINE EmplaceResult emplaceKey(Data & data, size_t row, Arena & pool)
|
||||
{
|
||||
if constexpr (nullable)
|
||||
{
|
||||
if (isNullAt(row))
|
||||
{
|
||||
bool has_null_key = data.hasNullKeyData();
|
||||
data.hasNullKeyData() = true;
|
||||
|
||||
if constexpr (has_mapped)
|
||||
return EmplaceResult(data.getNullKeyData(), data.getNullKeyData(), !has_null_key);
|
||||
else
|
||||
return EmplaceResult(!has_null_key);
|
||||
}
|
||||
}
|
||||
auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, pool);
|
||||
return emplaceImpl(key_holder, data);
|
||||
}
|
||||
@ -161,6 +173,16 @@ public:
|
||||
template <typename Data>
|
||||
ALWAYS_INLINE FindResult findKey(Data & data, size_t row, Arena & pool)
|
||||
{
|
||||
if constexpr (nullable)
|
||||
{
|
||||
if (isNullAt(row))
|
||||
{
|
||||
if constexpr (has_mapped)
|
||||
return FindResult(&data.getNullKeyData(), data.hasNullKeyData(), 0);
|
||||
else
|
||||
return FindResult(data.hasNullKeyData(), 0);
|
||||
}
|
||||
}
|
||||
auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, pool);
|
||||
return findKeyImpl(keyHolderGetKey(key_holder), data);
|
||||
}
|
||||
@ -172,10 +194,25 @@ public:
|
||||
return data.hash(keyHolderGetKey(key_holder));
|
||||
}
|
||||
|
||||
/// Tells whether the key at `row` is NULL.
/// Without the `nullable` template flag this is always false; with it, the answer is read from `null_map`.
ALWAYS_INLINE bool isNullAt(size_t row) const
{
    if constexpr (!nullable)
        return false;
    else
        return null_map->getBool(row);
}
|
||||
|
||||
protected:
|
||||
Cache cache;
|
||||
const IColumn * null_map = nullptr;
|
||||
bool has_null_data = false;
|
||||
|
||||
HashMethodBase()
|
||||
/// The column argument is only used when the key column is nullable.
|
||||
explicit HashMethodBase(const IColumn * column = nullptr)
|
||||
{
|
||||
if constexpr (consecutive_keys_optimization)
|
||||
{
|
||||
@ -188,6 +225,11 @@ protected:
|
||||
else
|
||||
cache.value = Value();
|
||||
}
|
||||
if constexpr (nullable)
|
||||
{
|
||||
|
||||
null_map = &checkAndGetColumn<ColumnNullable>(column)->getNullMapColumn();
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Data, typename KeyHolder>
|
||||
@ -293,7 +335,6 @@ protected:
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template <typename T>
|
||||
struct MappedCache : public PaddedPODArray<T> {};
|
||||
|
||||
|
@ -9,7 +9,7 @@ class TwoLevelStringHashMap : public TwoLevelStringHashTable<StringHashMapSubMap
|
||||
public:
|
||||
using Key = StringRef;
|
||||
using Self = TwoLevelStringHashMap;
|
||||
using Base = TwoLevelStringHashTable<StringHashMapSubMaps<TMapped, Allocator>, StringHashMap<TMapped, Allocator>>;
|
||||
using Base = TwoLevelStringHashTable<StringHashMapSubMaps<TMapped, Allocator>, ImplTable<TMapped, Allocator>>;
|
||||
using LookupResult = typename Base::LookupResult;
|
||||
|
||||
using Base::Base;
|
||||
|
@ -775,6 +775,31 @@ AggregatedDataVariants::Type Aggregator::chooseAggregationMethod()
|
||||
|
||||
if (has_nullable_key)
|
||||
{
|
||||
/// Optimization for one key
|
||||
if (params.keys_size == 1 && !has_low_cardinality)
|
||||
{
|
||||
if (types_removed_nullable[0]->isValueRepresentedByNumber())
|
||||
{
|
||||
size_t size_of_field = types_removed_nullable[0]->getSizeOfValueInMemory();
|
||||
if (size_of_field == 1)
|
||||
return AggregatedDataVariants::Type::nullable_key8;
|
||||
if (size_of_field == 2)
|
||||
return AggregatedDataVariants::Type::nullable_key16;
|
||||
if (size_of_field == 4)
|
||||
return AggregatedDataVariants::Type::nullable_key32;
|
||||
if (size_of_field == 8)
|
||||
return AggregatedDataVariants::Type::nullable_key64;
|
||||
}
|
||||
if (isFixedString(types_removed_nullable[0]))
|
||||
{
|
||||
return AggregatedDataVariants::Type::nullable_key_fixed_string;
|
||||
}
|
||||
if (isString(types_removed_nullable[0]))
|
||||
{
|
||||
return AggregatedDataVariants::Type::nullable_key_string;
|
||||
}
|
||||
}
|
||||
|
||||
if (params.keys_size == num_fixed_contiguous_keys && !has_low_cardinality)
|
||||
{
|
||||
/// Pack if possible all the keys along with information about which key values are nulls
|
||||
@ -1889,7 +1914,7 @@ inline void Aggregator::insertAggregatesIntoColumns(Mapped & mapped, MutableColu
|
||||
|
||||
|
||||
template <bool use_compiled_functions>
|
||||
Block Aggregator::insertResultsIntoColumns(PaddedPODArray<AggregateDataPtr> & places, OutputBlockColumns && out_cols, Arena * arena) const
|
||||
Block Aggregator::insertResultsIntoColumns(PaddedPODArray<AggregateDataPtr> & places, OutputBlockColumns && out_cols, Arena * arena, bool has_null_key_data [[maybe_unused]]) const
|
||||
{
|
||||
std::exception_ptr exception;
|
||||
size_t aggregate_functions_destroy_index = 0;
|
||||
@ -1912,8 +1937,12 @@ Block Aggregator::insertResultsIntoColumns(PaddedPODArray<AggregateDataPtr> & pl
|
||||
continue;
|
||||
|
||||
auto & final_aggregate_column = out_cols.final_aggregate_columns[i];
|
||||
final_aggregate_column = final_aggregate_column->cloneResized(places.size());
|
||||
columns_data.emplace_back(getColumnData(final_aggregate_column.get()));
|
||||
/**
|
||||
* In convertToBlockImplFinal, additional data with a key of null may be written,
|
||||
* and additional memory for null data needs to be allocated when using the compiled function
|
||||
*/
|
||||
final_aggregate_column = final_aggregate_column->cloneResized(places.size() + (has_null_key_data ? 1 : 0));
|
||||
columns_data.emplace_back(getColumnData(final_aggregate_column.get(), (has_null_key_data ? 1 : 0)));
|
||||
}
|
||||
|
||||
auto insert_aggregates_into_columns_function = compiled_functions.insert_aggregates_into_columns_function;
|
||||
@ -1982,15 +2011,21 @@ Aggregator::convertToBlockImplFinal(Method & method, Table & data, Arena * arena
|
||||
std::optional<OutputBlockColumns> out_cols;
|
||||
std::optional<Sizes> shuffled_key_sizes;
|
||||
PaddedPODArray<AggregateDataPtr> places;
|
||||
bool has_null_key_data = false;
|
||||
|
||||
auto init_out_cols = [&]()
|
||||
{
|
||||
out_cols = prepareOutputBlockColumns(params, aggregate_functions, getHeader(final), aggregates_pools, final, max_block_size);
|
||||
|
||||
if constexpr (Method::low_cardinality_optimization)
|
||||
if constexpr (Method::low_cardinality_optimization || Method::one_key_nullable_optimization)
|
||||
{
|
||||
/**
|
||||
* When one_key_nullable_optimization is enabled, null data will be written to the key column and result column in advance.
|
||||
* insertResultsIntoColumns then needs to allocate memory for that null data.
|
||||
*/
|
||||
if (data.hasNullKeyData())
|
||||
{
|
||||
has_null_key_data = Method::one_key_nullable_optimization;
|
||||
out_cols->key_columns[0]->insertDefault();
|
||||
insertAggregatesIntoColumns(data.getNullKeyData(), out_cols->final_aggregate_columns, arena);
|
||||
data.hasNullKeyData() = false;
|
||||
@ -2022,7 +2057,7 @@ Aggregator::convertToBlockImplFinal(Method & method, Table & data, Arena * arena
|
||||
{
|
||||
if (places.size() >= max_block_size)
|
||||
{
|
||||
res.emplace_back(insertResultsIntoColumns<use_compiled_functions>(places, std::move(out_cols.value()), arena));
|
||||
res.emplace_back(insertResultsIntoColumns<use_compiled_functions>(places, std::move(out_cols.value()), arena, has_null_key_data));
|
||||
places.clear();
|
||||
out_cols.reset();
|
||||
}
|
||||
@ -2031,12 +2066,12 @@ Aggregator::convertToBlockImplFinal(Method & method, Table & data, Arena * arena
|
||||
|
||||
if constexpr (return_single_block)
|
||||
{
|
||||
return insertResultsIntoColumns<use_compiled_functions>(places, std::move(out_cols.value()), arena);
|
||||
return insertResultsIntoColumns<use_compiled_functions>(places, std::move(out_cols.value()), arena, has_null_key_data);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (out_cols.has_value())
|
||||
res.emplace_back(insertResultsIntoColumns<use_compiled_functions>(places, std::move(out_cols.value()), arena));
|
||||
res.emplace_back(insertResultsIntoColumns<use_compiled_functions>(places, std::move(out_cols.value()), arena, has_null_key_data));
|
||||
return res;
|
||||
}
|
||||
}
|
||||
@ -2056,7 +2091,7 @@ Aggregator::convertToBlockImplNotFinal(Method & method, Table & data, Arenas & a
|
||||
{
|
||||
out_cols = prepareOutputBlockColumns(params, aggregate_functions, getHeader(final), aggregates_pools, final, max_block_size);
|
||||
|
||||
if constexpr (Method::low_cardinality_optimization)
|
||||
if constexpr (Method::low_cardinality_optimization || Method::one_key_nullable_optimization)
|
||||
{
|
||||
if (data.hasNullKeyData())
|
||||
{
|
||||
@ -2397,7 +2432,7 @@ void NO_INLINE Aggregator::mergeDataNullKey(
|
||||
Table & table_src,
|
||||
Arena * arena) const
|
||||
{
|
||||
if constexpr (Method::low_cardinality_optimization)
|
||||
if constexpr (Method::low_cardinality_optimization || Method::one_key_nullable_optimization)
|
||||
{
|
||||
if (table_src.hasNullKeyData())
|
||||
{
|
||||
@ -2429,7 +2464,7 @@ void NO_INLINE Aggregator::mergeDataNullKey(
|
||||
template <typename Method, bool use_compiled_functions, bool prefetch, typename Table>
|
||||
void NO_INLINE Aggregator::mergeDataImpl(Table & table_dst, Table & table_src, Arena * arena) const
|
||||
{
|
||||
if constexpr (Method::low_cardinality_optimization)
|
||||
if constexpr (Method::low_cardinality_optimization || Method::one_key_nullable_optimization)
|
||||
mergeDataNullKey<Method, Table>(table_dst, table_src, arena);
|
||||
|
||||
auto merge = [&](AggregateDataPtr & __restrict dst, AggregateDataPtr & __restrict src, bool inserted)
|
||||
@ -2490,7 +2525,7 @@ void NO_INLINE Aggregator::mergeDataNoMoreKeysImpl(
|
||||
Arena * arena) const
|
||||
{
|
||||
/// Note: will create data for the NULL key if it does not exist.
|
||||
if constexpr (Method::low_cardinality_optimization)
|
||||
if constexpr (Method::low_cardinality_optimization || Method::one_key_nullable_optimization)
|
||||
mergeDataNullKey<Method, Table>(table_dst, table_src, arena);
|
||||
|
||||
table_src.mergeToViaFind(table_dst, [&](AggregateDataPtr dst, AggregateDataPtr & src, bool found)
|
||||
@ -2518,7 +2553,7 @@ void NO_INLINE Aggregator::mergeDataOnlyExistingKeysImpl(
|
||||
Arena * arena) const
|
||||
{
|
||||
/// Note: will create data for the NULL key if it does not exist.
|
||||
if constexpr (Method::low_cardinality_optimization)
|
||||
if constexpr (Method::low_cardinality_optimization || Method::one_key_nullable_optimization)
|
||||
mergeDataNullKey<Method, Table>(table_dst, table_src, arena);
|
||||
|
||||
table_src.mergeToViaFind(table_dst,
|
||||
@ -3194,7 +3229,7 @@ void NO_INLINE Aggregator::convertBlockToTwoLevelImpl(
|
||||
/// For every row.
|
||||
for (size_t i = 0; i < rows; ++i)
|
||||
{
|
||||
if constexpr (Method::low_cardinality_optimization)
|
||||
if constexpr (Method::low_cardinality_optimization || Method::one_key_nullable_optimization)
|
||||
{
|
||||
if (state.isNullAt(i))
|
||||
{
|
||||
|
@ -159,10 +159,17 @@ using StringHashTableWithNullKey = AggregationDataWithNullKey<StringHashMap<Type
|
||||
|
||||
using AggregatedDataWithNullableUInt8Key = AggregationDataWithNullKey<AggregatedDataWithUInt8Key>;
|
||||
using AggregatedDataWithNullableUInt16Key = AggregationDataWithNullKey<AggregatedDataWithUInt16Key>;
|
||||
using AggregatedDataWithNullableUInt32Key = AggregationDataWithNullKey<AggregatedDataWithUInt32Key>;
|
||||
|
||||
|
||||
using AggregatedDataWithNullableUInt64Key = AggregationDataWithNullKey<AggregatedDataWithUInt64Key>;
|
||||
using AggregatedDataWithNullableStringKey = AggregationDataWithNullKey<AggregatedDataWithStringKey>;
|
||||
using AggregatedDataWithNullableShortStringKey = AggregationDataWithNullKey<AggregatedDataWithShortStringKey>;
|
||||
|
||||
|
||||
using AggregatedDataWithNullableUInt32KeyTwoLevel = AggregationDataWithNullKeyTwoLevel<
|
||||
TwoLevelHashMap<UInt32, AggregateDataPtr, HashCRC32<UInt32>,
|
||||
TwoLevelHashTableGrower<>, HashTableAllocator, HashTableWithNullKey>>;
|
||||
using AggregatedDataWithNullableUInt64KeyTwoLevel = AggregationDataWithNullKeyTwoLevel<
|
||||
TwoLevelHashMap<UInt64, AggregateDataPtr, HashCRC32<UInt64>,
|
||||
TwoLevelHashTableGrower<>, HashTableAllocator, HashTableWithNullKey>>;
|
||||
@ -174,11 +181,10 @@ using AggregatedDataWithNullableStringKeyTwoLevel = AggregationDataWithNullKeyTw
|
||||
TwoLevelHashMapWithSavedHash<StringRef, AggregateDataPtr, DefaultHash<StringRef>,
|
||||
TwoLevelHashTableGrower<>, HashTableAllocator, HashTableWithNullKey>>;
|
||||
|
||||
|
||||
/// For the case where there is one numeric key.
|
||||
/// FieldType is UInt8/16/32/64 for any type with corresponding bit width.
|
||||
template <typename FieldType, typename TData,
|
||||
bool consecutive_keys_optimization = true>
|
||||
bool consecutive_keys_optimization = true, bool nullable = false>
|
||||
struct AggregationMethodOneNumber
|
||||
{
|
||||
using Data = TData;
|
||||
@ -198,10 +204,11 @@ struct AggregationMethodOneNumber
|
||||
|
||||
/// To use one `Method` in different threads, use different `State`.
|
||||
using State = ColumnsHashing::HashMethodOneNumber<typename Data::value_type,
|
||||
Mapped, FieldType, consecutive_keys_optimization>;
|
||||
Mapped, FieldType, consecutive_keys_optimization, false, nullable>;
|
||||
|
||||
/// Use optimization for low cardinality.
|
||||
static const bool low_cardinality_optimization = false;
|
||||
static const bool one_key_nullable_optimization = nullable;
|
||||
|
||||
/// Shuffle key columns before `insertKeyIntoColumns` call if needed.
|
||||
std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> &, const Sizes &) { return {}; }
|
||||
@ -209,9 +216,20 @@ struct AggregationMethodOneNumber
|
||||
// Insert the key from the hash table into columns.
|
||||
static void insertKeyIntoColumns(const Key & key, std::vector<IColumn *> & key_columns, const Sizes & /*key_sizes*/)
|
||||
{
|
||||
ColumnVectorHelper * column;
|
||||
if constexpr (nullable)
|
||||
{
|
||||
ColumnNullable & nullable_col = assert_cast<ColumnNullable &>(*key_columns[0]);
|
||||
ColumnUInt8 * null_map = assert_cast<ColumnUInt8 *>(&nullable_col.getNullMapColumn());
|
||||
null_map->insertDefault();
|
||||
column = static_cast<ColumnVectorHelper *>(&nullable_col.getNestedColumn());
|
||||
}
|
||||
else
|
||||
{
|
||||
column = static_cast<ColumnVectorHelper *>(key_columns[0]);
|
||||
}
|
||||
static_assert(sizeof(FieldType) <= sizeof(Key));
|
||||
const auto * key_holder = reinterpret_cast<const char *>(&key);
|
||||
auto * column = static_cast<ColumnVectorHelper *>(key_columns[0]);
|
||||
if constexpr (sizeof(FieldType) < sizeof(Key) && std::endian::native == std::endian::big)
|
||||
column->insertRawData<sizeof(FieldType)>(key_holder + (sizeof(Key) - sizeof(FieldType)));
|
||||
else
|
||||
@ -242,6 +260,7 @@ struct AggregationMethodString
|
||||
using State = ColumnsHashing::HashMethodString<typename Data::value_type, Mapped>;
|
||||
|
||||
static const bool low_cardinality_optimization = false;
|
||||
static const bool one_key_nullable_optimization = false;
|
||||
|
||||
std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> &, const Sizes &) { return {}; }
|
||||
|
||||
@ -253,7 +272,7 @@ struct AggregationMethodString
|
||||
|
||||
|
||||
/// Same as above but without cache
|
||||
template <typename TData>
|
||||
template <typename TData, bool nullable = false>
|
||||
struct AggregationMethodStringNoCache
|
||||
{
|
||||
using Data = TData;
|
||||
@ -271,15 +290,23 @@ struct AggregationMethodStringNoCache
|
||||
{
|
||||
}
|
||||
|
||||
using State = ColumnsHashing::HashMethodString<typename Data::value_type, Mapped, true, false>;
|
||||
using State = ColumnsHashing::HashMethodString<typename Data::value_type, Mapped, true, false, false ,nullable>;
|
||||
|
||||
static const bool low_cardinality_optimization = false;
|
||||
static const bool one_key_nullable_optimization = nullable;
|
||||
|
||||
std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> &, const Sizes &) { return {}; }
|
||||
|
||||
static void insertKeyIntoColumns(StringRef key, std::vector<IColumn *> & key_columns, const Sizes &)
|
||||
{
|
||||
static_cast<ColumnString *>(key_columns[0])->insertData(key.data, key.size);
|
||||
if constexpr (nullable)
|
||||
{
|
||||
static_cast<ColumnNullable *>(key_columns[0])->insertData(key.data, key.size);
|
||||
}
|
||||
else
|
||||
{
|
||||
static_cast<ColumnString *>(key_columns[0])->insertData(key.data, key.size);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@ -306,6 +333,7 @@ struct AggregationMethodFixedString
|
||||
using State = ColumnsHashing::HashMethodFixedString<typename Data::value_type, Mapped>;
|
||||
|
||||
static const bool low_cardinality_optimization = false;
|
||||
static const bool one_key_nullable_optimization = false;
|
||||
|
||||
std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> &, const Sizes &) { return {}; }
|
||||
|
||||
@ -316,7 +344,7 @@ struct AggregationMethodFixedString
|
||||
};
|
||||
|
||||
/// Same as above but without cache
|
||||
template <typename TData>
|
||||
template <typename TData, bool nullable = false>
|
||||
struct AggregationMethodFixedStringNoCache
|
||||
{
|
||||
using Data = TData;
|
||||
@ -334,15 +362,23 @@ struct AggregationMethodFixedStringNoCache
|
||||
{
|
||||
}
|
||||
|
||||
using State = ColumnsHashing::HashMethodFixedString<typename Data::value_type, Mapped, true, false>;
|
||||
using State = ColumnsHashing::HashMethodFixedString<typename Data::value_type, Mapped, true, false, false, nullable>;
|
||||
|
||||
static const bool low_cardinality_optimization = false;
|
||||
static const bool one_key_nullable_optimization = nullable;
|
||||
|
||||
std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> &, const Sizes &) { return {}; }
|
||||
|
||||
static void insertKeyIntoColumns(StringRef key, std::vector<IColumn *> & key_columns, const Sizes &)
|
||||
{
|
||||
static_cast<ColumnFixedString *>(key_columns[0])->insertData(key.data, key.size);
|
||||
if constexpr (nullable)
|
||||
{
|
||||
static_cast<ColumnNullable *>(key_columns[0])->insertData(key.data, key.size);
|
||||
}
|
||||
else
|
||||
{
|
||||
static_cast<ColumnFixedString *>(key_columns[0])->insertData(key.data, key.size);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@ -418,6 +454,7 @@ struct AggregationMethodKeysFixed
|
||||
use_cache>;
|
||||
|
||||
static const bool low_cardinality_optimization = false;
|
||||
static const bool one_key_nullable_optimization = false;
|
||||
|
||||
std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> & key_columns, const Sizes & key_sizes)
|
||||
{
|
||||
@ -505,6 +542,7 @@ struct AggregationMethodSerialized
|
||||
using State = ColumnsHashing::HashMethodSerialized<typename Data::value_type, Mapped>;
|
||||
|
||||
static const bool low_cardinality_optimization = false;
|
||||
static const bool one_key_nullable_optimization = false;
|
||||
|
||||
std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> &, const Sizes &) { return {}; }
|
||||
|
||||
@ -588,6 +626,18 @@ struct AggregatedDataVariants : private boost::noncopyable
|
||||
std::unique_ptr<AggregationMethodSerialized<AggregatedDataWithStringKeyHash64>> serialized_hash64;
|
||||
|
||||
/// Support for nullable keys.
|
||||
std::unique_ptr<AggregationMethodOneNumber<UInt8, AggregatedDataWithNullableUInt8Key, false, true>> nullable_key8;
|
||||
std::unique_ptr<AggregationMethodOneNumber<UInt16, AggregatedDataWithNullableUInt16Key, false, true>> nullable_key16;
|
||||
std::unique_ptr<AggregationMethodOneNumber<UInt32, AggregatedDataWithNullableUInt32Key, true, true>> nullable_key32;
|
||||
std::unique_ptr<AggregationMethodOneNumber<UInt64, AggregatedDataWithNullableUInt64Key, true, true>> nullable_key64;
|
||||
std::unique_ptr<AggregationMethodOneNumber<UInt32, AggregatedDataWithNullableUInt32KeyTwoLevel, true, true>> nullable_key32_two_level;
|
||||
std::unique_ptr<AggregationMethodOneNumber<UInt64, AggregatedDataWithNullableUInt64KeyTwoLevel, true, true>> nullable_key64_two_level;
|
||||
|
||||
std::unique_ptr<AggregationMethodStringNoCache<AggregatedDataWithNullableShortStringKey, true>> nullable_key_string;
|
||||
std::unique_ptr<AggregationMethodFixedStringNoCache<AggregatedDataWithNullableShortStringKey, true>> nullable_key_fixed_string;
|
||||
std::unique_ptr<AggregationMethodStringNoCache<AggregatedDataWithNullableShortStringKeyTwoLevel, true>> nullable_key_string_two_level;
|
||||
std::unique_ptr<AggregationMethodFixedStringNoCache<AggregatedDataWithNullableShortStringKeyTwoLevel, true>> nullable_key_fixed_string_two_level;
|
||||
|
||||
std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128, true>> nullable_keys128;
|
||||
std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256, true>> nullable_keys256;
|
||||
std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128TwoLevel, true>> nullable_keys128_two_level;
|
||||
@ -640,6 +690,16 @@ struct AggregatedDataVariants : private boost::noncopyable
|
||||
M(keys128_hash64, false) \
|
||||
M(keys256_hash64, false) \
|
||||
M(serialized_hash64, false) \
|
||||
M(nullable_key8, false) \
|
||||
M(nullable_key16, false) \
|
||||
M(nullable_key32, false) \
|
||||
M(nullable_key64, false) \
|
||||
M(nullable_key32_two_level, true) \
|
||||
M(nullable_key64_two_level, true) \
|
||||
M(nullable_key_string, false) \
|
||||
M(nullable_key_fixed_string, false) \
|
||||
M(nullable_key_string_two_level, true) \
|
||||
M(nullable_key_fixed_string_two_level, true) \
|
||||
M(nullable_keys128, false) \
|
||||
M(nullable_keys256, false) \
|
||||
M(nullable_keys128_two_level, true) \
|
||||
@ -754,6 +814,10 @@ struct AggregatedDataVariants : private boost::noncopyable
|
||||
M(keys128) \
|
||||
M(keys256) \
|
||||
M(serialized) \
|
||||
M(nullable_key32) \
|
||||
M(nullable_key64) \
|
||||
M(nullable_key_string) \
|
||||
M(nullable_key_fixed_string) \
|
||||
M(nullable_keys128) \
|
||||
M(nullable_keys256) \
|
||||
M(low_cardinality_key32) \
|
||||
@ -767,6 +831,8 @@ struct AggregatedDataVariants : private boost::noncopyable
|
||||
#define APPLY_FOR_VARIANTS_NOT_CONVERTIBLE_TO_TWO_LEVEL(M) \
|
||||
M(key8) \
|
||||
M(key16) \
|
||||
M(nullable_key8) \
|
||||
M(nullable_key16) \
|
||||
M(keys16) \
|
||||
M(key64_hash64) \
|
||||
M(key_string_hash64)\
|
||||
@ -810,6 +876,10 @@ struct AggregatedDataVariants : private boost::noncopyable
|
||||
M(keys128_two_level) \
|
||||
M(keys256_two_level) \
|
||||
M(serialized_two_level) \
|
||||
M(nullable_key32_two_level) \
|
||||
M(nullable_key64_two_level) \
|
||||
M(nullable_key_string_two_level) \
|
||||
M(nullable_key_fixed_string_two_level) \
|
||||
M(nullable_keys128_two_level) \
|
||||
M(nullable_keys256_two_level) \
|
||||
M(low_cardinality_key32_two_level) \
|
||||
@ -1290,7 +1360,7 @@ private:
|
||||
Arena * arena) const;
|
||||
|
||||
template <bool use_compiled_functions>
|
||||
Block insertResultsIntoColumns(PaddedPODArray<AggregateDataPtr> & places, OutputBlockColumns && out_cols, Arena * arena) const;
|
||||
Block insertResultsIntoColumns(PaddedPODArray<AggregateDataPtr> & places, OutputBlockColumns && out_cols, Arena * arena, bool has_null_key_data) const;
|
||||
|
||||
template <typename Method, bool use_compiled_functions, bool return_single_block, typename Table>
|
||||
ConvertToBlockRes<return_single_block>
|
||||
|
@ -41,7 +41,7 @@ namespace ErrorCodes
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
ColumnData getColumnData(const IColumn * column)
|
||||
ColumnData getColumnData(const IColumn * column, size_t skip_rows)
|
||||
{
|
||||
const bool is_const = isColumnConst(*column);
|
||||
|
||||
@ -52,11 +52,11 @@ ColumnData getColumnData(const IColumn * column)
|
||||
|
||||
if (const auto * nullable = typeid_cast<const ColumnNullable *>(column))
|
||||
{
|
||||
result.null_data = nullable->getNullMapColumn().getRawData().data();
|
||||
result.null_data = nullable->getNullMapColumn().getDataAt(skip_rows).data;
|
||||
column = &nullable->getNestedColumn();
|
||||
}
|
||||
|
||||
result.data = column->getRawData().data();
|
||||
/// skip null key data for one nullable key optimization
|
||||
result.data = column->getDataAt(skip_rows).data;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
@ -26,7 +26,7 @@ struct ColumnData
|
||||
/** Returns ColumnData for column.
|
||||
* If constant column is passed, LOGICAL_ERROR will be thrown.
|
||||
*/
|
||||
ColumnData getColumnData(const IColumn * column);
|
||||
ColumnData getColumnData(const IColumn * column, size_t skip_rows = 0);
|
||||
|
||||
using ColumnDataRowsOffset = size_t;
|
||||
using ColumnDataRowsSize = size_t;
|
||||
|
49
tests/performance/groupby_onekey_nullable.xml
Normal file
49
tests/performance/groupby_onekey_nullable.xml
Normal file
@ -0,0 +1,49 @@
|
||||
<test>
|
||||
<settings>
|
||||
<max_insert_threads>8</max_insert_threads>
|
||||
<allow_experimental_projection_optimization>0</allow_experimental_projection_optimization>
|
||||
</settings>
|
||||
|
||||
<substitutions>
|
||||
<substitution>
|
||||
<name>key_type</name>
|
||||
<values>
|
||||
<value>key_string</value>
|
||||
<value>key_string_two_level</value>
|
||||
<value>key_fixed_string</value>
|
||||
<value>key_int64</value>
|
||||
<value>key_int64_two_level</value>
|
||||
<value>key_int32</value>
|
||||
<value>key_int32_two_level</value>
|
||||
<value>Key_int16</value>
|
||||
<value>key_int8</value>
|
||||
</values>
|
||||
</substitution>
|
||||
</substitutions>
|
||||
<create_query>
|
||||
CREATE TABLE t_nullable
|
||||
(
|
||||
key_string Nullable(String),
|
||||
key_string_two_level Nullable(String),
|
||||
key_fixed_string Nullable(FixedString(3)),
|
||||
key_int64 Nullable(Int64),
|
||||
key_int64_two_level Nullable(Int64),
|
||||
key_int32 Nullable(Int32),
|
||||
key_int32_two_level Nullable(Int32),
|
||||
Key_int16 Nullable(Int16),
|
||||
key_int8 Nullable(Int8),
|
||||
i1 Nullable(Int64),
|
||||
i2 Nullable(Int64),
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY tuple()
|
||||
</create_query>
|
||||
<fill_query>insert into t_nullable select ['aa','bb','cc','dd'][number % 10000 + 1], ['aa','bb','cc','dd'][number % 200000 + 1], ['aa','bb','cc','dd'][number % 4 + 1], toInt64(number%10000), toInt64(number%200000), toInt32(number%10000), toInt32(number%200000), toInt16(number%10000), toInt8(number%100), number%6000+1, number%5000+2 from numbers_mt(20000000)</fill_query>
|
||||
<fill_query>insert into t_nullable select ['aa','bb','cc','dd'][number % 10000 + 1], ['aa','bb','cc','dd'][number % 200000 + 1], ['aa','bb','cc','dd'][number % 4 + 1], toInt64(number%10000), toInt64(number%200000), toInt32(number%10000), toInt32(number%200000), toInt16(number%10000), toInt8(number%100), number%6000+1, number%5000+2 from numbers_mt(20000000)</fill_query>
|
||||
<fill_query>insert into t_nullable select ['aa','bb','cc','dd'][number % 10000 + 1], ['aa','bb','cc','dd'][number % 200000 + 1], ['aa','bb','cc','dd'][number % 4 + 1], toInt64(number%10000), toInt64(number%200000), toInt32(number%10000), toInt32(number%200000), toInt16(number%10000), toInt8(number%100), number%6000+1, number%5000+2 from numbers_mt(20000000)</fill_query>
|
||||
<fill_query>insert into t_nullable select ['aa','bb','cc','dd'][number % 10000 + 1], ['aa','bb','cc','dd'][number % 200000 + 1], ['aa','bb','cc','dd'][number % 4 + 1], toInt64(number%10000), toInt64(number%200000), toInt32(number%10000), toInt32(number%200000), toInt16(number%10000), toInt8(number%100), number%6000+1, number%5000+2 from numbers_mt(20000000)</fill_query>
|
||||
<fill_query>OPTIMIZE TABLE t_nullable FINAL</fill_query>
|
||||
<query>select min(i1), min(i2) from t_nullable group by {key_type} format Null</query>
|
||||
|
||||
<drop_query>drop table if exists t_nullable</drop_query>
|
||||
</test>
|
Loading…
Reference in New Issue
Block a user