Optimize one nullable key aggregate performance (#45772)

LiuNeng 2023-03-03 04:01:52 +08:00 committed by GitHub
parent 81b30021db
commit d4c5ab9dcd
8 changed files with 285 additions and 60 deletions
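
This commit adds a dedicated fast path for GROUP BY on a single Nullable key (the kind of query the perf test at the bottom of the diff runs): instead of falling back to the generic packed/serialized nullable-keys variants, the aggregator hashes the nested (non-nullable) column directly and routes NULL rows to a dedicated null-key slot (the new nullable_key8/16/32/64, nullable_key_string and nullable_key_fixed_string variants). Below is a rough standalone sketch of that idea, using plain std containers and an invented NullableKeyAggregates type rather than ClickHouse's actual HashMethod* / AggregationDataWithNullKey classes.

#include <cstdint>
#include <iostream>
#include <optional>
#include <unordered_map>
#include <vector>

struct NullableKeyAggregates
{
    std::unordered_map<int64_t, int64_t> data;   // plays the role of the hash table for non-NULL keys
    std::optional<int64_t> null_key_data;        // the dedicated NULL-key slot

    // Analogue of HashMethodBase::emplaceKey: consult the null map first and only
    // touch the hash table for non-NULL rows.
    int64_t & emplace(const std::vector<int64_t> & nested, const std::vector<uint8_t> & null_map, size_t row)
    {
        if (null_map[row])
        {
            if (!null_key_data)
                null_key_data = 0;
            return *null_key_data;
        }
        return data[nested[row]];
    }
};

int main()
{
    // A Nullable(Int64) key column modelled as nested values plus a null map (1 = NULL).
    std::vector<int64_t> nested   {1, 2, 0, 2, 0};
    std::vector<uint8_t> null_map {0, 0, 1, 0, 1};

    NullableKeyAggregates agg;
    for (size_t row = 0; row < nested.size(); ++row)
        agg.emplace(nested, null_map, row) += 1;   // count() per key

    std::cout << "NULL: " << *agg.null_key_data << ", key 2: " << agg.data[2] << "\n";   // NULL: 2, key 2: 2
}

Only NULL rows pay for the branch into the slot, so the hot path for non-NULL rows stays essentially the same as for a non-nullable key column.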

View File

@ -16,7 +16,6 @@
#include <memory>
#include <cassert>
namespace DB
{
namespace ErrorCodes
@ -29,26 +28,42 @@ namespace ColumnsHashing
/// For the case when there is one numeric key.
/// UInt8/16/32/64 for any type with corresponding bit width.
template <typename Value, typename Mapped, typename FieldType, bool use_cache = true, bool need_offset = false>
template <typename Value, typename Mapped, typename FieldType, bool use_cache = true, bool need_offset = false, bool nullable = false>
struct HashMethodOneNumber
: public columns_hashing_impl::HashMethodBase<HashMethodOneNumber<Value, Mapped, FieldType, use_cache, need_offset>, Value, Mapped, use_cache, need_offset>
: public columns_hashing_impl::HashMethodBase<HashMethodOneNumber<Value, Mapped, FieldType, use_cache, need_offset, nullable>, Value, Mapped, use_cache, need_offset, nullable>
{
using Self = HashMethodOneNumber<Value, Mapped, FieldType, use_cache, need_offset>;
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset>;
using Self = HashMethodOneNumber<Value, Mapped, FieldType, use_cache, need_offset, nullable>;
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset, nullable>;
static constexpr bool has_cheap_key_calculation = true;
const char * vec;
/// If the keys are of fixed length, key_sizes contains their lengths; otherwise it is empty.
HashMethodOneNumber(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &)
HashMethodOneNumber(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &) : Base(key_columns[0])
{
vec = key_columns[0]->getRawData().data();
if constexpr (nullable)
{
const auto * null_column = checkAndGetColumn<ColumnNullable>(key_columns[0]);
vec = null_column->getNestedColumnPtr()->getRawData().data();
}
else
{
vec = key_columns[0]->getRawData().data();
}
}
explicit HashMethodOneNumber(const IColumn * column)
explicit HashMethodOneNumber(const IColumn * column) : Base(column)
{
vec = column->getRawData().data();
if constexpr (nullable)
{
const auto * null_column = checkAndGetColumn<ColumnNullable>(column);
vec = null_column->getNestedColumnPtr()->getRawData().data();
}
else
{
vec = column->getRawData().data();
}
}
/// Creates a context. The method is called once and the resulting context is used in all threads.
@ -73,22 +88,30 @@ struct HashMethodOneNumber
/// For the case when there is one string key.
template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true, bool need_offset = false>
template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true, bool need_offset = false, bool nullable = false>
struct HashMethodString
: public columns_hashing_impl::HashMethodBase<HashMethodString<Value, Mapped, place_string_to_arena, use_cache, need_offset>, Value, Mapped, use_cache, need_offset>
: public columns_hashing_impl::HashMethodBase<HashMethodString<Value, Mapped, place_string_to_arena, use_cache, need_offset, nullable>, Value, Mapped, use_cache, need_offset, nullable>
{
using Self = HashMethodString<Value, Mapped, place_string_to_arena, use_cache, need_offset>;
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset>;
using Self = HashMethodString<Value, Mapped, place_string_to_arena, use_cache, need_offset, nullable>;
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset, nullable>;
static constexpr bool has_cheap_key_calculation = false;
const IColumn::Offset * offsets;
const UInt8 * chars;
HashMethodString(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &)
HashMethodString(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &) : Base(key_columns[0])
{
const IColumn & column = *key_columns[0];
const ColumnString & column_string = assert_cast<const ColumnString &>(column);
const IColumn * column;
if constexpr (nullable)
{
column = checkAndGetColumn<ColumnNullable>(key_columns[0])->getNestedColumnPtr().get();
}
else
{
column = key_columns[0];
}
const ColumnString & column_string = assert_cast<const ColumnString &>(*column);
offsets = column_string.getOffsets().data();
chars = column_string.getChars().data();
}
@ -108,28 +131,35 @@ struct HashMethodString
}
protected:
friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset, nullable>;
};
/// For the case when there is one fixed-length string key.
template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true, bool need_offset = false>
template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true, bool need_offset = false, bool nullable = false>
struct HashMethodFixedString
: public columns_hashing_impl::
HashMethodBase<HashMethodFixedString<Value, Mapped, place_string_to_arena, use_cache, need_offset>, Value, Mapped, use_cache, need_offset>
: public columns_hashing_impl::HashMethodBase<HashMethodFixedString<Value, Mapped, place_string_to_arena, use_cache, need_offset, nullable>, Value, Mapped, use_cache, need_offset, nullable>
{
using Self = HashMethodFixedString<Value, Mapped, place_string_to_arena, use_cache, need_offset>;
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset>;
using Self = HashMethodFixedString<Value, Mapped, place_string_to_arena, use_cache, need_offset, nullable>;
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset, nullable>;
static constexpr bool has_cheap_key_calculation = false;
size_t n;
const ColumnFixedString::Chars * chars;
HashMethodFixedString(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &)
HashMethodFixedString(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &) : Base(key_columns[0])
{
const IColumn & column = *key_columns[0];
const ColumnFixedString & column_string = assert_cast<const ColumnFixedString &>(column);
const IColumn * column;
if constexpr (nullable)
{
column = checkAndGetColumn<ColumnNullable>(key_columns[0])->getNestedColumnPtr().get();
}
else
{
column = key_columns[0];
}
const ColumnFixedString & column_string = assert_cast<const ColumnFixedString &>(*column);
n = column_string.getN();
chars = &column_string.getChars();
}
@ -149,7 +179,7 @@ struct HashMethodFixedString
}
protected:
friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset, nullable>;
};
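
HashMethodString above caches only the offsets and chars of the nested ColumnString. A standalone sketch of that layout with plain std containers (not ClickHouse's ColumnString), assuming the usual convention that every stored value is followed by a terminating zero byte and that offsets are cumulative end positions, which is why one byte is subtracted when a key is formed:

#include <cstdint>
#include <cstring>
#include <iostream>
#include <string_view>
#include <vector>

int main()
{
    std::vector<uint8_t> chars;      // concatenated values, each followed by a '\0'
    std::vector<uint64_t> offsets;   // cumulative end positions (including the '\0')

    const char * values[] = {"aa", "bb", "cc"};
    for (const char * s : values)
    {
        chars.insert(chars.end(), s, s + std::strlen(s));
        chars.push_back('\0');
        offsets.push_back(chars.size());
    }

    // Equivalent of forming the string key for one row from offsets/chars.
    auto key_at = [&](size_t row)
    {
        size_t begin = row == 0 ? 0 : offsets[row - 1];
        return std::string_view(reinterpret_cast<const char *>(chars.data()) + begin,
                                offsets[row] - begin - 1);   // drop the trailing '\0'
    };

    std::cout << key_at(1) << "\n";   // bb
}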

View File

@ -6,7 +6,6 @@
#include <Common/HashTable/HashTableKeyHolder.h>
#include <Interpreters/AggregationCommon.h>
namespace DB
{
namespace ErrorCodes
@ -140,7 +139,7 @@ public:
FindResultImpl(bool found_, size_t off) : FindResultImplBase(found_), FindResultImplOffsetBase<need_offset>(off) {}
};
template <typename Derived, typename Value, typename Mapped, bool consecutive_keys_optimization, bool need_offset = false>
template <typename Derived, typename Value, typename Mapped, bool consecutive_keys_optimization, bool need_offset = false, bool nullable = false>
class HashMethodBase
{
public:
@ -154,6 +153,19 @@ public:
template <typename Data>
ALWAYS_INLINE EmplaceResult emplaceKey(Data & data, size_t row, Arena & pool)
{
if constexpr (nullable)
{
if (isNullAt(row))
{
bool has_null_key = data.hasNullKeyData();
data.hasNullKeyData() = true;
if constexpr (has_mapped)
return EmplaceResult(data.getNullKeyData(), data.getNullKeyData(), !has_null_key);
else
return EmplaceResult(!has_null_key);
}
}
auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, pool);
return emplaceImpl(key_holder, data);
}
@ -161,6 +173,16 @@ public:
template <typename Data>
ALWAYS_INLINE FindResult findKey(Data & data, size_t row, Arena & pool)
{
if constexpr (nullable)
{
if (isNullAt(row))
{
if constexpr (has_mapped)
return FindResult(&data.getNullKeyData(), data.hasNullKeyData(), 0);
else
return FindResult(data.hasNullKeyData(), 0);
}
}
auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, pool);
return findKeyImpl(keyHolderGetKey(key_holder), data);
}
@ -172,10 +194,25 @@ public:
return data.hash(keyHolderGetKey(key_holder));
}
ALWAYS_INLINE bool isNullAt(size_t row) const
{
if constexpr (nullable)
{
return null_map->getBool(row);
}
else
{
return false;
}
}
protected:
Cache cache;
const IColumn * null_map = nullptr;
bool has_null_data = false;
HashMethodBase()
/// The column argument is needed only when the key column is nullable.
explicit HashMethodBase(const IColumn * column = nullptr)
{
if constexpr (consecutive_keys_optimization)
{
@ -188,6 +225,11 @@ protected:
else
cache.value = Value();
}
if constexpr (nullable)
{
null_map = &checkAndGetColumn<ColumnNullable>(column)->getNullMapColumn();
}
}
template <typename Data, typename KeyHolder>
@ -293,7 +335,6 @@ protected:
}
};
template <typename T>
struct MappedCache : public PaddedPODArray<T> {};

View File

@ -9,7 +9,7 @@ class TwoLevelStringHashMap : public TwoLevelStringHashTable<StringHashMapSubMap
public:
using Key = StringRef;
using Self = TwoLevelStringHashMap;
using Base = TwoLevelStringHashTable<StringHashMapSubMaps<TMapped, Allocator>, StringHashMap<TMapped, Allocator>>;
using Base = TwoLevelStringHashTable<StringHashMapSubMaps<TMapped, Allocator>, ImplTable<TMapped, Allocator>>;
using LookupResult = typename Base::LookupResult;
using Base::Base;

View File

@ -775,6 +775,31 @@ AggregatedDataVariants::Type Aggregator::chooseAggregationMethod()
if (has_nullable_key)
{
/// Optimization for a single nullable key
if (params.keys_size == 1 && !has_low_cardinality)
{
if (types_removed_nullable[0]->isValueRepresentedByNumber())
{
size_t size_of_field = types_removed_nullable[0]->getSizeOfValueInMemory();
if (size_of_field == 1)
return AggregatedDataVariants::Type::nullable_key8;
if (size_of_field == 2)
return AggregatedDataVariants::Type::nullable_key16;
if (size_of_field == 4)
return AggregatedDataVariants::Type::nullable_key32;
if (size_of_field == 8)
return AggregatedDataVariants::Type::nullable_key64;
}
if (isFixedString(types_removed_nullable[0]))
{
return AggregatedDataVariants::Type::nullable_key_fixed_string;
}
if (isString(types_removed_nullable[0]))
{
return AggregatedDataVariants::Type::nullable_key_string;
}
}
if (params.keys_size == num_fixed_contiguous_keys && !has_low_cardinality)
{
/// If possible, pack all the keys along with information about which key values are null
@ -1889,7 +1914,7 @@ inline void Aggregator::insertAggregatesIntoColumns(Mapped & mapped, MutableColu
template <bool use_compiled_functions>
Block Aggregator::insertResultsIntoColumns(PaddedPODArray<AggregateDataPtr> & places, OutputBlockColumns && out_cols, Arena * arena) const
Block Aggregator::insertResultsIntoColumns(PaddedPODArray<AggregateDataPtr> & places, OutputBlockColumns && out_cols, Arena * arena, bool has_null_key_data [[maybe_unused]]) const
{
std::exception_ptr exception;
size_t aggregate_functions_destroy_index = 0;
@ -1912,8 +1937,12 @@ Block Aggregator::insertResultsIntoColumns(PaddedPODArray<AggregateDataPtr> & pl
continue;
auto & final_aggregate_column = out_cols.final_aggregate_columns[i];
final_aggregate_column = final_aggregate_column->cloneResized(places.size());
columns_data.emplace_back(getColumnData(final_aggregate_column.get()));
/**
* In convertToBlockImplFinal, an extra row for the null key may be written,
* so additional memory for that null-key data has to be allocated when compiled functions are used.
*/
final_aggregate_column = final_aggregate_column->cloneResized(places.size() + (has_null_key_data ? 1 : 0));
columns_data.emplace_back(getColumnData(final_aggregate_column.get(), (has_null_key_data ? 1 : 0)));
}
auto insert_aggregates_into_columns_function = compiled_functions.insert_aggregates_into_columns_function;
@ -1982,15 +2011,21 @@ Aggregator::convertToBlockImplFinal(Method & method, Table & data, Arena * arena
std::optional<OutputBlockColumns> out_cols;
std::optional<Sizes> shuffled_key_sizes;
PaddedPODArray<AggregateDataPtr> places;
bool has_null_key_data = false;
auto init_out_cols = [&]()
{
out_cols = prepareOutputBlockColumns(params, aggregate_functions, getHeader(final), aggregates_pools, final, max_block_size);
if constexpr (Method::low_cardinality_optimization)
if constexpr (Method::low_cardinality_optimization || Method::one_key_nullable_optimization)
{
/**
* When one_key_nullable_optimization is enabled, the null-key row is written to the key column and result columns up front,
* so insertResultsIntoColumns has to allocate memory for that null-key data.
*/
if (data.hasNullKeyData())
{
has_null_key_data = Method::one_key_nullable_optimization;
out_cols->key_columns[0]->insertDefault();
insertAggregatesIntoColumns(data.getNullKeyData(), out_cols->final_aggregate_columns, arena);
data.hasNullKeyData() = false;
@ -2022,7 +2057,7 @@ Aggregator::convertToBlockImplFinal(Method & method, Table & data, Arena * arena
{
if (places.size() >= max_block_size)
{
res.emplace_back(insertResultsIntoColumns<use_compiled_functions>(places, std::move(out_cols.value()), arena));
res.emplace_back(insertResultsIntoColumns<use_compiled_functions>(places, std::move(out_cols.value()), arena, has_null_key_data));
places.clear();
out_cols.reset();
}
@ -2031,12 +2066,12 @@ Aggregator::convertToBlockImplFinal(Method & method, Table & data, Arena * arena
if constexpr (return_single_block)
{
return insertResultsIntoColumns<use_compiled_functions>(places, std::move(out_cols.value()), arena);
return insertResultsIntoColumns<use_compiled_functions>(places, std::move(out_cols.value()), arena, has_null_key_data);
}
else
{
if (out_cols.has_value())
res.emplace_back(insertResultsIntoColumns<use_compiled_functions>(places, std::move(out_cols.value()), arena));
res.emplace_back(insertResultsIntoColumns<use_compiled_functions>(places, std::move(out_cols.value()), arena, has_null_key_data));
return res;
}
}
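
The two comment blocks above describe the same constraint from both sides: when the NULL key holds data, its aggregate result is written into the output columns first, so the columns need one extra row and the compiled bulk insert has to start one row later. A minimal sketch of that layout with plain vectors and a hypothetical insert_results stand-in for the compiled insert_aggregates_into_columns function (only cloneResized and getColumnData correspond to real calls):

#include <cstdint>
#include <iostream>
#include <vector>

// Stand-in for the compiled insert function: it only sees a raw pointer and writes
// `count` results starting at position 0 of whatever pointer it is given.
static void insert_results(int64_t * column_data, const std::vector<int64_t> & results, size_t count)
{
    for (size_t i = 0; i < count; ++i)
        column_data[i] = results[i];
}

int main()
{
    std::vector<int64_t> results = {10, 20, 30};   // finalized aggregates for the non-NULL keys ("places")
    int64_t null_key_result = 7;                   // finalized aggregate for the NULL key
    bool has_null_key_data = true;

    // cloneResized(places.size() + 1): one extra row up front for the NULL key.
    std::vector<int64_t> final_column(results.size() + (has_null_key_data ? 1 : 0));
    if (has_null_key_data)
        final_column[0] = null_key_result;         // written by insertAggregatesIntoColumns for the NULL key

    // getColumnData(column, skip_rows = 1): hand the compiled code a pointer past the NULL-key row.
    insert_results(final_column.data() + (has_null_key_data ? 1 : 0), results, results.size());

    for (int64_t v : final_column)
        std::cout << v << ' ';                     // prints: 7 10 20 30
    std::cout << '\n';
}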
@ -2056,7 +2091,7 @@ Aggregator::convertToBlockImplNotFinal(Method & method, Table & data, Arenas & a
{
out_cols = prepareOutputBlockColumns(params, aggregate_functions, getHeader(final), aggregates_pools, final, max_block_size);
if constexpr (Method::low_cardinality_optimization)
if constexpr (Method::low_cardinality_optimization || Method::one_key_nullable_optimization)
{
if (data.hasNullKeyData())
{
@ -2397,7 +2432,7 @@ void NO_INLINE Aggregator::mergeDataNullKey(
Table & table_src,
Arena * arena) const
{
if constexpr (Method::low_cardinality_optimization)
if constexpr (Method::low_cardinality_optimization || Method::one_key_nullable_optimization)
{
if (table_src.hasNullKeyData())
{
@ -2429,7 +2464,7 @@ void NO_INLINE Aggregator::mergeDataNullKey(
template <typename Method, bool use_compiled_functions, bool prefetch, typename Table>
void NO_INLINE Aggregator::mergeDataImpl(Table & table_dst, Table & table_src, Arena * arena) const
{
if constexpr (Method::low_cardinality_optimization)
if constexpr (Method::low_cardinality_optimization || Method::one_key_nullable_optimization)
mergeDataNullKey<Method, Table>(table_dst, table_src, arena);
auto merge = [&](AggregateDataPtr & __restrict dst, AggregateDataPtr & __restrict src, bool inserted)
@ -2490,7 +2525,7 @@ void NO_INLINE Aggregator::mergeDataNoMoreKeysImpl(
Arena * arena) const
{
/// Note: creates data for the NULL key if it does not exist
if constexpr (Method::low_cardinality_optimization)
if constexpr (Method::low_cardinality_optimization || Method::one_key_nullable_optimization)
mergeDataNullKey<Method, Table>(table_dst, table_src, arena);
table_src.mergeToViaFind(table_dst, [&](AggregateDataPtr dst, AggregateDataPtr & src, bool found)
@ -2518,7 +2553,7 @@ void NO_INLINE Aggregator::mergeDataOnlyExistingKeysImpl(
Arena * arena) const
{
/// Note: creates data for the NULL key if it does not exist
if constexpr (Method::low_cardinality_optimization)
if constexpr (Method::low_cardinality_optimization || Method::one_key_nullable_optimization)
mergeDataNullKey<Method, Table>(table_dst, table_src, arena);
table_src.mergeToViaFind(table_dst,
@ -3194,7 +3229,7 @@ void NO_INLINE Aggregator::convertBlockToTwoLevelImpl(
/// For every row.
for (size_t i = 0; i < rows; ++i)
{
if constexpr (Method::low_cardinality_optimization)
if constexpr (Method::low_cardinality_optimization || Method::one_key_nullable_optimization)
{
if (state.isNullAt(i))
{

View File

@ -159,10 +159,17 @@ using StringHashTableWithNullKey = AggregationDataWithNullKey<StringHashMap<Type
using AggregatedDataWithNullableUInt8Key = AggregationDataWithNullKey<AggregatedDataWithUInt8Key>;
using AggregatedDataWithNullableUInt16Key = AggregationDataWithNullKey<AggregatedDataWithUInt16Key>;
using AggregatedDataWithNullableUInt32Key = AggregationDataWithNullKey<AggregatedDataWithUInt32Key>;
using AggregatedDataWithNullableUInt64Key = AggregationDataWithNullKey<AggregatedDataWithUInt64Key>;
using AggregatedDataWithNullableStringKey = AggregationDataWithNullKey<AggregatedDataWithStringKey>;
using AggregatedDataWithNullableShortStringKey = AggregationDataWithNullKey<AggregatedDataWithShortStringKey>;
using AggregatedDataWithNullableUInt32KeyTwoLevel = AggregationDataWithNullKeyTwoLevel<
TwoLevelHashMap<UInt32, AggregateDataPtr, HashCRC32<UInt32>,
TwoLevelHashTableGrower<>, HashTableAllocator, HashTableWithNullKey>>;
using AggregatedDataWithNullableUInt64KeyTwoLevel = AggregationDataWithNullKeyTwoLevel<
TwoLevelHashMap<UInt64, AggregateDataPtr, HashCRC32<UInt64>,
TwoLevelHashTableGrower<>, HashTableAllocator, HashTableWithNullKey>>;
@ -174,11 +181,10 @@ using AggregatedDataWithNullableStringKeyTwoLevel = AggregationDataWithNullKeyTw
TwoLevelHashMapWithSavedHash<StringRef, AggregateDataPtr, DefaultHash<StringRef>,
TwoLevelHashTableGrower<>, HashTableAllocator, HashTableWithNullKey>>;
/// For the case where there is one numeric key.
/// FieldType is UInt8/16/32/64 for any type with corresponding bit width.
template <typename FieldType, typename TData,
bool consecutive_keys_optimization = true>
bool consecutive_keys_optimization = true, bool nullable = false>
struct AggregationMethodOneNumber
{
using Data = TData;
@ -198,10 +204,11 @@ struct AggregationMethodOneNumber
/// To use one `Method` in different threads, use different `State`.
using State = ColumnsHashing::HashMethodOneNumber<typename Data::value_type,
Mapped, FieldType, consecutive_keys_optimization>;
Mapped, FieldType, consecutive_keys_optimization, false, nullable>;
/// Use optimization for low cardinality.
static const bool low_cardinality_optimization = false;
static const bool one_key_nullable_optimization = nullable;
/// Shuffle key columns before `insertKeyIntoColumns` call if needed.
std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> &, const Sizes &) { return {}; }
@ -209,9 +216,20 @@ struct AggregationMethodOneNumber
// Insert the key from the hash table into columns.
static void insertKeyIntoColumns(const Key & key, std::vector<IColumn *> & key_columns, const Sizes & /*key_sizes*/)
{
ColumnVectorHelper * column;
if constexpr (nullable)
{
ColumnNullable & nullable_col = assert_cast<ColumnNullable &>(*key_columns[0]);
ColumnUInt8 * null_map = assert_cast<ColumnUInt8 *>(&nullable_col.getNullMapColumn());
null_map->insertDefault();
column = static_cast<ColumnVectorHelper *>(&nullable_col.getNestedColumn());
}
else
{
column = static_cast<ColumnVectorHelper *>(key_columns[0]);
}
static_assert(sizeof(FieldType) <= sizeof(Key));
const auto * key_holder = reinterpret_cast<const char *>(&key);
auto * column = static_cast<ColumnVectorHelper *>(key_columns[0]);
if constexpr (sizeof(FieldType) < sizeof(Key) && std::endian::native == std::endian::big)
column->insertRawData<sizeof(FieldType)>(key_holder + (sizeof(Key) - sizeof(FieldType)));
else
@ -242,6 +260,7 @@ struct AggregationMethodString
using State = ColumnsHashing::HashMethodString<typename Data::value_type, Mapped>;
static const bool low_cardinality_optimization = false;
static const bool one_key_nullable_optimization = false;
std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> &, const Sizes &) { return {}; }
@ -253,7 +272,7 @@ struct AggregationMethodString
/// Same as above but without cache
template <typename TData>
template <typename TData, bool nullable = false>
struct AggregationMethodStringNoCache
{
using Data = TData;
@ -271,15 +290,23 @@ struct AggregationMethodStringNoCache
{
}
using State = ColumnsHashing::HashMethodString<typename Data::value_type, Mapped, true, false>;
using State = ColumnsHashing::HashMethodString<typename Data::value_type, Mapped, true, false, false, nullable>;
static const bool low_cardinality_optimization = false;
static const bool one_key_nullable_optimization = nullable;
std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> &, const Sizes &) { return {}; }
static void insertKeyIntoColumns(StringRef key, std::vector<IColumn *> & key_columns, const Sizes &)
{
static_cast<ColumnString *>(key_columns[0])->insertData(key.data, key.size);
if constexpr (nullable)
{
static_cast<ColumnNullable *>(key_columns[0])->insertData(key.data, key.size);
}
else
{
static_cast<ColumnString *>(key_columns[0])->insertData(key.data, key.size);
}
}
};
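
The nullable branches of insertKeyIntoColumns above only ever see non-NULL keys, since NULL rows never enter the hash table; each key therefore appends its value to the nested column and a 0 to the null map, while the NULL-key row itself is emitted up front via insertDefault() in convertToBlockImplFinal. A standalone sketch with an invented NullableStringColumn (plain vectors, not IColumn):

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

struct NullableStringColumn
{
    std::vector<std::string> nested;   // models the nested ColumnString
    std::vector<uint8_t> null_map;     // 1 = NULL

    void insertDefault() { nested.emplace_back(); null_map.push_back(1); }                       // the NULL-key row
    void insertData(const std::string & key) { nested.push_back(key); null_map.push_back(0); }   // a non-NULL key
};

int main()
{
    NullableStringColumn key_column;

    bool has_null_key_data = true;
    if (has_null_key_data)
        key_column.insertDefault();        // out_cols->key_columns[0]->insertDefault()

    const char * keys[] = {"aa", "bb"};    // keys pulled back out of the hash table
    for (const char * key : keys)
        key_column.insertData(key);        // insertKeyIntoColumns with nullable = true

    for (size_t i = 0; i < key_column.nested.size(); ++i)
    {
        if (key_column.null_map[i])
            std::cout << "NULL\n";
        else
            std::cout << key_column.nested[i] << "\n";
    }
}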
@ -306,6 +333,7 @@ struct AggregationMethodFixedString
using State = ColumnsHashing::HashMethodFixedString<typename Data::value_type, Mapped>;
static const bool low_cardinality_optimization = false;
static const bool one_key_nullable_optimization = false;
std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> &, const Sizes &) { return {}; }
@ -316,7 +344,7 @@ struct AggregationMethodFixedString
};
/// Same as above but without cache
template <typename TData>
template <typename TData, bool nullable = false>
struct AggregationMethodFixedStringNoCache
{
using Data = TData;
@ -334,15 +362,23 @@ struct AggregationMethodFixedStringNoCache
{
}
using State = ColumnsHashing::HashMethodFixedString<typename Data::value_type, Mapped, true, false>;
using State = ColumnsHashing::HashMethodFixedString<typename Data::value_type, Mapped, true, false, false, nullable>;
static const bool low_cardinality_optimization = false;
static const bool one_key_nullable_optimization = nullable;
std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> &, const Sizes &) { return {}; }
static void insertKeyIntoColumns(StringRef key, std::vector<IColumn *> & key_columns, const Sizes &)
{
static_cast<ColumnFixedString *>(key_columns[0])->insertData(key.data, key.size);
if constexpr (nullable)
{
static_cast<ColumnNullable *>(key_columns[0])->insertData(key.data, key.size);
}
else
{
static_cast<ColumnFixedString *>(key_columns[0])->insertData(key.data, key.size);
}
}
};
@ -418,6 +454,7 @@ struct AggregationMethodKeysFixed
use_cache>;
static const bool low_cardinality_optimization = false;
static const bool one_key_nullable_optimization = false;
std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> & key_columns, const Sizes & key_sizes)
{
@ -505,6 +542,7 @@ struct AggregationMethodSerialized
using State = ColumnsHashing::HashMethodSerialized<typename Data::value_type, Mapped>;
static const bool low_cardinality_optimization = false;
static const bool one_key_nullable_optimization = false;
std::optional<Sizes> shuffleKeyColumns(std::vector<IColumn *> &, const Sizes &) { return {}; }
@ -588,6 +626,18 @@ struct AggregatedDataVariants : private boost::noncopyable
std::unique_ptr<AggregationMethodSerialized<AggregatedDataWithStringKeyHash64>> serialized_hash64;
/// Support for nullable keys.
std::unique_ptr<AggregationMethodOneNumber<UInt8, AggregatedDataWithNullableUInt8Key, false, true>> nullable_key8;
std::unique_ptr<AggregationMethodOneNumber<UInt16, AggregatedDataWithNullableUInt16Key, false, true>> nullable_key16;
std::unique_ptr<AggregationMethodOneNumber<UInt32, AggregatedDataWithNullableUInt32Key, true, true>> nullable_key32;
std::unique_ptr<AggregationMethodOneNumber<UInt64, AggregatedDataWithNullableUInt64Key, true, true>> nullable_key64;
std::unique_ptr<AggregationMethodOneNumber<UInt32, AggregatedDataWithNullableUInt32KeyTwoLevel, true, true>> nullable_key32_two_level;
std::unique_ptr<AggregationMethodOneNumber<UInt64, AggregatedDataWithNullableUInt64KeyTwoLevel, true, true>> nullable_key64_two_level;
std::unique_ptr<AggregationMethodStringNoCache<AggregatedDataWithNullableShortStringKey, true>> nullable_key_string;
std::unique_ptr<AggregationMethodFixedStringNoCache<AggregatedDataWithNullableShortStringKey, true>> nullable_key_fixed_string;
std::unique_ptr<AggregationMethodStringNoCache<AggregatedDataWithNullableShortStringKeyTwoLevel, true>> nullable_key_string_two_level;
std::unique_ptr<AggregationMethodFixedStringNoCache<AggregatedDataWithNullableShortStringKeyTwoLevel, true>> nullable_key_fixed_string_two_level;
std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128, true>> nullable_keys128;
std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256, true>> nullable_keys256;
std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128TwoLevel, true>> nullable_keys128_two_level;
@ -640,6 +690,16 @@ struct AggregatedDataVariants : private boost::noncopyable
M(keys128_hash64, false) \
M(keys256_hash64, false) \
M(serialized_hash64, false) \
M(nullable_key8, false) \
M(nullable_key16, false) \
M(nullable_key32, false) \
M(nullable_key64, false) \
M(nullable_key32_two_level, true) \
M(nullable_key64_two_level, true) \
M(nullable_key_string, false) \
M(nullable_key_fixed_string, false) \
M(nullable_key_string_two_level, true) \
M(nullable_key_fixed_string_two_level, true) \
M(nullable_keys128, false) \
M(nullable_keys256, false) \
M(nullable_keys128_two_level, true) \
@ -754,6 +814,10 @@ struct AggregatedDataVariants : private boost::noncopyable
M(keys128) \
M(keys256) \
M(serialized) \
M(nullable_key32) \
M(nullable_key64) \
M(nullable_key_string) \
M(nullable_key_fixed_string) \
M(nullable_keys128) \
M(nullable_keys256) \
M(low_cardinality_key32) \
@ -767,6 +831,8 @@ struct AggregatedDataVariants : private boost::noncopyable
#define APPLY_FOR_VARIANTS_NOT_CONVERTIBLE_TO_TWO_LEVEL(M) \
M(key8) \
M(key16) \
M(nullable_key8) \
M(nullable_key16) \
M(keys16) \
M(key64_hash64) \
M(key_string_hash64)\
@ -810,6 +876,10 @@ struct AggregatedDataVariants : private boost::noncopyable
M(keys128_two_level) \
M(keys256_two_level) \
M(serialized_two_level) \
M(nullable_key32_two_level) \
M(nullable_key64_two_level) \
M(nullable_key_string_two_level) \
M(nullable_key_fixed_string_two_level) \
M(nullable_keys128_two_level) \
M(nullable_keys256_two_level) \
M(low_cardinality_key32_two_level) \
@ -1290,7 +1360,7 @@ private:
Arena * arena) const;
template <bool use_compiled_functions>
Block insertResultsIntoColumns(PaddedPODArray<AggregateDataPtr> & places, OutputBlockColumns && out_cols, Arena * arena) const;
Block insertResultsIntoColumns(PaddedPODArray<AggregateDataPtr> & places, OutputBlockColumns && out_cols, Arena * arena, bool has_null_key_data) const;
template <typename Method, bool use_compiled_functions, bool return_single_block, typename Table>
ConvertToBlockRes<return_single_block>

View File

@ -41,7 +41,7 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
ColumnData getColumnData(const IColumn * column)
ColumnData getColumnData(const IColumn * column, size_t skip_rows)
{
const bool is_const = isColumnConst(*column);
@ -52,11 +52,11 @@ ColumnData getColumnData(const IColumn * column)
if (const auto * nullable = typeid_cast<const ColumnNullable *>(column))
{
result.null_data = nullable->getNullMapColumn().getRawData().data();
result.null_data = nullable->getNullMapColumn().getDataAt(skip_rows).data;
column = &nullable->getNestedColumn();
}
result.data = column->getRawData().data();
/// Skip rows at the start that hold the null-key data (one-nullable-key optimization)
result.data = column->getDataAt(skip_rows).data;
return result;
}

View File

@ -26,7 +26,7 @@ struct ColumnData
/** Returns ColumnData for column.
* If constant column is passed, LOGICAL_ERROR will be thrown.
*/
ColumnData getColumnData(const IColumn * column);
ColumnData getColumnData(const IColumn * column, size_t skip_rows = 0);
using ColumnDataRowsOffset = size_t;
using ColumnDataRowsSize = size_t;

View File

@ -0,0 +1,49 @@
<test>
<settings>
<max_insert_threads>8</max_insert_threads>
<allow_experimental_projection_optimization>0</allow_experimental_projection_optimization>
</settings>
<substitutions>
<substitution>
<name>key_type</name>
<values>
<value>key_string</value>
<value>key_string_two_level</value>
<value>key_fixed_string</value>
<value>key_int64</value>
<value>key_int64_two_level</value>
<value>key_int32</value>
<value>key_int32_two_level</value>
<value>key_int16</value>
<value>key_int8</value>
</values>
</substitution>
</substitutions>
<create_query>
CREATE TABLE t_nullable
(
key_string Nullable(String),
key_string_two_level Nullable(String),
key_fixed_string Nullable(FixedString(3)),
key_int64 Nullable(Int64),
key_int64_two_level Nullable(Int64),
key_int32 Nullable(Int32),
key_int32_two_level Nullable(Int32),
key_int16 Nullable(Int16),
key_int8 Nullable(Int8),
i1 Nullable(Int64),
i2 Nullable(Int64)
)
ENGINE = MergeTree
ORDER BY tuple()
</create_query>
<fill_query>insert into t_nullable select ['aa','bb','cc','dd'][number % 10000 + 1], ['aa','bb','cc','dd'][number % 200000 + 1], ['aa','bb','cc','dd'][number % 4 + 1], toInt64(number%10000), toInt64(number%200000), toInt32(number%10000), toInt32(number%200000), toInt16(number%10000), toInt8(number%100), number%6000+1, number%5000+2 from numbers_mt(20000000)</fill_query>
<fill_query>insert into t_nullable select ['aa','bb','cc','dd'][number % 10000 + 1], ['aa','bb','cc','dd'][number % 200000 + 1], ['aa','bb','cc','dd'][number % 4 + 1], toInt64(number%10000), toInt64(number%200000), toInt32(number%10000), toInt32(number%200000), toInt16(number%10000), toInt8(number%100), number%6000+1, number%5000+2 from numbers_mt(20000000)</fill_query>
<fill_query>insert into t_nullable select ['aa','bb','cc','dd'][number % 10000 + 1], ['aa','bb','cc','dd'][number % 200000 + 1], ['aa','bb','cc','dd'][number % 4 + 1], toInt64(number%10000), toInt64(number%200000), toInt32(number%10000), toInt32(number%200000), toInt16(number%10000), toInt8(number%100), number%6000+1, number%5000+2 from numbers_mt(20000000)</fill_query>
<fill_query>insert into t_nullable select ['aa','bb','cc','dd'][number % 10000 + 1], ['aa','bb','cc','dd'][number % 200000 + 1], ['aa','bb','cc','dd'][number % 4 + 1], toInt64(number%10000), toInt64(number%200000), toInt32(number%10000), toInt32(number%200000), toInt16(number%10000), toInt8(number%100), number%6000+1, number%5000+2 from numbers_mt(20000000)</fill_query>
<fill_query>OPTIMIZE TABLE t_nullable FINAL</fill_query>
<query>select min(i1), min(i2) from t_nullable group by {key_type} format Null</query>
<drop_query>drop table if exists t_nullable</drop_query>
</test>