dbms: Server: added support for aggregation with nullable keys [#METR-19266]

This commit is contained in:
Alexey Arno 2016-10-18 13:09:48 +03:00
parent 9fc95c5093
commit 2a0c842966
3 changed files with 456 additions and 33 deletions

View File

@ -11,6 +11,7 @@
#include <DB/Columns/IColumn.h> #include <DB/Columns/IColumn.h>
#include <DB/Columns/ColumnsNumber.h> #include <DB/Columns/ColumnsNumber.h>
#include <DB/Columns/ColumnFixedString.h> #include <DB/Columns/ColumnFixedString.h>
#include <DB/Columns/ColumnNullable.h>
template <> template <>
@ -30,6 +31,7 @@ using Sizes = std::vector<size_t>;
/// ceil(T/8). Thus we must have: S = T + ceil(T/8). Below we indicate for /// ceil(T/8). Thus we must have: S = T + ceil(T/8). Below we indicate for
/// each value of S, the corresponding value of T, and the bitmap size: /// each value of S, the corresponding value of T, and the bitmap size:
/// ///
/// 32,28,4
/// 16,14,2 /// 16,14,2
/// 8,7,1 /// 8,7,1
/// 4,3,1 /// 4,3,1
@ -43,6 +45,8 @@ template <typename T>
constexpr auto getBitmapSize() constexpr auto getBitmapSize()
{ {
return return
(sizeof(T) == 32) ?
4 :
(sizeof(T) == 16) ? (sizeof(T) == 16) ?
2 : 2 :
((sizeof(T) == 8) ? ((sizeof(T) == 8) ?
@ -117,7 +121,7 @@ static inline T ALWAYS_INLINE packFixed(
size_t offset = 0; size_t offset = 0;
static constexpr auto bitmap_size = std::tuple_size<KeysNullMap<T>>::value; static constexpr auto bitmap_size = std::tuple_size<KeysNullMap<T>>::value;
bool has_bitmap = bitmap_size > 0; static constexpr bool has_bitmap = bitmap_size > 0;
if (has_bitmap) if (has_bitmap)
{ {
@ -234,17 +238,29 @@ static inline StringRef * ALWAYS_INLINE extractKeysAndPlaceInPool(
} }
/** Скопировать ключи в пул в непрерывный кусок памяти. /// Place the specified keys into a continuous memory chunk. The implementation
* Потом разместить в пуле StringRef-ы на них. /// of this function depends on whether some keys are nullable or not. See comments
* /// below for the specialized implementations.
* [key1][key2]...[keyN][ref1][ref2]...[refN] template <bool has_nullable_keys>
* ^---------------------| | static StringRef extractKeysAndPlaceInPoolContiguous(
* ^---------------------| size_t i, size_t keys_size, const ConstColumnPlainPtrs & key_columns, StringRefs & keys, Arena & pool);
* ^---return-value----^
* /// Implementation for the case when there are no nullable keys.
* Вернуть StringRef на кусок памяти с ключами (без учёта StringRef-ов после них). /// Copy the specified keys to a continuous memory chunk of a pool.
*/ /// Subsequently append StringRef objects referring to each key.
static inline StringRef ALWAYS_INLINE extractKeysAndPlaceInPoolContiguous( ///
/// [key1][key2]...[keyN][ref1][ref2]...[refN]
/// ^ ^ : | |
/// +-----|--------:-----+ |
/// : +--------:-----------+
/// : :
/// <-------------->
/// (1)
///
/// Return a StringRef object, referring to the area (1) of the memory
/// chunk that contains the keys. In other words, we ignore their StringRefs.
template <>
inline StringRef ALWAYS_INLINE extractKeysAndPlaceInPoolContiguous<false>(
size_t i, size_t keys_size, const ConstColumnPlainPtrs & key_columns, StringRefs & keys, Arena & pool) size_t i, size_t keys_size, const ConstColumnPlainPtrs & key_columns, StringRefs & keys, Arena & pool)
{ {
size_t sum_keys_size = 0; size_t sum_keys_size = 0;
@ -270,6 +286,93 @@ static inline StringRef ALWAYS_INLINE extractKeysAndPlaceInPoolContiguous(
return {res, sum_keys_size}; return {res, sum_keys_size};
} }
/// Implementation for the case where there is at least one nullable key.
/// Inside a continuous memory chunk of a pool, put a bitmap that indicates
/// for each specified key whether its value is null or not (bit j of the
/// bitmap is set when key j is null). Copy the keys whose values are not
/// null to the memory chunk. Subsequently append StringRef objects referring
/// to each key, even those whose value is null.
///
/// [bitmap][key1][key2][key4]...[keyN][ref1][ref2][ref3 (null)]...[refN]
/// :       ^     ^              :     |     |
/// :       +-----|--------------:-----+     |
/// :             +--------------:-----------+
/// :                            :
/// <---------------------------->
///              (1)
///
/// Return a StringRef object, referring to the area (1) of the memory
/// chunk that contains the bitmap and the keys. In other words, we ignore
/// the keys' StringRefs.
///
/// Parameters:
///  i           - index of the row whose keys are extracted;
///  keys_size   - number of keys;
///  key_columns - key columns; each of them may or may not be nullable;
///  keys        - scratch buffer; on return keys[j] refers to the copy of key j
///                inside the pool, or is an empty StringRef if key j is null;
///  pool        - arena from which the memory chunk is allocated.
template <>
inline StringRef ALWAYS_INLINE extractKeysAndPlaceInPoolContiguous<true>(
    size_t i, size_t keys_size, const ConstColumnPlainPtrs & key_columns, StringRefs & keys, Arena & pool)
{
    /// One bit per key, rounded up to a whole number of bytes.
    size_t bitmap_size = keys_size / 8;
    if ((keys_size % 8) != 0) { ++bitmap_size; }

    /// This function handles a single row, so it is presumably called once per
    /// aggregated row. Keep the temporary bitmap on the stack whenever it fits
    /// (up to 256 keys) in order to avoid a heap allocation on each call;
    /// fall back to the heap only for an unusually large number of keys.
    constexpr size_t max_stack_bitmap_size = 32;
    UInt8 stack_bitmap[max_stack_bitmap_size] = {};
    std::vector<UInt8> heap_bitmap;
    UInt8 * bitmap = stack_bitmap;
    if (bitmap_size > max_stack_bitmap_size)
    {
        heap_bitmap.resize(bitmap_size);
        bitmap = heap_bitmap.data();
    }

    /// Prepare the keys to be stored. Create the bitmap.
    size_t keys_bytes = 0;
    for (size_t j = 0; j < keys_size; ++j)
    {
        const IColumn * observed_column;
        bool is_null;

        if (key_columns[j]->isNullable())
        {
            /// For a nullable key, the value lives in the nested column and
            /// the null flag in the byte map of the nullable column.
            const ColumnNullable & nullable_col = static_cast<const ColumnNullable &>(*key_columns[j]);
            observed_column = nullable_col.getNestedColumn().get();
            const auto & null_map = static_cast<const ColumnUInt8 &>(*nullable_col.getNullValuesByteMap()).getData();
            is_null = null_map[i] == 1;
        }
        else
        {
            observed_column = key_columns[j];
            is_null = false;
        }

        if (is_null)
        {
            size_t bucket = j / 8;
            size_t offset = j % 8;

            bitmap[bucket] |= UInt8(1) << offset;
            /// A null key contributes no data bytes.
            keys[j] = StringRef{};
        }
        else
        {
            keys[j] = observed_column->getDataAtWithTerminatingZero(i);
            keys_bytes += keys[j].size;
        }
    }

    /// Allocate space for bitmap + non-null keys + StringRef objects.
    char * res = pool.alloc(bitmap_size + keys_bytes + keys_size * sizeof(StringRef));
    char * place = res;

    /// Store the bitmap.
    memcpy(place, bitmap, bitmap_size);
    place += bitmap_size;

    /// Store the non-null keys data and make each StringRef point to the copy.
    for (size_t j = 0; j < keys_size; ++j)
    {
        size_t bucket = j / 8;
        size_t offset = j % 8;

        if (((bitmap[bucket] >> offset) & 1) == 0)
        {
            memcpy(place, keys[j].data, keys[j].size);
            keys[j].data = place;
            place += keys[j].size;
        }
    }

    /// Store StringRef objects for all the keys, i.e. even for those
    /// whose value is null.
    memcpy(place, &keys[0], keys_size * sizeof(StringRef));

    /// The returned reference deliberately excludes the trailing StringRefs.
    return {res, bitmap_size + keys_bytes};
}
/** Сериализовать ключи в непрерывный кусок памяти. /** Сериализовать ключи в непрерывный кусок памяти.
*/ */

View File

@ -25,6 +25,7 @@
#include <DB/Columns/ColumnFixedString.h> #include <DB/Columns/ColumnFixedString.h>
#include <DB/Columns/ColumnAggregateFunction.h> #include <DB/Columns/ColumnAggregateFunction.h>
#include <DB/Columns/ColumnVector.h> #include <DB/Columns/ColumnVector.h>
#include <DB/Columns/ColumnNullable.h>
namespace DB namespace DB
@ -265,9 +266,102 @@ struct AggregationMethodFixedString
} }
}; };
/// This class is designed to provide the functionality that is required for
/// supporting nullable keys in AggregationMethodKeysFixed. If there are
/// no nullable keys, this class is merely implemented as an empty shell.
///
/// Template parameters:
///  Key               - type of the packed fixed-size key; it also selects
///                      the type of the null bitmap, KeysNullMap<Key>;
///  has_nullable_keys - selects one of the two specializations of this class.
/// Only the declaration is given here: each value of has_nullable_keys
/// has its own specialization.
template <typename Key, bool has_nullable_keys>
class BaseStateKeysFixed;
/// Case where nullable keys are supported.
/// For each key column, remember the column that actually holds the values
/// (the nested column if the key is nullable, the key column itself otherwise)
/// and the byte map of null flags (nullptr if the key is not nullable).
template <typename Key>
class BaseStateKeysFixed<Key, true>
{
protected:
    /// Split the specified key columns into value columns and null maps.
    /// Any state left by a previous call is discarded first: otherwise a second
    /// call would append to the vectors, and createBitmap() would then index
    /// the bitmap with positions beyond the actual number of keys.
    void init(const ConstColumnPlainPtrs & key_columns)
    {
        actual_columns.clear();
        null_maps.clear();

        null_maps.reserve(key_columns.size());
        actual_columns.reserve(key_columns.size());

        for (const auto & col : key_columns)
        {
            if (col->isNullable())
            {
                const auto & nullable_col = static_cast<const ColumnNullable &>(*col);
                actual_columns.push_back(nullable_col.getNestedColumn().get());
                null_maps.push_back(nullable_col.getNullValuesByteMap().get());
            }
            else
            {
                actual_columns.push_back(col);
                /// No null map for a non-nullable key.
                null_maps.push_back(nullptr);
            }
        }
    }

    /// Return the columns which actually contain the values of the keys.
    /// For a given key column, if it is nullable, we return its nested
    /// column. Otherwise we return the key column itself.
    inline const ConstColumnPlainPtrs & getActualColumns() const
    {
        return actual_columns;
    }

    /// Create a bitmap that indicates whether, for a particular row,
    /// a key column bears a null value or not. Bit k of the bitmap is set
    /// when the k-th key is nullable and its value at the specified row is null.
    KeysNullMap<Key> createBitmap(size_t row) const
    {
        KeysNullMap<Key> bitmap{};

        for (size_t k = 0; k < null_maps.size(); ++k)
        {
            if (null_maps[k] != nullptr)
            {
                const auto & null_map = static_cast<const ColumnUInt8 &>(*null_maps[k]).getData();
                if (null_map[row] == 1)
                {
                    size_t bucket = k / 8;
                    size_t offset = k % 8;
                    bitmap[bucket] |= UInt8(1) << offset;
                }
            }
        }

        return bitmap;
    }

private:
    /// Columns holding the key values; same order and size as the key columns.
    ConstColumnPlainPtrs actual_columns;
    /// Null flag byte maps; null_maps[k] is nullptr if the k-th key is not nullable.
    ConstColumnPlainPtrs null_maps;
};
/// Case where nullable keys are not supported.
/// This specialization carries no state. Each of its methods unconditionally
/// throws a LOGICAL_ERROR exception: it only mirrors the interface of the
/// nullable specialization so that code written against both variants compiles.
template <typename Key>
class BaseStateKeysFixed<Key, false>
{
protected:
    /// Always throws; the key columns are not inspected.
    void init(const ConstColumnPlainPtrs & /* key_columns */)
    {
        throw Exception{
            "Internal error: calling init() for non-nullable keys is forbidden",
            ErrorCodes::LOGICAL_ERROR};
    }

    /// Always throws; there are no nested columns to return.
    const ConstColumnPlainPtrs & getActualColumns() const
    {
        throw Exception{
            "Internal error: calling getActualColumns() for non-nullable keys is forbidden",
            ErrorCodes::LOGICAL_ERROR};
    }

    /// Always throws; no bitmap exists when no key is nullable.
    KeysNullMap<Key> createBitmap(size_t /* row */) const
    {
        throw Exception{
            "Internal error: calling createBitmap() for non-nullable keys is forbidden",
            ErrorCodes::LOGICAL_ERROR};
    }
};
/// Для случая, когда все ключи фиксированной длины, и они помещаются в N (например, 128) бит. /// Для случая, когда все ключи фиксированной длины, и они помещаются в N (например, 128) бит.
template <typename TData> template <typename TData, bool has_nullable_keys_ = false>
struct AggregationMethodKeysFixed struct AggregationMethodKeysFixed
{ {
using Data = TData; using Data = TData;
@ -275,6 +369,7 @@ struct AggregationMethodKeysFixed
using Mapped = typename Data::mapped_type; using Mapped = typename Data::mapped_type;
using iterator = typename Data::iterator; using iterator = typename Data::iterator;
using const_iterator = typename Data::const_iterator; using const_iterator = typename Data::const_iterator;
static constexpr bool has_nullable_keys = has_nullable_keys_;
Data data; Data data;
@ -283,10 +378,15 @@ struct AggregationMethodKeysFixed
template <typename Other> template <typename Other>
AggregationMethodKeysFixed(const Other & other) : data(other.data) {} AggregationMethodKeysFixed(const Other & other) : data(other.data) {}
struct State class State final : private BaseStateKeysFixed<Key, has_nullable_keys>
{ {
public:
using Base = BaseStateKeysFixed<Key, has_nullable_keys>;
void init(ConstColumnPlainPtrs & key_columns) void init(ConstColumnPlainPtrs & key_columns)
{ {
if (has_nullable_keys)
Base::init(key_columns);
} }
Key getKey( Key getKey(
@ -297,6 +397,12 @@ struct AggregationMethodKeysFixed
StringRefs & keys, StringRefs & keys,
Arena & pool) const Arena & pool) const
{ {
if (has_nullable_keys)
{
auto bitmap = Base::createBitmap(i);
return packFixed<Key>(i, keys_size, Base::getActualColumns(), key_sizes, bitmap);
}
else
return packFixed<Key>(i, keys_size, key_columns, key_sizes); return packFixed<Key>(i, keys_size, key_columns, key_sizes);
} }
}; };
@ -314,19 +420,57 @@ struct AggregationMethodKeysFixed
static void insertKeyIntoColumns(const typename Data::value_type & value, ColumnPlainPtrs & key_columns, size_t keys_size, const Sizes & key_sizes) static void insertKeyIntoColumns(const typename Data::value_type & value, ColumnPlainPtrs & key_columns, size_t keys_size, const Sizes & key_sizes)
{ {
size_t offset = 0; static constexpr auto bitmap_size = has_nullable_keys ? std::tuple_size<KeysNullMap<Key>>::value : 0;
/// In any hash key value, column values to be read start just after the bitmap, if it exists.
size_t offset = bitmap_size;
for (size_t i = 0; i < keys_size; ++i) for (size_t i = 0; i < keys_size; ++i)
{
IColumn * observed_column;
ColumnUInt8 * null_map;
/// If we have a nullable column, get its nested column and its null map.
if (has_nullable_keys && key_columns[i]->isNullable())
{
ColumnNullable & nullable_col = static_cast<ColumnNullable &>(*key_columns[i]);
observed_column = nullable_col.getNestedColumn().get();
null_map = static_cast<ColumnUInt8 *>(nullable_col.getNullValuesByteMap().get());
}
else
{
observed_column = key_columns[i];
null_map = nullptr;
}
bool is_null;
if (has_nullable_keys && key_columns[i]->isNullable())
{
/// The current column is nullable. Check if the value of the
/// corresponding key is nullable. Update the null map accordingly.
size_t bucket = i / 8;
size_t offset = i % 8;
bool val = (reinterpret_cast<const char *>(&value.first)[bucket] >> offset) & 1;
null_map->insert(val);
is_null = val == 1;
}
else
is_null = false;
if (has_nullable_keys && is_null)
observed_column->insertDefault();
else
{ {
size_t size = key_sizes[i]; size_t size = key_sizes[i];
key_columns[i]->insertData(reinterpret_cast<const char *>(&value.first) + offset, size); observed_column->insertData(reinterpret_cast<const char *>(&value.first) + offset, size);
offset += size; offset += size;
} }
} }
}
}; };
/// Агрегирует по конкатенации ключей. (При этом, строки, содержащие нули посередине, могут склеиться.) /// Агрегирует по конкатенации ключей. (При этом, строки, содержащие нули посередине, могут склеиться.)
template <typename TData> template <typename TData, bool has_nullable_keys_ = false>
struct AggregationMethodConcat struct AggregationMethodConcat
{ {
using Data = TData; using Data = TData;
@ -335,6 +479,8 @@ struct AggregationMethodConcat
using iterator = typename Data::iterator; using iterator = typename Data::iterator;
using const_iterator = typename Data::const_iterator; using const_iterator = typename Data::const_iterator;
static constexpr bool has_nullable_keys = has_nullable_keys_;
Data data; Data data;
AggregationMethodConcat() {} AggregationMethodConcat() {}
@ -356,7 +502,7 @@ struct AggregationMethodConcat
StringRefs & keys, StringRefs & keys,
Arena & pool) const Arena & pool) const
{ {
return extractKeysAndPlaceInPoolContiguous(i, keys_size, key_columns, keys, pool); return extractKeysAndPlaceInPoolContiguous<has_nullable_keys>(i, keys_size, key_columns, keys, pool);
} }
}; };
@ -376,6 +522,17 @@ struct AggregationMethodConcat
static const bool no_consecutive_keys_optimization = true; static const bool no_consecutive_keys_optimization = true;
static void insertKeyIntoColumns(const typename Data::value_type & value, ColumnPlainPtrs & key_columns, size_t keys_size, const Sizes & key_sizes) static void insertKeyIntoColumns(const typename Data::value_type & value, ColumnPlainPtrs & key_columns, size_t keys_size, const Sizes & key_sizes)
{
if (has_nullable_keys)
insertKeyIntoNullableColumnsImpl(value, key_columns, keys_size, key_sizes);
else
insertKeyIntoColumnsImpl(value, key_columns, keys_size, key_sizes);
}
private:
/// Insert the values of the specified keys into the corresponding columns.
/// Implementation for the case where there are no nullable keys.
static void insertKeyIntoColumnsImpl(const typename Data::value_type & value, ColumnPlainPtrs & key_columns, size_t keys_size, const Sizes & key_sizes)
{ {
/// См. функцию extractKeysAndPlaceInPoolContiguous. /// См. функцию extractKeysAndPlaceInPoolContiguous.
const StringRef * key_refs = reinterpret_cast<const StringRef *>(value.first.data + value.first.size); const StringRef * key_refs = reinterpret_cast<const StringRef *>(value.first.data + value.first.size);
@ -395,6 +552,92 @@ struct AggregationMethodConcat
key_columns[i]->insertDataWithTerminatingZero(key_refs[i].data, key_refs[i].size); key_columns[i]->insertDataWithTerminatingZero(key_refs[i].data, key_refs[i].size);
} }
} }
/// Insert the values of the specified keys into the corresponding columns.
/// Implementation for the case where there is at least one nullable key.
///
/// The hash table key starts with a compact bitmap holding one bit per key
/// (bit i is set when key i is null), followed by the data of the non-null
/// keys. One StringRef per key is read from the memory located just after
/// the hash table key.
///
/// For each nullable column, the null flag is appended to its null map and the
/// value (or a default value if the key is null) to its nested column.
/// For a non-nullable column, the value is appended to the column itself.
///
/// key_sizes is not used by this implementation.
///
/// Throws an exception with code LOGICAL_ERROR if the hash table key is shorter than the bitmap.
static void insertKeyIntoNullableColumnsImpl(const typename Data::value_type & value, ColumnPlainPtrs & key_columns, size_t keys_size, const Sizes & key_sizes)
{
    /// One bit per key, rounded up to a whole number of bytes.
    size_t compact_bitmap_size = keys_size / 8;
    if ((keys_size % 8) != 0) { ++compact_bitmap_size; }

    if (unlikely(value.first.size < compact_bitmap_size))
    {
        /// This code path is logically impossible.
        /// Only a bug in the code base can trigger it.
        throw Exception{"Aggregator: corrupted hash table key", ErrorCodes::LOGICAL_ERROR};
    }

    /// When the hash table key consists of the bitmap only, each of the keys falls
    /// into either of the following two categories: (i) it has a null value;
    /// (ii) it represents an empty array. In this case a default value is inserted
    /// for every key and the StringRef objects are not consulted.
    const bool bitmap_only = (value.first.size == compact_bitmap_size);

    const UInt8 * compact_bitmap = reinterpret_cast<const UInt8 *>(value.first.data);
    /// Only dereferenced when the key holds data after the bitmap.
    const StringRef * key_refs = reinterpret_cast<const StringRef *>(value.first.data + value.first.size);

    for (size_t i = 0; i < keys_size; ++i)
    {
        IColumn * observed_column = key_columns[i];
        bool is_null = false;

        /// If we have a nullable column, get its nested column and its null map,
        /// then record whether the value of the corresponding key is null.
        if (key_columns[i]->isNullable())
        {
            ColumnNullable & nullable_col = static_cast<ColumnNullable &>(*key_columns[i]);
            observed_column = nullable_col.getNestedColumn().get();
            ColumnUInt8 & null_map = static_cast<ColumnUInt8 &>(*nullable_col.getNullValuesByteMap());

            size_t bucket = i / 8;
            size_t offset = i % 8;
            UInt8 null_flag = (compact_bitmap[bucket] >> offset) & 1;

            null_map.insert(null_flag);
            is_null = null_flag == 1;
        }

        if (bitmap_only || is_null)
            observed_column->insertDefault();
        else
            observed_column->insertDataWithTerminatingZero(key_refs[i].data, key_refs[i].size);
    }
}
}; };
@ -574,6 +817,14 @@ struct AggregatedDataVariants : private boost::noncopyable
std::unique_ptr<AggregationMethodConcat<AggregatedDataWithStringKeyTwoLevel>> concat_two_level; std::unique_ptr<AggregationMethodConcat<AggregatedDataWithStringKeyTwoLevel>> concat_two_level;
std::unique_ptr<AggregationMethodSerialized<AggregatedDataWithStringKeyTwoLevel>> serialized_two_level; std::unique_ptr<AggregationMethodSerialized<AggregatedDataWithStringKeyTwoLevel>> serialized_two_level;
/// Support for nullable keys.
std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128, true>> nullable_keys128;
std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256, true>> nullable_keys256;
std::unique_ptr<AggregationMethodConcat<AggregatedDataWithStringKey, true>> nullable_concat;
std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128TwoLevel, true>> nullable_keys128_two_level;
std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256TwoLevel, true>> nullable_keys256_two_level;
std::unique_ptr<AggregationMethodConcat<AggregatedDataWithStringKeyTwoLevel, true>> nullable_concat_two_level;
/// В этом и подобных макросах, вариант without_key не учитывается. /// В этом и подобных макросах, вариант without_key не учитывается.
#define APPLY_FOR_AGGREGATED_VARIANTS(M) \ #define APPLY_FOR_AGGREGATED_VARIANTS(M) \
M(key8, false) \ M(key8, false) \
@ -596,6 +847,12 @@ struct AggregatedDataVariants : private boost::noncopyable
M(hashed_two_level, true) \ M(hashed_two_level, true) \
M(concat_two_level, true) \ M(concat_two_level, true) \
M(serialized_two_level, true) \ M(serialized_two_level, true) \
M(nullable_keys128, false) \
M(nullable_keys256, false) \
M(nullable_concat, false) \
M(nullable_keys128_two_level, true) \
M(nullable_keys256_two_level, true) \
M(nullable_concat_two_level, true)
enum class Type enum class Type
{ {
@ -713,6 +970,9 @@ struct AggregatedDataVariants : private boost::noncopyable
M(hashed) \ M(hashed) \
M(concat) \ M(concat) \
M(serialized) \ M(serialized) \
M(nullable_keys128) \
M(nullable_keys256) \
M(nullable_concat) \
#define APPLY_FOR_VARIANTS_NOT_CONVERTIBLE_TO_TWO_LEVEL(M) \ #define APPLY_FOR_VARIANTS_NOT_CONVERTIBLE_TO_TWO_LEVEL(M) \
M(key8) \ M(key8) \
@ -748,7 +1008,10 @@ struct AggregatedDataVariants : private boost::noncopyable
M(keys256_two_level) \ M(keys256_two_level) \
M(hashed_two_level) \ M(hashed_two_level) \
M(concat_two_level) \ M(concat_two_level) \
M(serialized_two_level) M(serialized_two_level) \
M(nullable_keys128_two_level) \
M(nullable_keys256_two_level) \
M(nullable_concat_two_level)
}; };
using AggregatedDataVariantsPtr = std::shared_ptr<AggregatedDataVariants>; using AggregatedDataVariantsPtr = std::shared_ptr<AggregatedDataVariants>;

View File

@ -350,6 +350,26 @@ void Aggregator::compileIfPossible(AggregatedDataVariants::Type type)
AggregatedDataVariants::Type Aggregator::chooseAggregationMethod(const ConstColumnPlainPtrs & key_columns, Sizes & key_sizes) AggregatedDataVariants::Type Aggregator::chooseAggregationMethod(const ConstColumnPlainPtrs & key_columns, Sizes & key_sizes)
{ {
/// Check if at least one of the specified keys is nullable.
/// Create a set of nested key columns from the corresponding key columns.
/// Here "nested" means that, if a key column is nullable, we take its nested
/// column; otherwise we take the key column as is.
ConstColumnPlainPtrs nested_key_columns;
nested_key_columns.reserve(key_columns.size());
bool has_nullable_key = false;
for (const auto & col : key_columns)
{
if (col->isNullable())
{
const ColumnNullable & nullable_col = static_cast<const ColumnNullable &>(*col);
nested_key_columns.push_back(nullable_col.getNestedColumn().get());
has_nullable_key = true;
}
else
nested_key_columns.push_back(col);
}
/** Возвращает обычные (не two-level) методы, так как обработка начинается с них. /** Возвращает обычные (не two-level) методы, так как обработка начинается с них.
* Затем, в процессе работы, данные могут быть переконвертированы в two-level структуру, если их становится много. * Затем, в процессе работы, данные могут быть переконвертированы в two-level структуру, если их становится много.
*/ */
@ -364,16 +384,16 @@ AggregatedDataVariants::Type Aggregator::chooseAggregationMethod(const ConstColu
key_sizes.resize(params.keys_size); key_sizes.resize(params.keys_size);
for (size_t j = 0; j < params.keys_size; ++j) for (size_t j = 0; j < params.keys_size; ++j)
{ {
if (key_columns[j]->isFixed()) if (nested_key_columns[j]->isFixed())
{ {
key_sizes[j] = key_columns[j]->sizeOfField(); key_sizes[j] = nested_key_columns[j]->sizeOfField();
keys_bytes += key_sizes[j]; keys_bytes += key_sizes[j];
} }
else else
{ {
all_fixed = false; all_fixed = false;
if (const ColumnArray * arr = typeid_cast<const ColumnArray *>(key_columns[j])) if (const ColumnArray * arr = typeid_cast<const ColumnArray *>(nested_key_columns[j]))
{ {
++num_array_keys; ++num_array_keys;
@ -389,10 +409,47 @@ AggregatedDataVariants::Type Aggregator::chooseAggregationMethod(const ConstColu
if (params.keys_size == 0) if (params.keys_size == 0)
return AggregatedDataVariants::Type::without_key; return AggregatedDataVariants::Type::without_key;
/// Если есть один числовой ключ, который помещается в 64 бита if (has_nullable_key)
if (params.keys_size == 1 && key_columns[0]->isNumericNotNullable())
{ {
size_t size_of_field = key_columns[0]->sizeOfField(); /// At least one key is nullable. Therefore we choose an aggregation method
/// that takes into account this fact.
if ((params.keys_size == 1) && (nested_key_columns[0]->isNumeric()))
{
/// We have exactly one key and it is nullable. We shall add it a tag
/// which specifies whether its value is null or not.
size_t size_of_field = nested_key_columns[0]->sizeOfField();
if ((size_of_field == 1) || (size_of_field == 2) || (size_of_field == 4) || (size_of_field == 8))
return AggregatedDataVariants::Type::nullable_keys128;
else
throw Exception{"Logical error: numeric column has sizeOfField not in 1, 2, 4, 8.",
ErrorCodes::LOGICAL_ERROR};
}
/// Pack if possible all the keys along with information about which key values are nulls
/// into a fixed 16- or 32-byte blob.
if (keys_bytes > (std::numeric_limits<size_t>::max() - std::tuple_size<KeysNullMap<UInt128>>::value))
throw Exception{"Aggregator: keys sizes overflow", ErrorCodes::LOGICAL_ERROR};
if (all_fixed && ((std::tuple_size<KeysNullMap<UInt128>>::value + keys_bytes) <= 16))
return AggregatedDataVariants::Type::nullable_keys128;
if (all_fixed && ((std::tuple_size<KeysNullMap<UInt256>>::value + keys_bytes) <= 32))
return AggregatedDataVariants::Type::nullable_keys256;
/// Case when at least one key is an array. See comments below for the non-nullable
/// variant, since it is similar.
if ((num_array_keys > 1) || has_arrays_of_non_fixed_elems || ((num_array_keys == 1) && !all_non_array_keys_are_fixed))
return AggregatedDataVariants::Type::serialized;
/// Fallback case: we concatenate the keys along with information on which key values
/// are nulls.
return AggregatedDataVariants::Type::nullable_concat;
}
/// No key has been found to be nullable.
/// Если есть один числовой ключ, который помещается в 64 бита
if (params.keys_size == 1 && nested_key_columns[0]->isNumericNotNullable())
{
size_t size_of_field = nested_key_columns[0]->sizeOfField();
if (size_of_field == 1) if (size_of_field == 1)
return AggregatedDataVariants::Type::key8; return AggregatedDataVariants::Type::key8;
if (size_of_field == 2) if (size_of_field == 2)
@ -411,10 +468,10 @@ AggregatedDataVariants::Type Aggregator::chooseAggregationMethod(const ConstColu
return AggregatedDataVariants::Type::keys256; return AggregatedDataVariants::Type::keys256;
/// Если есть один строковый ключ, то используем хэш-таблицу с ним /// Если есть один строковый ключ, то используем хэш-таблицу с ним
if (params.keys_size == 1 && typeid_cast<const ColumnString *>(key_columns[0])) if (params.keys_size == 1 && typeid_cast<const ColumnString *>(nested_key_columns[0]))
return AggregatedDataVariants::Type::key_string; return AggregatedDataVariants::Type::key_string;
if (params.keys_size == 1 && typeid_cast<const ColumnFixedString *>(key_columns[0])) if (params.keys_size == 1 && typeid_cast<const ColumnFixedString *>(nested_key_columns[0]))
return AggregatedDataVariants::Type::key_fixed_string; return AggregatedDataVariants::Type::key_fixed_string;
/** Если есть массивы. /** Если есть массивы.