mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-30 05:30:51 +00:00
dbms: Server: added support for aggregation with nullable keys [#METR-19266]
This commit is contained in:
parent
9fc95c5093
commit
2a0c842966
@ -11,6 +11,7 @@
|
||||
#include <DB/Columns/IColumn.h>
|
||||
#include <DB/Columns/ColumnsNumber.h>
|
||||
#include <DB/Columns/ColumnFixedString.h>
|
||||
#include <DB/Columns/ColumnNullable.h>
|
||||
|
||||
|
||||
template <>
|
||||
@ -30,6 +31,7 @@ using Sizes = std::vector<size_t>;
|
||||
/// ceil(T/8). Thus we must have: S = T + ceil(T/8). Below we indicate for
|
||||
/// each value of S, the corresponding value of T, and the bitmap size:
|
||||
///
|
||||
/// 32,28,4
|
||||
/// 16,14,2
|
||||
/// 8,7,1
|
||||
/// 4,3,1
|
||||
@ -43,6 +45,8 @@ template <typename T>
|
||||
constexpr auto getBitmapSize()
|
||||
{
|
||||
return
|
||||
(sizeof(T) == 32) ?
|
||||
4 :
|
||||
(sizeof(T) == 16) ?
|
||||
2 :
|
||||
((sizeof(T) == 8) ?
|
||||
@ -117,7 +121,7 @@ static inline T ALWAYS_INLINE packFixed(
|
||||
size_t offset = 0;
|
||||
|
||||
static constexpr auto bitmap_size = std::tuple_size<KeysNullMap<T>>::value;
|
||||
bool has_bitmap = bitmap_size > 0;
|
||||
static constexpr bool has_bitmap = bitmap_size > 0;
|
||||
|
||||
if (has_bitmap)
|
||||
{
|
||||
@ -234,17 +238,29 @@ static inline StringRef * ALWAYS_INLINE extractKeysAndPlaceInPool(
|
||||
}
|
||||
|
||||
|
||||
/** Скопировать ключи в пул в непрерывный кусок памяти.
|
||||
* Потом разместить в пуле StringRef-ы на них.
|
||||
*
|
||||
* [key1][key2]...[keyN][ref1][ref2]...[refN]
|
||||
* ^---------------------| |
|
||||
* ^---------------------|
|
||||
* ^---return-value----^
|
||||
*
|
||||
* Вернуть StringRef на кусок памяти с ключами (без учёта StringRef-ов после них).
|
||||
*/
|
||||
static inline StringRef ALWAYS_INLINE extractKeysAndPlaceInPoolContiguous(
|
||||
/// Place the specified keys into a continuous memory chunk. The implementation
|
||||
/// of this function depends on whether some keys are nullable or not. See comments
|
||||
/// below for the specialized implementations.
|
||||
template <bool has_nullable_keys>
|
||||
static StringRef extractKeysAndPlaceInPoolContiguous(
|
||||
size_t i, size_t keys_size, const ConstColumnPlainPtrs & key_columns, StringRefs & keys, Arena & pool);
|
||||
|
||||
/// Implementation for the case when there are no nullable keys.
|
||||
/// Copy the specified keys to a continuous memory chunk of a pool.
|
||||
/// Subsequently append StringRef objects referring to each key.
|
||||
///
|
||||
/// [key1][key2]...[keyN][ref1][ref2]...[refN]
|
||||
/// ^ ^ : | |
|
||||
/// +-----|--------:-----+ |
|
||||
/// : +--------:-----------+
|
||||
/// : :
|
||||
/// <-------------->
|
||||
/// (1)
|
||||
///
|
||||
/// Return a StringRef object, referring to the area (1) of the memory
|
||||
/// chunk that contains the keys. In other words, we ignore their StringRefs.
|
||||
template <>
|
||||
inline StringRef ALWAYS_INLINE extractKeysAndPlaceInPoolContiguous<false>(
|
||||
size_t i, size_t keys_size, const ConstColumnPlainPtrs & key_columns, StringRefs & keys, Arena & pool)
|
||||
{
|
||||
size_t sum_keys_size = 0;
|
||||
@ -270,6 +286,93 @@ static inline StringRef ALWAYS_INLINE extractKeysAndPlaceInPoolContiguous(
|
||||
return {res, sum_keys_size};
|
||||
}
|
||||
|
||||
/// Implementation for the case where there is at least one nullable key.
|
||||
/// Inside a continuous memory chunk of a pool, put a bitmap that indicates
|
||||
/// for each specified key whether its value is null or not. Copy the keys
|
||||
/// whose values are not nulls to the memory chunk. Subsequently append
|
||||
/// StringRef objects referring to each key, even those who contain a null.
|
||||
///
|
||||
/// [bitmap][key1][key2][key4]...[keyN][ref1][ref2][ref3 (null)]...[refN]
|
||||
/// : ^ ^ : | |
|
||||
/// : +-----|--------------:-----+ |
|
||||
/// : +--------------:-----------+
|
||||
/// : :
|
||||
/// <---------------------------->
|
||||
/// (1)
|
||||
///
|
||||
/// Return a StringRef object, referring to the area (1) of the memory
|
||||
/// chunk that contains the bitmap and the keys. In other words, we ignore
|
||||
/// the keys' StringRefs.
|
||||
template <>
|
||||
inline StringRef ALWAYS_INLINE extractKeysAndPlaceInPoolContiguous<true>(
|
||||
size_t i, size_t keys_size, const ConstColumnPlainPtrs & key_columns, StringRefs & keys, Arena & pool)
|
||||
{
|
||||
size_t bitmap_size = keys_size / 8;
|
||||
if ((keys_size % 8) != 0) { ++bitmap_size; }
|
||||
std::vector<UInt8> bitmap(bitmap_size);
|
||||
|
||||
/// Prepare the keys to be stored. Create the bitmap.
|
||||
size_t keys_bytes = 0;
|
||||
for (size_t j = 0; j < keys_size; ++j)
|
||||
{
|
||||
const IColumn * observed_column;
|
||||
bool is_null;
|
||||
|
||||
if (key_columns[j]->isNullable())
|
||||
{
|
||||
const ColumnNullable & nullable_col = static_cast<const ColumnNullable &>(*key_columns[j]);
|
||||
observed_column = nullable_col.getNestedColumn().get();
|
||||
const auto & null_map = static_cast<const ColumnUInt8 &>(*nullable_col.getNullValuesByteMap()).getData();
|
||||
is_null = null_map[i] == 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
observed_column = key_columns[j];
|
||||
is_null = false;
|
||||
}
|
||||
|
||||
if (is_null)
|
||||
{
|
||||
size_t bucket = j / 8;
|
||||
size_t offset = j % 8;
|
||||
bitmap[bucket] |= UInt8(1) << offset;
|
||||
|
||||
keys[j] = StringRef{};
|
||||
}
|
||||
else
|
||||
{
|
||||
keys[j] = observed_column->getDataAtWithTerminatingZero(i);
|
||||
keys_bytes += keys[j].size;
|
||||
}
|
||||
}
|
||||
|
||||
/// Allocate space for bitmap + non-null keys + StringRef objects.
|
||||
char * res = pool.alloc(bitmap_size + keys_bytes + keys_size * sizeof(StringRef));
|
||||
char * place = res;
|
||||
|
||||
/// Store the bitmap.
|
||||
memcpy(place, bitmap.data(), bitmap.size());
|
||||
place += bitmap.size();
|
||||
|
||||
/// Store the non-null keys data.
|
||||
for (size_t j = 0; j < keys_size; ++j)
|
||||
{
|
||||
size_t bucket = j / 8;
|
||||
size_t offset = j % 8;
|
||||
if (((bitmap[bucket] >> offset) & 1) == 0)
|
||||
{
|
||||
memcpy(place, keys[j].data, keys[j].size);
|
||||
keys[j].data = place;
|
||||
place += keys[j].size;
|
||||
}
|
||||
}
|
||||
|
||||
/// Store StringRef objects for all the keys, i.e. even for those
|
||||
/// whose value is null.
|
||||
memcpy(place, &keys[0], keys_size * sizeof(StringRef));
|
||||
|
||||
return {res, bitmap_size + keys_bytes};
|
||||
}
|
||||
|
||||
/** Сериализовать ключи в непрерывный кусок памяти.
|
||||
*/
|
||||
|
@ -25,6 +25,7 @@
|
||||
#include <DB/Columns/ColumnFixedString.h>
|
||||
#include <DB/Columns/ColumnAggregateFunction.h>
|
||||
#include <DB/Columns/ColumnVector.h>
|
||||
#include <DB/Columns/ColumnNullable.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -265,9 +266,102 @@ struct AggregationMethodFixedString
|
||||
}
|
||||
};
|
||||
|
||||
/// This class is designed to provide the functionality that is required for
|
||||
/// supporting nullable keys in AggregationMethodKeysFixed. If there are
|
||||
/// no nullable keys, this class is merely implemented as an empty shell.
|
||||
template <typename Key, bool has_nullable_keys>
|
||||
class BaseStateKeysFixed;
|
||||
|
||||
/// Case where nullable keys are supported.
|
||||
template <typename Key>
|
||||
class BaseStateKeysFixed<Key, true>
|
||||
{
|
||||
protected:
|
||||
void init(const ConstColumnPlainPtrs & key_columns)
|
||||
{
|
||||
null_maps.reserve(key_columns.size());
|
||||
actual_columns.reserve(key_columns.size());
|
||||
|
||||
for (const auto & col : key_columns)
|
||||
{
|
||||
if (col->isNullable())
|
||||
{
|
||||
const auto & nullable_col = static_cast<const ColumnNullable &>(*col);
|
||||
actual_columns.push_back(nullable_col.getNestedColumn().get());
|
||||
null_maps.push_back(nullable_col.getNullValuesByteMap().get());
|
||||
has_nullable_columns = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
actual_columns.push_back(col);
|
||||
null_maps.push_back(nullptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the columns which actually contain the values of the keys.
|
||||
/// For a given key column, if it is nullable, we return its nested
|
||||
/// column. Otherwise we return the key column itself.
|
||||
inline const ConstColumnPlainPtrs & getActualColumns() const
|
||||
{
|
||||
return actual_columns;
|
||||
}
|
||||
|
||||
/// Create a bitmap that indicates whether, for a particular row,
|
||||
/// a key column bears a null value or not.
|
||||
KeysNullMap<Key> createBitmap(size_t row) const
|
||||
{
|
||||
KeysNullMap<Key> bitmap{};
|
||||
|
||||
for (size_t k = 0; k < null_maps.size(); ++k)
|
||||
{
|
||||
if (null_maps[k] != nullptr)
|
||||
{
|
||||
const auto & null_map = static_cast<const ColumnUInt8 &>(*null_maps[k]).getData();
|
||||
if (null_map[row] == 1)
|
||||
{
|
||||
size_t bucket = k / 8;
|
||||
size_t offset = k % 8;
|
||||
bitmap[bucket] |= UInt8(1) << offset;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return bitmap;
|
||||
}
|
||||
|
||||
private:
|
||||
ConstColumnPlainPtrs actual_columns;
|
||||
ConstColumnPlainPtrs null_maps;
|
||||
bool has_nullable_columns = false;
|
||||
};
|
||||
|
||||
/// Case where nullable keys are not supported.
|
||||
template <typename Key>
|
||||
class BaseStateKeysFixed<Key, false>
|
||||
{
|
||||
protected:
|
||||
void init(const ConstColumnPlainPtrs & key_columns)
|
||||
{
|
||||
throw Exception{"Internal error: calling init() for non-nullable"
|
||||
" keys is forbidden", ErrorCodes::LOGICAL_ERROR};
|
||||
}
|
||||
|
||||
const ConstColumnPlainPtrs & getActualColumns() const
|
||||
{
|
||||
throw Exception{"Internal error: calling getActualColumns() for non-nullable"
|
||||
" keys is forbidden", ErrorCodes::LOGICAL_ERROR};
|
||||
}
|
||||
|
||||
KeysNullMap<Key> createBitmap(size_t row) const
|
||||
{
|
||||
throw Exception{"Internal error: calling createBitmap() for non-nullable keys"
|
||||
" is forbidden", ErrorCodes::LOGICAL_ERROR};
|
||||
}
|
||||
};
|
||||
|
||||
/// Для случая, когда все ключи фиксированной длины, и они помещаются в N (например, 128) бит.
|
||||
template <typename TData>
|
||||
template <typename TData, bool has_nullable_keys_ = false>
|
||||
struct AggregationMethodKeysFixed
|
||||
{
|
||||
using Data = TData;
|
||||
@ -275,6 +369,7 @@ struct AggregationMethodKeysFixed
|
||||
using Mapped = typename Data::mapped_type;
|
||||
using iterator = typename Data::iterator;
|
||||
using const_iterator = typename Data::const_iterator;
|
||||
static constexpr bool has_nullable_keys = has_nullable_keys_;
|
||||
|
||||
Data data;
|
||||
|
||||
@ -283,10 +378,15 @@ struct AggregationMethodKeysFixed
|
||||
template <typename Other>
|
||||
AggregationMethodKeysFixed(const Other & other) : data(other.data) {}
|
||||
|
||||
struct State
|
||||
class State final : private BaseStateKeysFixed<Key, has_nullable_keys>
|
||||
{
|
||||
public:
|
||||
using Base = BaseStateKeysFixed<Key, has_nullable_keys>;
|
||||
|
||||
void init(ConstColumnPlainPtrs & key_columns)
|
||||
{
|
||||
if (has_nullable_keys)
|
||||
Base::init(key_columns);
|
||||
}
|
||||
|
||||
Key getKey(
|
||||
@ -297,7 +397,13 @@ struct AggregationMethodKeysFixed
|
||||
StringRefs & keys,
|
||||
Arena & pool) const
|
||||
{
|
||||
return packFixed<Key>(i, keys_size, key_columns, key_sizes);
|
||||
if (has_nullable_keys)
|
||||
{
|
||||
auto bitmap = Base::createBitmap(i);
|
||||
return packFixed<Key>(i, keys_size, Base::getActualColumns(), key_sizes, bitmap);
|
||||
}
|
||||
else
|
||||
return packFixed<Key>(i, keys_size, key_columns, key_sizes);
|
||||
}
|
||||
};
|
||||
|
||||
@ -314,19 +420,57 @@ struct AggregationMethodKeysFixed
|
||||
|
||||
static void insertKeyIntoColumns(const typename Data::value_type & value, ColumnPlainPtrs & key_columns, size_t keys_size, const Sizes & key_sizes)
|
||||
{
|
||||
size_t offset = 0;
|
||||
static constexpr auto bitmap_size = has_nullable_keys ? std::tuple_size<KeysNullMap<Key>>::value : 0;
|
||||
/// In any hash key value, column values to be read start just after the bitmap, if it exists.
|
||||
size_t offset = bitmap_size;
|
||||
|
||||
for (size_t i = 0; i < keys_size; ++i)
|
||||
{
|
||||
size_t size = key_sizes[i];
|
||||
key_columns[i]->insertData(reinterpret_cast<const char *>(&value.first) + offset, size);
|
||||
offset += size;
|
||||
IColumn * observed_column;
|
||||
ColumnUInt8 * null_map;
|
||||
|
||||
/// If we have a nullable column, get its nested column and its null map.
|
||||
if (has_nullable_keys && key_columns[i]->isNullable())
|
||||
{
|
||||
ColumnNullable & nullable_col = static_cast<ColumnNullable &>(*key_columns[i]);
|
||||
observed_column = nullable_col.getNestedColumn().get();
|
||||
null_map = static_cast<ColumnUInt8 *>(nullable_col.getNullValuesByteMap().get());
|
||||
}
|
||||
else
|
||||
{
|
||||
observed_column = key_columns[i];
|
||||
null_map = nullptr;
|
||||
}
|
||||
|
||||
bool is_null;
|
||||
if (has_nullable_keys && key_columns[i]->isNullable())
|
||||
{
|
||||
/// The current column is nullable. Check if the value of the
|
||||
/// corresponding key is null. Update the null map accordingly.
|
||||
size_t bucket = i / 8;
|
||||
size_t offset = i % 8;
|
||||
bool val = (reinterpret_cast<const char *>(&value.first)[bucket] >> offset) & 1;
|
||||
null_map->insert(val);
|
||||
is_null = val == 1;
|
||||
}
|
||||
else
|
||||
is_null = false;
|
||||
|
||||
if (has_nullable_keys && is_null)
|
||||
observed_column->insertDefault();
|
||||
else
|
||||
{
|
||||
size_t size = key_sizes[i];
|
||||
observed_column->insertData(reinterpret_cast<const char *>(&value.first) + offset, size);
|
||||
offset += size;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/// Агрегирует по конкатенации ключей. (При этом, строки, содержащие нули посередине, могут склеиться.)
|
||||
template <typename TData>
|
||||
template <typename TData, bool has_nullable_keys_ = false>
|
||||
struct AggregationMethodConcat
|
||||
{
|
||||
using Data = TData;
|
||||
@ -335,6 +479,8 @@ struct AggregationMethodConcat
|
||||
using iterator = typename Data::iterator;
|
||||
using const_iterator = typename Data::const_iterator;
|
||||
|
||||
static constexpr bool has_nullable_keys = has_nullable_keys_;
|
||||
|
||||
Data data;
|
||||
|
||||
AggregationMethodConcat() {}
|
||||
@ -356,7 +502,7 @@ struct AggregationMethodConcat
|
||||
StringRefs & keys,
|
||||
Arena & pool) const
|
||||
{
|
||||
return extractKeysAndPlaceInPoolContiguous(i, keys_size, key_columns, keys, pool);
|
||||
return extractKeysAndPlaceInPoolContiguous<has_nullable_keys>(i, keys_size, key_columns, keys, pool);
|
||||
}
|
||||
};
|
||||
|
||||
@ -376,6 +522,17 @@ struct AggregationMethodConcat
|
||||
static const bool no_consecutive_keys_optimization = true;
|
||||
|
||||
static void insertKeyIntoColumns(const typename Data::value_type & value, ColumnPlainPtrs & key_columns, size_t keys_size, const Sizes & key_sizes)
|
||||
{
|
||||
if (has_nullable_keys)
|
||||
insertKeyIntoNullableColumnsImpl(value, key_columns, keys_size, key_sizes);
|
||||
else
|
||||
insertKeyIntoColumnsImpl(value, key_columns, keys_size, key_sizes);
|
||||
}
|
||||
|
||||
private:
|
||||
/// Insert the values of the specified keys into the corresponding columns.
|
||||
/// Implementation for the case where there are no nullable keys.
|
||||
static void insertKeyIntoColumnsImpl(const typename Data::value_type & value, ColumnPlainPtrs & key_columns, size_t keys_size, const Sizes & key_sizes)
|
||||
{
|
||||
/// См. функцию extractKeysAndPlaceInPoolContiguous.
|
||||
const StringRef * key_refs = reinterpret_cast<const StringRef *>(value.first.data + value.first.size);
|
||||
@ -383,9 +540,9 @@ struct AggregationMethodConcat
|
||||
if (unlikely(0 == value.first.size))
|
||||
{
|
||||
/** Исправление, если все ключи - пустые массивы. Для них в хэш-таблицу записывается StringRef нулевой длины, но с ненулевым указателем.
|
||||
* Но при вставке в хэш-таблицу, такой StringRef оказывается равен другому ключу нулевой длины,
|
||||
* у которого указатель на данные может быть любым мусором и использовать его нельзя.
|
||||
*/
|
||||
* Но при вставке в хэш-таблицу, такой StringRef оказывается равен другому ключу нулевой длины,
|
||||
* у которого указатель на данные может быть любым мусором и использовать его нельзя.
|
||||
*/
|
||||
for (size_t i = 0; i < keys_size; ++i)
|
||||
key_columns[i]->insertDefault();
|
||||
}
|
||||
@ -395,6 +552,92 @@ struct AggregationMethodConcat
|
||||
key_columns[i]->insertDataWithTerminatingZero(key_refs[i].data, key_refs[i].size);
|
||||
}
|
||||
}
|
||||
|
||||
/// Insert the value of the specified keys into the corresponding columns.
|
||||
/// Implementation for the case where there is at least one nullable key.
|
||||
static void insertKeyIntoNullableColumnsImpl(const typename Data::value_type & value, ColumnPlainPtrs & key_columns, size_t keys_size, const Sizes & key_sizes)
|
||||
{
|
||||
size_t compact_bitmap_size = keys_size / 8;
|
||||
if ((keys_size % 8) != 0) { ++compact_bitmap_size; }
|
||||
|
||||
if (unlikely(value.first.size < compact_bitmap_size))
|
||||
{
|
||||
/// This code path is logically impossible.
|
||||
/// Only a bug in the code base can trigger it.
|
||||
throw Exception{"Aggregator: corrupted hash table key", ErrorCodes::LOGICAL_ERROR};
|
||||
}
|
||||
else if (unlikely(value.first.size == compact_bitmap_size))
|
||||
{
|
||||
/// This case occurs when each of the keys falls into either of the following two
|
||||
/// categories: (i) it has a null value; (ii) it represents an empty array.
|
||||
/// The remarks are the same as for the implementation of the non-nullable case above.
|
||||
const UInt8 * compact_bitmap = reinterpret_cast<const UInt8 *>(value.first.data);
|
||||
|
||||
for (size_t i = 0; i < keys_size; ++i)
|
||||
{
|
||||
IColumn * observed_column;
|
||||
|
||||
if (key_columns[i]->isNullable())
|
||||
{
|
||||
ColumnNullable & nullable_col = static_cast<ColumnNullable &>(*key_columns[i]);
|
||||
observed_column = nullable_col.getNestedColumn().get();
|
||||
ColumnUInt8 & null_map = static_cast<ColumnUInt8 &>(*nullable_col.getNullValuesByteMap());
|
||||
|
||||
size_t bucket = i / 8;
|
||||
size_t offset = i % 8;
|
||||
UInt8 is_null = (compact_bitmap[bucket] >> offset) & 1;
|
||||
null_map.insert(is_null);
|
||||
}
|
||||
else
|
||||
observed_column = key_columns[i];
|
||||
|
||||
observed_column->insertDefault();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const UInt8 * compact_bitmap = reinterpret_cast<const UInt8 *>(value.first.data);
|
||||
const StringRef * key_refs = reinterpret_cast<const StringRef *>(value.first.data + value.first.size);
|
||||
|
||||
for (size_t i = 0; i < keys_size; ++i)
|
||||
{
|
||||
IColumn * observed_column;
|
||||
ColumnUInt8 * null_map;
|
||||
|
||||
/// If we have a nullable column, get its nested column and its null map.
|
||||
if (key_columns[i]->isNullable())
|
||||
{
|
||||
ColumnNullable & nullable_col = static_cast<ColumnNullable &>(*key_columns[i]);
|
||||
observed_column = nullable_col.getNestedColumn().get();
|
||||
null_map = static_cast<ColumnUInt8 *>(nullable_col.getNullValuesByteMap().get());
|
||||
}
|
||||
else
|
||||
{
|
||||
observed_column = key_columns[i];
|
||||
null_map = nullptr;
|
||||
}
|
||||
|
||||
bool is_null;
|
||||
if (key_columns[i]->isNullable())
|
||||
{
|
||||
/// The current column is nullable. Check if the value of the
|
||||
/// corresponding key is nullable. Update the null map accordingly.
|
||||
size_t bucket = i / 8;
|
||||
size_t offset = i % 8;
|
||||
UInt8 val = (compact_bitmap[bucket] >> offset) & 1;
|
||||
null_map->insert(val);
|
||||
is_null = val == 1;
|
||||
}
|
||||
else
|
||||
is_null = false;
|
||||
|
||||
if (is_null)
|
||||
observed_column->insertDefault();
|
||||
else
|
||||
observed_column->insertDataWithTerminatingZero(key_refs[i].data, key_refs[i].size);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@ -574,6 +817,14 @@ struct AggregatedDataVariants : private boost::noncopyable
|
||||
std::unique_ptr<AggregationMethodConcat<AggregatedDataWithStringKeyTwoLevel>> concat_two_level;
|
||||
std::unique_ptr<AggregationMethodSerialized<AggregatedDataWithStringKeyTwoLevel>> serialized_two_level;
|
||||
|
||||
/// Support for nullable keys.
|
||||
std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128, true>> nullable_keys128;
|
||||
std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256, true>> nullable_keys256;
|
||||
std::unique_ptr<AggregationMethodConcat<AggregatedDataWithStringKey, true>> nullable_concat;
|
||||
std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128TwoLevel, true>> nullable_keys128_two_level;
|
||||
std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256TwoLevel, true>> nullable_keys256_two_level;
|
||||
std::unique_ptr<AggregationMethodConcat<AggregatedDataWithStringKeyTwoLevel, true>> nullable_concat_two_level;
|
||||
|
||||
/// В этом и подобных макросах, вариант without_key не учитывается.
|
||||
#define APPLY_FOR_AGGREGATED_VARIANTS(M) \
|
||||
M(key8, false) \
|
||||
@ -596,6 +847,12 @@ struct AggregatedDataVariants : private boost::noncopyable
|
||||
M(hashed_two_level, true) \
|
||||
M(concat_two_level, true) \
|
||||
M(serialized_two_level, true) \
|
||||
M(nullable_keys128, false) \
|
||||
M(nullable_keys256, false) \
|
||||
M(nullable_concat, false) \
|
||||
M(nullable_keys128_two_level, true) \
|
||||
M(nullable_keys256_two_level, true) \
|
||||
M(nullable_concat_two_level, true)
|
||||
|
||||
enum class Type
|
||||
{
|
||||
@ -713,6 +970,9 @@ struct AggregatedDataVariants : private boost::noncopyable
|
||||
M(hashed) \
|
||||
M(concat) \
|
||||
M(serialized) \
|
||||
M(nullable_keys128) \
|
||||
M(nullable_keys256) \
|
||||
M(nullable_concat) \
|
||||
|
||||
#define APPLY_FOR_VARIANTS_NOT_CONVERTIBLE_TO_TWO_LEVEL(M) \
|
||||
M(key8) \
|
||||
@ -748,7 +1008,10 @@ struct AggregatedDataVariants : private boost::noncopyable
|
||||
M(keys256_two_level) \
|
||||
M(hashed_two_level) \
|
||||
M(concat_two_level) \
|
||||
M(serialized_two_level)
|
||||
M(serialized_two_level) \
|
||||
M(nullable_keys128_two_level) \
|
||||
M(nullable_keys256_two_level) \
|
||||
M(nullable_concat_two_level)
|
||||
};
|
||||
|
||||
using AggregatedDataVariantsPtr = std::shared_ptr<AggregatedDataVariants>;
|
||||
|
@ -350,6 +350,26 @@ void Aggregator::compileIfPossible(AggregatedDataVariants::Type type)
|
||||
|
||||
AggregatedDataVariants::Type Aggregator::chooseAggregationMethod(const ConstColumnPlainPtrs & key_columns, Sizes & key_sizes)
|
||||
{
|
||||
/// Check if at least one of the specified keys is nullable.
|
||||
/// Create a set of nested key columns from the corresponding key columns.
|
||||
/// Here "nested" means that, if a key column is nullable, we take its nested
|
||||
/// column; otherwise we take the key column as is.
|
||||
ConstColumnPlainPtrs nested_key_columns;
|
||||
nested_key_columns.reserve(key_columns.size());
|
||||
bool has_nullable_key = false;
|
||||
|
||||
for (const auto & col : key_columns)
|
||||
{
|
||||
if (col->isNullable())
|
||||
{
|
||||
const ColumnNullable & nullable_col = static_cast<const ColumnNullable &>(*col);
|
||||
nested_key_columns.push_back(nullable_col.getNestedColumn().get());
|
||||
has_nullable_key = true;
|
||||
}
|
||||
else
|
||||
nested_key_columns.push_back(col);
|
||||
}
|
||||
|
||||
/** Возвращает обычные (не two-level) методы, так как обработка начинается с них.
|
||||
* Затем, в процессе работы, данные могут быть переконвертированы в two-level структуру, если их становится много.
|
||||
*/
|
||||
@ -364,16 +384,16 @@ AggregatedDataVariants::Type Aggregator::chooseAggregationMethod(const ConstColu
|
||||
key_sizes.resize(params.keys_size);
|
||||
for (size_t j = 0; j < params.keys_size; ++j)
|
||||
{
|
||||
if (key_columns[j]->isFixed())
|
||||
if (nested_key_columns[j]->isFixed())
|
||||
{
|
||||
key_sizes[j] = key_columns[j]->sizeOfField();
|
||||
key_sizes[j] = nested_key_columns[j]->sizeOfField();
|
||||
keys_bytes += key_sizes[j];
|
||||
}
|
||||
else
|
||||
{
|
||||
all_fixed = false;
|
||||
|
||||
if (const ColumnArray * arr = typeid_cast<const ColumnArray *>(key_columns[j]))
|
||||
if (const ColumnArray * arr = typeid_cast<const ColumnArray *>(nested_key_columns[j]))
|
||||
{
|
||||
++num_array_keys;
|
||||
|
||||
@ -389,10 +409,47 @@ AggregatedDataVariants::Type Aggregator::chooseAggregationMethod(const ConstColu
|
||||
if (params.keys_size == 0)
|
||||
return AggregatedDataVariants::Type::without_key;
|
||||
|
||||
/// Если есть один числовой ключ, который помещается в 64 бита
|
||||
if (params.keys_size == 1 && key_columns[0]->isNumericNotNullable())
|
||||
if (has_nullable_key)
|
||||
{
|
||||
size_t size_of_field = key_columns[0]->sizeOfField();
|
||||
/// At least one key is nullable. Therefore we choose an aggregation method
|
||||
/// that takes into account this fact.
|
||||
if ((params.keys_size == 1) && (nested_key_columns[0]->isNumeric()))
|
||||
{
|
||||
/// We have exactly one key and it is nullable. We shall attach a tag to it
|
||||
/// which specifies whether its value is null or not.
|
||||
size_t size_of_field = nested_key_columns[0]->sizeOfField();
|
||||
if ((size_of_field == 1) || (size_of_field == 2) || (size_of_field == 4) || (size_of_field == 8))
|
||||
return AggregatedDataVariants::Type::nullable_keys128;
|
||||
else
|
||||
throw Exception{"Logical error: numeric column has sizeOfField not in 1, 2, 4, 8.",
|
||||
ErrorCodes::LOGICAL_ERROR};
|
||||
}
|
||||
|
||||
/// Pack if possible all the keys along with information about which key values are nulls
|
||||
/// into a fixed 16- or 32-byte blob.
|
||||
if (keys_bytes > (std::numeric_limits<size_t>::max() - std::tuple_size<KeysNullMap<UInt128>>::value))
|
||||
throw Exception{"Aggregator: keys sizes overflow", ErrorCodes::LOGICAL_ERROR};
|
||||
if (all_fixed && ((std::tuple_size<KeysNullMap<UInt128>>::value + keys_bytes) <= 16))
|
||||
return AggregatedDataVariants::Type::nullable_keys128;
|
||||
if (all_fixed && ((std::tuple_size<KeysNullMap<UInt256>>::value + keys_bytes) <= 32))
|
||||
return AggregatedDataVariants::Type::nullable_keys256;
|
||||
|
||||
/// Case when at least one key is an array. See comments below for the non-nullable
|
||||
/// variant, since it is similar.
|
||||
if ((num_array_keys > 1) || has_arrays_of_non_fixed_elems || ((num_array_keys == 1) && !all_non_array_keys_are_fixed))
|
||||
return AggregatedDataVariants::Type::serialized;
|
||||
|
||||
/// Fallback case: we concatenate the keys along with information on which key values
|
||||
/// are nulls.
|
||||
return AggregatedDataVariants::Type::nullable_concat;
|
||||
}
|
||||
|
||||
/// No key has been found to be nullable.
|
||||
|
||||
/// Если есть один числовой ключ, который помещается в 64 бита
|
||||
if (params.keys_size == 1 && nested_key_columns[0]->isNumericNotNullable())
|
||||
{
|
||||
size_t size_of_field = nested_key_columns[0]->sizeOfField();
|
||||
if (size_of_field == 1)
|
||||
return AggregatedDataVariants::Type::key8;
|
||||
if (size_of_field == 2)
|
||||
@ -411,10 +468,10 @@ AggregatedDataVariants::Type Aggregator::chooseAggregationMethod(const ConstColu
|
||||
return AggregatedDataVariants::Type::keys256;
|
||||
|
||||
/// Если есть один строковый ключ, то используем хэш-таблицу с ним
|
||||
if (params.keys_size == 1 && typeid_cast<const ColumnString *>(key_columns[0]))
|
||||
if (params.keys_size == 1 && typeid_cast<const ColumnString *>(nested_key_columns[0]))
|
||||
return AggregatedDataVariants::Type::key_string;
|
||||
|
||||
if (params.keys_size == 1 && typeid_cast<const ColumnFixedString *>(key_columns[0]))
|
||||
if (params.keys_size == 1 && typeid_cast<const ColumnFixedString *>(nested_key_columns[0]))
|
||||
return AggregatedDataVariants::Type::key_fixed_string;
|
||||
|
||||
/** Если есть массивы.
|
||||
|
Loading…
Reference in New Issue
Block a user