mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 09:32:06 +00:00
Merge branch 'master' of github.com:yandex/ClickHouse
This commit is contained in:
commit
509662a24d
@ -247,14 +247,6 @@ static inline StringRef * ALWAYS_INLINE extractKeysAndPlaceInPool(
|
||||
}
|
||||
|
||||
|
||||
/// Place the specified keys into a continuous memory chunk. The implementation
|
||||
/// of this function depends on whether some keys are nullable or not. See comments
|
||||
/// below for the specialized implementations.
|
||||
template <bool has_nullable_keys>
|
||||
static StringRef extractKeysAndPlaceInPoolContiguous(
|
||||
size_t i, size_t keys_size, const ConstColumnPlainPtrs & key_columns, StringRefs & keys, Arena & pool);
|
||||
|
||||
/// Implementation for the case when there are no nullable keys.
|
||||
/// Copy the specified keys to a continuous memory chunk of a pool.
|
||||
/// Subsequently append StringRef objects referring to each key.
|
||||
///
|
||||
@ -268,8 +260,7 @@ static StringRef extractKeysAndPlaceInPoolContiguous(
|
||||
///
|
||||
/// Return a StringRef object, referring to the area (1) of the memory
|
||||
/// chunk that contains the keys. In other words, we ignore their StringRefs.
|
||||
template <>
|
||||
inline StringRef ALWAYS_INLINE extractKeysAndPlaceInPoolContiguous<false>(
|
||||
inline StringRef ALWAYS_INLINE extractKeysAndPlaceInPoolContiguous(
|
||||
size_t i, size_t keys_size, const ConstColumnPlainPtrs & key_columns, StringRefs & keys, Arena & pool)
|
||||
{
|
||||
size_t sum_keys_size = 0;
|
||||
@ -295,93 +286,6 @@ inline StringRef ALWAYS_INLINE extractKeysAndPlaceInPoolContiguous<false>(
|
||||
return {res, sum_keys_size};
|
||||
}
|
||||
|
||||
/// Implementation for the case where there is at least one nullable key.
|
||||
/// Inside a continuous memory chunk of a pool, put a bitmap that indicates
|
||||
/// for each specified key whether its value is null or not. Copy the keys
|
||||
/// whose values are not nulls to the memory chunk. Subsequently append
|
||||
/// StringRef objects referring to each key, even those who contain a null.
|
||||
///
|
||||
/// [bitmap][key1][key2][key4]...[keyN][ref1][ref2][ref3 (null)]...[refN]
|
||||
/// : ^ ^ : | |
|
||||
/// : +-----|--------------:-----+ |
|
||||
/// : +--------------:-----------+
|
||||
/// : :
|
||||
/// <---------------------------->
|
||||
/// (1)
|
||||
///
|
||||
/// Return a StringRef object, referring to the area (1) of the memory
|
||||
/// chunk that contains the bitmap and the keys. In other words, we ignore
|
||||
/// the keys' StringRefs.
|
||||
template <>
|
||||
inline StringRef ALWAYS_INLINE extractKeysAndPlaceInPoolContiguous<true>(
|
||||
size_t i, size_t keys_size, const ConstColumnPlainPtrs & key_columns, StringRefs & keys, Arena & pool)
|
||||
{
|
||||
size_t bitmap_size = keys_size / 8;
|
||||
if ((keys_size % 8) != 0) { ++bitmap_size; }
|
||||
std::vector<UInt8> bitmap(bitmap_size);
|
||||
|
||||
/// Prepare the keys to be stored. Create the bitmap.
|
||||
size_t keys_bytes = 0;
|
||||
for (size_t j = 0; j < keys_size; ++j)
|
||||
{
|
||||
const IColumn * observed_column;
|
||||
bool is_null;
|
||||
|
||||
if (key_columns[j]->isNullable())
|
||||
{
|
||||
const ColumnNullable & nullable_col = static_cast<const ColumnNullable &>(*key_columns[j]);
|
||||
observed_column = nullable_col.getNestedColumn().get();
|
||||
const auto & null_map = nullable_col.getNullMap();
|
||||
is_null = null_map[i] == 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
observed_column = key_columns[j];
|
||||
is_null = false;
|
||||
}
|
||||
|
||||
if (is_null)
|
||||
{
|
||||
size_t bucket = j / 8;
|
||||
size_t offset = j % 8;
|
||||
bitmap[bucket] |= UInt8(1) << offset;
|
||||
|
||||
keys[j] = StringRef{};
|
||||
}
|
||||
else
|
||||
{
|
||||
keys[j] = observed_column->getDataAtWithTerminatingZero(i);
|
||||
keys_bytes += keys[j].size;
|
||||
}
|
||||
}
|
||||
|
||||
/// Allocate space for bitmap + non-null keys + StringRef objects.
|
||||
char * res = pool.alloc(bitmap_size + keys_bytes + keys_size * sizeof(StringRef));
|
||||
char * place = res;
|
||||
|
||||
/// Store the bitmap.
|
||||
memcpy(place, bitmap.data(), bitmap.size());
|
||||
place += bitmap.size();
|
||||
|
||||
/// Store the non-null keys data.
|
||||
for (size_t j = 0; j < keys_size; ++j)
|
||||
{
|
||||
size_t bucket = j / 8;
|
||||
size_t offset = j % 8;
|
||||
if (((bitmap[bucket] >> offset) & 1) == 0)
|
||||
{
|
||||
memcpy(place, keys[j].data, keys[j].size);
|
||||
keys[j].data = place;
|
||||
place += keys[j].size;
|
||||
}
|
||||
}
|
||||
|
||||
/// Store StringRef objects for all the keys, i.e. even for those
|
||||
/// whose value is null.
|
||||
memcpy(place, &keys[0], keys_size * sizeof(StringRef));
|
||||
|
||||
return {res, bitmap_size + keys_bytes};
|
||||
}
|
||||
|
||||
/** Сериализовать ключи в непрерывный кусок памяти.
|
||||
*/
|
||||
|
@ -488,7 +488,7 @@ struct AggregationMethodKeysFixed
|
||||
|
||||
|
||||
/// Агрегирует по конкатенации ключей. (При этом, строки, содержащие нули посередине, могут склеиться.)
|
||||
template <typename TData, bool has_nullable_keys_ = false>
|
||||
template <typename TData>
|
||||
struct AggregationMethodConcat
|
||||
{
|
||||
using Data = TData;
|
||||
@ -497,8 +497,6 @@ struct AggregationMethodConcat
|
||||
using iterator = typename Data::iterator;
|
||||
using const_iterator = typename Data::const_iterator;
|
||||
|
||||
static constexpr bool has_nullable_keys = has_nullable_keys_;
|
||||
|
||||
Data data;
|
||||
|
||||
AggregationMethodConcat() {}
|
||||
@ -520,7 +518,7 @@ struct AggregationMethodConcat
|
||||
StringRefs & keys,
|
||||
Arena & pool) const
|
||||
{
|
||||
return extractKeysAndPlaceInPoolContiguous<has_nullable_keys>(i, keys_size, key_columns, keys, pool);
|
||||
return extractKeysAndPlaceInPoolContiguous(i, keys_size, key_columns, keys, pool);
|
||||
}
|
||||
};
|
||||
|
||||
@ -541,15 +539,11 @@ struct AggregationMethodConcat
|
||||
|
||||
/// Insert the values of the specified keys into the corresponding columns.
/// Dispatches on has_nullable_keys to the nullable or the non-nullable implementation.
static void insertKeyIntoColumns(const typename Data::value_type & value, ColumnPlainPtrs & key_columns, size_t keys_size, const Sizes & key_sizes)
{
    if (!has_nullable_keys)
    {
        insertKeyIntoColumnsImpl(value, key_columns, keys_size, key_sizes);
        return;
    }

    insertKeyIntoNullableColumnsImpl(value, key_columns, keys_size, key_sizes);
}
|
||||
|
||||
private:
|
||||
/// Insert the values of the specified keys into the corresponding columns.
|
||||
/// Implementation for the case where there are no nullable keys.
|
||||
static void insertKeyIntoColumnsImpl(const typename Data::value_type & value, ColumnPlainPtrs & key_columns, size_t keys_size, const Sizes & key_sizes)
|
||||
{
|
||||
/// См. функцию extractKeysAndPlaceInPoolContiguous.
|
||||
@ -570,92 +564,6 @@ private:
|
||||
key_columns[i]->insertDataWithTerminatingZero(key_refs[i].data, key_refs[i].size);
|
||||
}
|
||||
}
|
||||
|
||||
/// Insert the value of the specified keys into the corresponding columns.
|
||||
/// Implementation for the case where there is at least one nullable key.
|
||||
static void insertKeyIntoNullableColumnsImpl(const typename Data::value_type & value, ColumnPlainPtrs & key_columns, size_t keys_size, const Sizes & key_sizes)
|
||||
{
|
||||
size_t compact_bitmap_size = keys_size / 8;
|
||||
if ((keys_size % 8) != 0) { ++compact_bitmap_size; }
|
||||
|
||||
if (unlikely(value.first.size < compact_bitmap_size))
|
||||
{
|
||||
/// This code path is logically impossible.
|
||||
/// Only a bug in the code base can trigger it.
|
||||
throw Exception{"Aggregator: corrupted hash table key", ErrorCodes::LOGICAL_ERROR};
|
||||
}
|
||||
else if (unlikely(value.first.size == compact_bitmap_size))
|
||||
{
|
||||
/// This case occurs when each of the keys falls into either of the following two
|
||||
/// categories: (i) it has a null value; (ii) it represents an empty array.
|
||||
/// The remarks are the same as for the implementation of the non-nullable case above.
|
||||
const UInt8 * compact_bitmap = reinterpret_cast<const UInt8 *>(value.first.data);
|
||||
|
||||
for (size_t i = 0; i < keys_size; ++i)
|
||||
{
|
||||
IColumn * observed_column;
|
||||
|
||||
if (key_columns[i]->isNullable())
|
||||
{
|
||||
ColumnNullable & nullable_col = static_cast<ColumnNullable &>(*key_columns[i]);
|
||||
observed_column = nullable_col.getNestedColumn().get();
|
||||
ColumnUInt8 & null_map = nullable_col.getNullMapConcreteColumn();
|
||||
|
||||
size_t bucket = i / 8;
|
||||
size_t offset = i % 8;
|
||||
UInt8 is_null = (compact_bitmap[bucket] >> offset) & 1;
|
||||
null_map.insert(is_null);
|
||||
}
|
||||
else
|
||||
observed_column = key_columns[i];
|
||||
|
||||
observed_column->insertDefault();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const UInt8 * compact_bitmap = reinterpret_cast<const UInt8 *>(value.first.data);
|
||||
const StringRef * key_refs = reinterpret_cast<const StringRef *>(value.first.data + value.first.size);
|
||||
|
||||
for (size_t i = 0; i < keys_size; ++i)
|
||||
{
|
||||
IColumn * observed_column;
|
||||
ColumnUInt8 * null_map;
|
||||
|
||||
/// If we have a nullable column, get its nested column and its null map.
|
||||
if (key_columns[i]->isNullable())
|
||||
{
|
||||
ColumnNullable & nullable_col = static_cast<ColumnNullable &>(*key_columns[i]);
|
||||
observed_column = nullable_col.getNestedColumn().get();
|
||||
null_map = &nullable_col.getNullMapConcreteColumn();
|
||||
}
|
||||
else
|
||||
{
|
||||
observed_column = key_columns[i];
|
||||
null_map = nullptr;
|
||||
}
|
||||
|
||||
bool is_null;
|
||||
if (key_columns[i]->isNullable())
|
||||
{
|
||||
/// The current column is nullable. Check if the value of the
|
||||
/// corresponding key is nullable. Update the null map accordingly.
|
||||
size_t bucket = i / 8;
|
||||
size_t offset = i % 8;
|
||||
UInt8 val = (compact_bitmap[bucket] >> offset) & 1;
|
||||
null_map->insert(val);
|
||||
is_null = val == 1;
|
||||
}
|
||||
else
|
||||
is_null = false;
|
||||
|
||||
if (is_null)
|
||||
observed_column->insertDefault();
|
||||
else
|
||||
observed_column->insertDataWithTerminatingZero(key_refs[i].data, key_refs[i].size);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@ -846,10 +754,8 @@ struct AggregatedDataVariants : private boost::noncopyable
|
||||
/// Support for nullable keys.
|
||||
std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128, true>> nullable_keys128;
|
||||
std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256, true>> nullable_keys256;
|
||||
std::unique_ptr<AggregationMethodConcat<AggregatedDataWithStringKey, true>> nullable_concat;
|
||||
std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128TwoLevel, true>> nullable_keys128_two_level;
|
||||
std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256TwoLevel, true>> nullable_keys256_two_level;
|
||||
std::unique_ptr<AggregationMethodConcat<AggregatedDataWithStringKeyTwoLevel, true>> nullable_concat_two_level;
|
||||
|
||||
/// В этом и подобных макросах, вариант without_key не учитывается.
|
||||
#define APPLY_FOR_AGGREGATED_VARIANTS(M) \
|
||||
@ -882,10 +788,8 @@ struct AggregatedDataVariants : private boost::noncopyable
|
||||
M(serialized_hash64, false) \
|
||||
M(nullable_keys128, false) \
|
||||
M(nullable_keys256, false) \
|
||||
M(nullable_concat, false) \
|
||||
M(nullable_keys128_two_level, true) \
|
||||
M(nullable_keys256_two_level, true) \
|
||||
M(nullable_concat_two_level, true)
|
||||
|
||||
enum class Type
|
||||
{
|
||||
@ -1005,7 +909,6 @@ struct AggregatedDataVariants : private boost::noncopyable
|
||||
M(serialized) \
|
||||
M(nullable_keys128) \
|
||||
M(nullable_keys256) \
|
||||
M(nullable_concat) \
|
||||
|
||||
#define APPLY_FOR_VARIANTS_NOT_CONVERTIBLE_TO_TWO_LEVEL(M) \
|
||||
M(key8) \
|
||||
@ -1050,8 +953,7 @@ struct AggregatedDataVariants : private boost::noncopyable
|
||||
M(concat_two_level) \
|
||||
M(serialized_two_level) \
|
||||
M(nullable_keys128_two_level) \
|
||||
M(nullable_keys256_two_level) \
|
||||
M(nullable_concat_two_level)
|
||||
M(nullable_keys256_two_level)
|
||||
};
|
||||
|
||||
using AggregatedDataVariantsPtr = std::shared_ptr<AggregatedDataVariants>;
|
||||
|
@ -461,17 +461,6 @@ AggregatedDataVariants::Type Aggregator::chooseAggregationMethod(const ConstColu
|
||||
return AggregatedDataVariants::Type::nullable_keys256;
|
||||
}
|
||||
|
||||
/// XXX Aggregation with Array(Nullable(T)) keys can be done much more efficiently.
|
||||
if (has_arrays_of_nullable)
|
||||
return AggregatedDataVariants::Type::serialized;
|
||||
|
||||
/// For the following two cases, see the comments below on the non-nullable variant,
|
||||
/// since it is similar.
|
||||
if (num_array_keys == 1 && !has_arrays_of_non_fixed_elems && all_non_array_keys_are_fixed)
|
||||
return AggregatedDataVariants::Type::nullable_concat;
|
||||
if (num_array_keys == 0 && !has_tuples)
|
||||
return AggregatedDataVariants::Type::nullable_concat;
|
||||
|
||||
/// Fallback case.
|
||||
return AggregatedDataVariants::Type::serialized;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user