Merge branch 'master' of github.com:yandex/ClickHouse

This commit is contained in:
Alexey Milovidov 2017-01-30 10:05:57 +03:00
commit 509662a24d
3 changed files with 5 additions and 210 deletions

View File

@ -247,14 +247,6 @@ static inline StringRef * ALWAYS_INLINE extractKeysAndPlaceInPool(
}
/// Place the specified keys into a continuous memory chunk. The implementation
/// of this function depends on whether some keys are nullable or not. See comments
/// below for the specialized implementations.
///
/// Template parameter:
///   has_nullable_keys - selects the specialization: `true` for the case where at
///                       least one key is nullable, `false` when no key is nullable.
/// Parameters:
///   i           - index of the row whose key values are read from the key columns.
///   keys_size   - number of keys (i.e. number of entries used in key_columns / keys).
///   key_columns - the columns the key values are taken from.
///   keys        - filled with one StringRef per key.
///   pool        - arena from which the memory chunk is allocated.
/// Returns a StringRef referring to the part of the chunk that holds the key data
/// (the StringRef objects appended after it are not included).
template <bool has_nullable_keys>
static StringRef extractKeysAndPlaceInPoolContiguous(
size_t i, size_t keys_size, const ConstColumnPlainPtrs & key_columns, StringRefs & keys, Arena & pool);
/// Implementation for the case when there are no nullable keys.
/// Copy the specified keys to a continuous memory chunk of a pool.
/// Subsequently append StringRef objects referring to each key.
///
@ -268,8 +260,7 @@ static StringRef extractKeysAndPlaceInPoolContiguous(
///
/// Return a StringRef object, referring to the area (1) of the memory
/// chunk that contains the keys. In other words, we ignore their StringRefs.
template <>
inline StringRef ALWAYS_INLINE extractKeysAndPlaceInPoolContiguous<false>(
inline StringRef ALWAYS_INLINE extractKeysAndPlaceInPoolContiguous(
size_t i, size_t keys_size, const ConstColumnPlainPtrs & key_columns, StringRefs & keys, Arena & pool)
{
size_t sum_keys_size = 0;
@ -295,93 +286,6 @@ inline StringRef ALWAYS_INLINE extractKeysAndPlaceInPoolContiguous<false>(
return {res, sum_keys_size};
}
/// Implementation for the case where there is at least one nullable key.
/// Inside a continuous memory chunk of a pool, put a bitmap that indicates
/// for each specified key whether its value is null or not. Copy the keys
/// whose values are not nulls to the memory chunk. Subsequently append
/// StringRef objects referring to each key, even those that contain a null.
///
/// [bitmap][key1][key2][key4]...[keyN][ref1][ref2][ref3 (null)]...[refN]
/// :         ^     ^                 :   |     |
/// :         +-----|-----------------:---+     |
/// :               +-----------------:---------+
/// :                                 :
/// <--------------------------------->
///                 (1)
///
/// Return a StringRef object, referring to the area (1) of the memory
/// chunk that contains the bitmap and the keys. In other words, we ignore
/// the keys' StringRefs.
template <>
inline StringRef ALWAYS_INLINE extractKeysAndPlaceInPoolContiguous<true>(
    size_t i, size_t keys_size, const ConstColumnPlainPtrs & key_columns, StringRefs & keys, Arena & pool)
{
    /// One bit per key, rounded up to a whole number of bytes.
    const size_t bitmap_size = keys_size / 8 + ((keys_size % 8) != 0 ? 1 : 0);
    std::vector<UInt8> null_bits(bitmap_size);

    /// First pass: look up every key for row i, mark nulls in the bitmap
    /// and count how many bytes the non-null keys occupy.
    size_t payload_bytes = 0;
    for (size_t key_idx = 0; key_idx < keys_size; ++key_idx)
    {
        const IColumn * source_column = key_columns[key_idx];
        bool value_is_null = false;

        if (key_columns[key_idx]->isNullable())
        {
            /// Read the value from the nested column; nullness comes from the null map.
            const ColumnNullable & nullable_col = static_cast<const ColumnNullable &>(*key_columns[key_idx]);
            source_column = nullable_col.getNestedColumn().get();
            const auto & null_map = nullable_col.getNullMap();
            value_is_null = (null_map[i] == 1);
        }

        if (value_is_null)
        {
            null_bits[key_idx / 8] |= UInt8(1) << (key_idx % 8);
            keys[key_idx] = StringRef{};
            continue;
        }

        keys[key_idx] = source_column->getDataAtWithTerminatingZero(i);
        payload_bytes += keys[key_idx].size;
    }

    /// A single allocation holds: bitmap, non-null key bytes, then the StringRef objects.
    const size_t prefix_size = bitmap_size + payload_bytes;
    char * chunk = pool.alloc(prefix_size + keys_size * sizeof(StringRef));
    char * cursor = chunk;

    /// Bitmap goes first.
    memcpy(cursor, null_bits.data(), null_bits.size());
    cursor += null_bits.size();

    /// Second pass: copy every non-null key and repoint its StringRef into the chunk.
    for (size_t key_idx = 0; key_idx < keys_size; ++key_idx)
    {
        const bool marked_null = ((null_bits[key_idx / 8] >> (key_idx % 8)) & 1) != 0;
        if (marked_null)
            continue;

        memcpy(cursor, keys[key_idx].data, keys[key_idx].size);
        keys[key_idx].data = cursor;
        cursor += keys[key_idx].size;
    }

    /// Finally append the StringRef objects of all the keys, including the null ones.
    memcpy(cursor, &keys[0], keys_size * sizeof(StringRef));

    /// The returned reference covers the bitmap and the key bytes only.
    return {chunk, prefix_size};
}
/** Сериализовать ключи в непрерывный кусок памяти.
*/

View File

@ -488,7 +488,7 @@ struct AggregationMethodKeysFixed
/// Агрегирует по конкатенации ключей. (При этом, строки, содержащие нули посередине, могут склеиться.)
template <typename TData, bool has_nullable_keys_ = false>
template <typename TData>
struct AggregationMethodConcat
{
using Data = TData;
@ -497,8 +497,6 @@ struct AggregationMethodConcat
using iterator = typename Data::iterator;
using const_iterator = typename Data::const_iterator;
static constexpr bool has_nullable_keys = has_nullable_keys_;
Data data;
AggregationMethodConcat() {}
@ -520,7 +518,7 @@ struct AggregationMethodConcat
StringRefs & keys,
Arena & pool) const
{
return extractKeysAndPlaceInPoolContiguous<has_nullable_keys>(i, keys_size, key_columns, keys, pool);
return extractKeysAndPlaceInPoolContiguous(i, keys_size, key_columns, keys, pool);
}
};
@ -541,15 +539,11 @@ struct AggregationMethodConcat
static void insertKeyIntoColumns(const typename Data::value_type & value, ColumnPlainPtrs & key_columns, size_t keys_size, const Sizes & key_sizes)
{
    /// Dispatch on the compile-time flag: keys without a null bitmap take the plain path.
    if (!has_nullable_keys)
    {
        insertKeyIntoColumnsImpl(value, key_columns, keys_size, key_sizes);
        return;
    }

    insertKeyIntoNullableColumnsImpl(value, key_columns, keys_size, key_sizes);
}
private:
/// Insert the values of the specified keys into the corresponding columns.
/// Implementation for the case where there are no nullable keys.
static void insertKeyIntoColumnsImpl(const typename Data::value_type & value, ColumnPlainPtrs & key_columns, size_t keys_size, const Sizes & key_sizes)
{
/// См. функцию extractKeysAndPlaceInPoolContiguous.
@ -570,92 +564,6 @@ private:
key_columns[i]->insertDataWithTerminatingZero(key_refs[i].data, key_refs[i].size);
}
}
/// Insert the values of the specified keys into the corresponding columns.
/// Variant for keys that start with a null bitmap (at least one nullable key):
/// bit i of the bitmap tells whether the value of key i is null.
/// Throws Exception (LOGICAL_ERROR) if the stored key is shorter than the bitmap.
static void insertKeyIntoNullableColumnsImpl(const typename Data::value_type & value, ColumnPlainPtrs & key_columns, size_t keys_size, const Sizes & key_sizes)
{
    /// One bit per key, rounded up to a whole number of bytes.
    const size_t bitmap_bytes = keys_size / 8 + ((keys_size % 8) != 0 ? 1 : 0);

    /// This code path is logically impossible.
    /// Only a bug in the code base can trigger it.
    if (unlikely(value.first.size < bitmap_bytes))
        throw Exception{"Aggregator: corrupted hash table key", ErrorCodes::LOGICAL_ERROR};

    const UInt8 * null_bits = reinterpret_cast<const UInt8 *>(value.first.data);

    if (unlikely(value.first.size == bitmap_bytes))
    {
        /// Nothing but the bitmap is stored: every key is either null or
        /// represents an empty array, so each column receives a default value.
        for (size_t key_idx = 0; key_idx < keys_size; ++key_idx)
        {
            IColumn * target = key_columns[key_idx];

            if (key_columns[key_idx]->isNullable())
            {
                ColumnNullable & nullable_col = static_cast<ColumnNullable &>(*key_columns[key_idx]);
                target = nullable_col.getNestedColumn().get();
                ColumnUInt8 & null_map = nullable_col.getNullMapConcreteColumn();

                UInt8 null_flag = (null_bits[key_idx / 8] >> (key_idx % 8)) & 1;
                null_map.insert(null_flag);
            }

            target->insertDefault();
        }
        return;
    }

    /// General case: the StringRef objects of the keys follow the stored key data.
    const StringRef * key_refs = reinterpret_cast<const StringRef *>(value.first.data + value.first.size);

    for (size_t key_idx = 0; key_idx < keys_size; ++key_idx)
    {
        IColumn * target = key_columns[key_idx];
        bool value_is_null = false;

        if (key_columns[key_idx]->isNullable())
        {
            /// For a nullable column, write into its nested column and
            /// record the null flag taken from the bitmap in its null map.
            ColumnNullable & nullable_col = static_cast<ColumnNullable &>(*key_columns[key_idx]);
            target = nullable_col.getNestedColumn().get();
            ColumnUInt8 & null_map = nullable_col.getNullMapConcreteColumn();

            UInt8 null_flag = (null_bits[key_idx / 8] >> (key_idx % 8)) & 1;
            null_map.insert(null_flag);
            value_is_null = (null_flag == 1);
        }

        if (value_is_null)
            target->insertDefault();
        else
            target->insertDataWithTerminatingZero(key_refs[key_idx].data, key_refs[key_idx].size);
    }
}
};
@ -846,10 +754,8 @@ struct AggregatedDataVariants : private boost::noncopyable
/// Support for nullable keys.
std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128, true>> nullable_keys128;
std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256, true>> nullable_keys256;
std::unique_ptr<AggregationMethodConcat<AggregatedDataWithStringKey, true>> nullable_concat;
std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128TwoLevel, true>> nullable_keys128_two_level;
std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256TwoLevel, true>> nullable_keys256_two_level;
std::unique_ptr<AggregationMethodConcat<AggregatedDataWithStringKeyTwoLevel, true>> nullable_concat_two_level;
/// В этом и подобных макросах, вариант without_key не учитывается.
#define APPLY_FOR_AGGREGATED_VARIANTS(M) \
@ -882,10 +788,8 @@ struct AggregatedDataVariants : private boost::noncopyable
M(serialized_hash64, false) \
M(nullable_keys128, false) \
M(nullable_keys256, false) \
M(nullable_concat, false) \
M(nullable_keys128_two_level, true) \
M(nullable_keys256_two_level, true) \
M(nullable_concat_two_level, true)
enum class Type
{
@ -1005,7 +909,6 @@ struct AggregatedDataVariants : private boost::noncopyable
M(serialized) \
M(nullable_keys128) \
M(nullable_keys256) \
M(nullable_concat) \
#define APPLY_FOR_VARIANTS_NOT_CONVERTIBLE_TO_TWO_LEVEL(M) \
M(key8) \
@ -1050,8 +953,7 @@ struct AggregatedDataVariants : private boost::noncopyable
M(concat_two_level) \
M(serialized_two_level) \
M(nullable_keys128_two_level) \
M(nullable_keys256_two_level) \
M(nullable_concat_two_level)
M(nullable_keys256_two_level)
};
using AggregatedDataVariantsPtr = std::shared_ptr<AggregatedDataVariants>;

View File

@ -461,17 +461,6 @@ AggregatedDataVariants::Type Aggregator::chooseAggregationMethod(const ConstColu
return AggregatedDataVariants::Type::nullable_keys256;
}
/// XXX Aggregation with Array(Nullable(T)) keys can be done much more efficiently.
if (has_arrays_of_nullable)
return AggregatedDataVariants::Type::serialized;
/// For the following two cases, see the comments below on the non-nullable variant,
/// since it is similar.
if (num_array_keys == 1 && !has_arrays_of_non_fixed_elems && all_non_array_keys_are_fixed)
return AggregatedDataVariants::Type::nullable_concat;
if (num_array_keys == 0 && !has_tuples)
return AggregatedDataVariants::Type::nullable_concat;
/// Fallback case.
return AggregatedDataVariants::Type::serialized;
}