Merge pull request #3518 from yandex/low-cardinality-fix-nullable-serialization-into-arena

Low cardinality fix nullable serialization into arena
This commit is contained in:
alexey-milovidov 2018-11-01 20:12:06 +03:00 committed by GitHub
commit e2b0c6c94f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 77 additions and 9 deletions

View File

@ -62,20 +62,13 @@ public:
/// Integer accessors for row `n`: both simply forward to the nested column.
UInt64 getUInt(size_t n) const override { return getNestedColumn()->getUInt(n); }
Int64 getInt(size_t n) const override { return getNestedColumn()->getInt(n); }
/// Row `n` is NULL only if this column is nullable and `n` equals getNullValueIndex().
/// The is_nullable check comes first, so getNullValueIndex() is not called for non-nullable columns.
bool isNullAt(size_t n) const override { return is_nullable && n == getNullValueIndex(); }
/// Inline variant: serializes row `n` into `arena` by delegating directly to column_holder.
/// No null flag is written here, regardless of is_nullable.
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override
{
return column_holder->serializeValueIntoArena(n, arena, begin);
}
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
/// Mixes the value stored at row `n` into `hash`.
/// The work is delegated to the nested column; nothing is returned.
void updateHashWithValue(size_t n, SipHash & hash) const override
{
    const auto & nested = getNestedColumn();
    nested->updateHashWithValue(n, hash);
}
/// Inline variant: compares row `n` of this column with row `m` of `rhs`.
/// `rhs` is cast to IColumnUnique without a runtime check, and the comparison is
/// delegated to the two nested columns. No NULL-specific handling is done here.
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override
{
auto & column_unique = static_cast<const IColumnUnique &>(rhs);
return getNestedColumn()->compareAt(n, m, *column_unique.getNestedColumn(), nan_direction_hint);
}
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
/// Both queries are answered by column_holder directly.
void getExtremes(Field & min, Field & max) const override { column_holder->getExtremes(min, max); }
bool valuesHaveFixedSize() const override { return column_holder->valuesHaveFixedSize(); }
@ -298,9 +291,44 @@ size_t ColumnUnique<ColumnType>::uniqueInsertDataWithTerminatingZero(const char
return static_cast<size_t>(position);
}
/// Serializes the dictionary value at position `n` into `arena`, continuing the contiguous
/// piece of memory that starts at `begin`.
///
/// Layout for a nullable dictionary: a one-byte flag (1 = NULL, 0 = not NULL), followed by the
/// nested value for non-NULL rows only. This is the format uniqueDeserializeAndInsertFromArena
/// reads back: it returns the null index right after a non-zero flag and otherwise parses a
/// nested value from the bytes that follow.
///
/// For a non-nullable dictionary the nested serialization is returned unchanged.
///
/// Returns a StringRef covering everything written for this value.
template <typename ColumnType>
StringRef ColumnUnique<ColumnType>::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
{
    if (!is_nullable)
        return column_holder->serializeValueIntoArena(n, arena, begin);

    const bool is_null = (n == getNullValueIndex());
    const UInt8 flag = is_null ? 1 : 0;

    auto flag_pos = arena.allocContinue(sizeof(flag), begin);
    memcpy(flag_pos, &flag, sizeof(flag));

    /// A NULL row consists of the flag alone; nothing else is expected by the deserializer.
    if (is_null)
        return StringRef(flag_pos, sizeof(flag));

    /// The nested call receives `begin` by reference and may move the whole contiguous piece,
    /// so `flag_pos` must not be used after it. The flag byte is addressed relative to the
    /// nested data instead.
    /// NOTE(review): this relies on the nested bytes directly following the flag byte in the
    /// contiguous piece, the same assumption ColumnNullable makes - confirm against Arena.
    const auto nested_ref = column_holder->serializeValueIntoArena(n, arena, begin);
    return StringRef(nested_ref.data - sizeof(flag), nested_ref.size + sizeof(flag));
}
template <typename ColumnType>
size_t ColumnUnique<ColumnType>::uniqueDeserializeAndInsertFromArena(const char * pos, const char *& new_pos)
{
if (is_nullable)
{
UInt8 val = *reinterpret_cast<const UInt8 *>(pos);
pos += sizeof(val);
if (val)
{
new_pos = pos;
return getNullValueIndex();
}
}
auto column = getRawColumnPtr();
size_t prev_size = column->size();
new_pos = column->deserializeAndInsertFromArena(pos);
@ -318,6 +346,28 @@ size_t ColumnUnique<ColumnType>::uniqueDeserializeAndInsertFromArena(const char
return static_cast<size_t>(index_pos);
}
/// Three-way comparison of row `n` of this column with row `m` of `rhs`.
///
/// When this column is nullable, both indices are checked against this column's null index:
/// two NULLs compare equal, and a single NULL is ordered by `nan_direction_hint`
/// (the hint itself if the left side is NULL, its negation if the right side is).
/// Otherwise the comparison is delegated to the nested columns. `rhs` is cast to
/// IColumnUnique without a runtime check.
template <typename ColumnType>
int ColumnUnique<ColumnType>::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
{
    if (is_nullable)
    {
        /// Same NULL ordering rules as ColumnNullable::compareAt.
        const bool left_is_null = (n == getNullValueIndex());
        const bool right_is_null = (m == getNullValueIndex());

        if (unlikely(left_is_null || right_is_null))
        {
            if (left_is_null && right_is_null)
                return 0;

            if (left_is_null)
                return nan_direction_hint;

            return -nan_direction_hint;
        }
    }

    const auto & rhs_unique = static_cast<const IColumnUnique &>(rhs);
    return getNestedColumn()->compareAt(n, m, *rhs_unique.getNestedColumn(), nan_direction_hint);
}
template <typename IndexType>
static void checkIndexes(const ColumnVector<IndexType> & indexes, size_t max_dictionary_size)
{

View File

@ -0,0 +1,10 @@
\N 333334
1 1
2 1
4 1
5 1
7 1
8 1
10 1
11 1
13 1

View File

@ -0,0 +1,8 @@
-- Groups a LowCardinality(Nullable(Float64)) column that contains NULLs and prints
-- the ten largest groups.
SET allow_experimental_low_cardinality_type = 1;

DROP TABLE IF EXISTS test.low_null_float;
CREATE TABLE test.low_null_float (a LowCardinality(Nullable(Float64))) ENGINE = MergeTree ORDER BY tuple();

-- Every number divisible by 3 is stored as NULL; all other numbers are stored as-is.
INSERT INTO test.low_null_float (a) SELECT if(number % 3 == 0, NULL, number) FROM system.numbers LIMIT 1000000;

SELECT a, count() FROM test.low_null_float GROUP BY a ORDER BY count() DESC, a LIMIT 10;

DROP TABLE IF EXISTS test.low_null_float;