Merge pull request #4594 from yandex/fix-low-cardinality_serialization-for-set-index

Fix Nullable and LowCardinality serialization for set index
This commit is contained in:
alexey-milovidov 2019-03-06 21:57:26 +03:00 committed by GitHub
commit 7e4c497e19
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 159 additions and 5 deletions

View File

@ -11,6 +11,7 @@
#include <Storages/MergeTree/MarkRange.h>
#include <Interpreters/ExpressionActions.h>
#include <Parsers/ASTIndexDeclaration.h>
#include <DataTypes/DataTypeLowCardinality.h>
constexpr auto INDEX_FILE_PREFIX = "skp_idx_";

View File

@ -18,7 +18,7 @@ namespace ErrorCodes
}
/// 0b11 -- can be true and false at the same time
const Field UNKNOWN_FIELD(3);
const Field UNKNOWN_FIELD(3u);
MergeTreeSetIndexGranule::MergeTreeSetIndexGranule(const MergeTreeSetSkippingIndex & index)
@ -47,7 +47,16 @@ void MergeTreeSetIndexGranule::serializeBinary(WriteBuffer & ostr) const
for (size_t i = 0; i < index.columns.size(); ++i)
{
const auto & type = index.data_types[i];
type->serializeBinaryBulk(*columns[i], ostr, 0, size());
IDataType::SerializeBinaryBulkSettings settings;
settings.getter = [&ostr](IDataType::SubstreamPath) -> WriteBuffer * { return &ostr; };
settings.position_independent_encoding = false;
settings.low_cardinality_max_dictionary_size = 0;
IDataType::SerializeBinaryBulkStatePtr state;
type->serializeBinaryBulkStatePrefix(settings, state);
type->serializeBinaryBulkWithMultipleStreams(*columns[i], 0, size(), settings, state);
type->serializeBinaryBulkStateSuffix(settings, state);
}
}
@ -66,11 +75,21 @@ void MergeTreeSetIndexGranule::deserializeBinary(ReadBuffer & istr)
size_type->deserializeBinary(field_rows, istr);
size_t rows_to_read = field_rows.get<size_t>();
if (rows_to_read == 0)
return;
for (size_t i = 0; i < index.columns.size(); ++i)
{
const auto & type = index.data_types[i];
auto new_column = type->createColumn();
type->deserializeBinaryBulk(*new_column, istr, rows_to_read, 0);
IDataType::DeserializeBinaryBulkSettings settings;
settings.getter = [&](IDataType::SubstreamPath) -> ReadBuffer * { return &istr; };
settings.position_independent_encoding = false;
IDataType::DeserializeBinaryBulkStatePtr state;
type->deserializeBinaryBulkStatePrefix(settings, state);
type->deserializeBinaryBulkWithMultipleStreams(*new_column, rows_to_read, settings, state);
block.insert(ColumnWithTypeAndName(new_column->getPtr(), type, index.columns[i]));
}
@ -177,10 +196,24 @@ bool SetIndexCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule)
Block result = granule->getElementsBlock();
actions->execute(result);
const auto & column = result.getByName(expression_ast->getColumnName()).column;
auto column = result.getByName(expression_ast->getColumnName()).column->convertToFullColumnIfLowCardinality();
auto * col_uint8 = typeid_cast<const ColumnUInt8 *>(column.get());
const NullMap * null_map = nullptr;
if (auto * col_nullable = typeid_cast<const ColumnNullable *>(column.get()))
{
col_uint8 = typeid_cast<const ColumnUInt8 *>(&col_nullable->getNestedColumn());
null_map = &col_nullable->getNullMapData();
}
if (!col_uint8)
throw Exception("ColumnUInt8 expected as Set index condition result.", ErrorCodes::LOGICAL_ERROR);
auto & condition = col_uint8->getData();
for (size_t i = 0; i < column->size(); ++i)
if (column->getInt(i) & 1)
if ((!null_map || (*null_map)[i] == 0) && condition[i] & 1)
return true;
return false;

View File

@ -0,0 +1,30 @@
1 a
-
2 b
-
--
1 a
-
2 b
-
--
1 a
-
2 b
-
----
1 a
-
2 b
-
--
1 a
-
2 b
-
--
1 a
-
2 b
-
----

View File

@ -0,0 +1,69 @@
-- Test script for a `set` skipping index declared on a Nullable(String) column
-- and on a LowCardinality(Nullable(String)) column.
-- The same scenario is repeated six times: (re)create the table, insert the rows
-- (1, 'a') and (2, 'b'), then filter by b = 'a', b = 'b' and b = 'c'.
-- Only (1, 'a') has b = 'a', only (2, 'b') has b = 'b', and no inserted row has b = 'c'.
-- The literal rows '-', '--' and '----' are emitted between query results so that
-- individual results (including empty ones) can be told apart in the output.
-- NOTE(review): the argument of set(N) is presumably the maximum number of distinct
-- values kept per index granule, with 0 meaning "unlimited" -- confirm against the docs.
-- NOTE(review): two separate INSERTs presumably create two data parts, while the single
-- multi-row INSERT presumably creates one part holding both rows -- confirm.

-- Turn on the experimental setting before declaring tables with INDEX clauses.
SET allow_experimental_data_skipping_indices=1;

-- Case 1: Nullable(String), set(0), one row per INSERT.
drop table if exists test.nullable_set_index;
create table test.nullable_set_index (a UInt64, b Nullable(String), INDEX b_index b TYPE set(0) GRANULARITY 8192) engine = MergeTree order by a;
insert into test.nullable_set_index values (1, 'a');
insert into test.nullable_set_index values (2, 'b');
select * from test.nullable_set_index where b = 'a';
select '-';
select * from test.nullable_set_index where b = 'b';
select '-';
select * from test.nullable_set_index where b = 'c';
select '--';

-- Case 2: Nullable(String), set(1), one row per INSERT.
drop table if exists test.nullable_set_index;
create table test.nullable_set_index (a UInt64, b Nullable(String), INDEX b_index b TYPE set(1) GRANULARITY 8192) engine = MergeTree order by a;
insert into test.nullable_set_index values (1, 'a');
insert into test.nullable_set_index values (2, 'b');
select * from test.nullable_set_index where b = 'a';
select '-';
select * from test.nullable_set_index where b = 'b';
select '-';
select * from test.nullable_set_index where b = 'c';
select '--';

-- Case 3: Nullable(String), set(0), both rows in a single INSERT.
drop table if exists test.nullable_set_index;
create table test.nullable_set_index (a UInt64, b Nullable(String), INDEX b_index b TYPE set(0) GRANULARITY 8192) engine = MergeTree order by a;
insert into test.nullable_set_index values (1, 'a'), (2, 'b');
select * from test.nullable_set_index where b = 'a';
select '-';
select * from test.nullable_set_index where b = 'b';
select '-';
select * from test.nullable_set_index where b = 'c';
-- '----' closes the Nullable(String) half of the script.
select '----';

-- Case 4: LowCardinality(Nullable(String)), set(0), one row per INSERT.
drop table if exists test.nullable_set_index;
create table test.nullable_set_index (a UInt64, b LowCardinality(Nullable(String)), INDEX b_index b TYPE set(0) GRANULARITY 8192) engine = MergeTree order by a;
insert into test.nullable_set_index values (1, 'a');
insert into test.nullable_set_index values (2, 'b');
select * from test.nullable_set_index where b = 'a';
select '-';
select * from test.nullable_set_index where b = 'b';
select '-';
select * from test.nullable_set_index where b = 'c';
select '--';

-- Case 5: LowCardinality(Nullable(String)), set(1), one row per INSERT.
drop table if exists test.nullable_set_index;
create table test.nullable_set_index (a UInt64, b LowCardinality(Nullable(String)), INDEX b_index b TYPE set(1) GRANULARITY 8192) engine = MergeTree order by a;
insert into test.nullable_set_index values (1, 'a');
insert into test.nullable_set_index values (2, 'b');
select * from test.nullable_set_index where b = 'a';
select '-';
select * from test.nullable_set_index where b = 'b';
select '-';
select * from test.nullable_set_index where b = 'c';
select '--';

-- Case 6: LowCardinality(Nullable(String)), set(0), both rows in a single INSERT.
drop table if exists test.nullable_set_index;
create table test.nullable_set_index (a UInt64, b LowCardinality(Nullable(String)), INDEX b_index b TYPE set(0) GRANULARITY 8192) engine = MergeTree order by a;
insert into test.nullable_set_index values (1, 'a'), (2, 'b');
select * from test.nullable_set_index where b = 'a';
select '-';
select * from test.nullable_set_index where b = 'b';
select '-';
select * from test.nullable_set_index where b = 'c';
-- '----' closes the LowCardinality(Nullable(String)) half of the script.
select '----';

-- Clean up the table used by this script.
drop table if exists test.nullable_set_index;

View File

@ -0,0 +1,20 @@
-- Test script for a `set` skipping index on a LowCardinality(Nullable(String)) column
-- in a table that also has PARTITION BY, a composite ORDER BY and a SAMPLE BY clause.
-- Two rows are written by two separate INSERTs, then the table is filtered on the
-- indexed column `user`; only the 'alice' row satisfies user = 'alice'.

-- Turn on the experimental setting before declaring a table with an INDEX clause.
SET allow_experimental_data_skipping_indices=1;
drop table if exists test.null_lc_set_index;
-- `user` is both the indexed column and the argument of cityHash64() in the
-- ORDER BY / SAMPLE BY expressions.
-- NOTE(review): set(0) presumably means "no limit on stored distinct values" -- confirm.
CREATE TABLE test.null_lc_set_index (
timestamp DateTime,
action LowCardinality(Nullable(String)),
user LowCardinality(Nullable(String)),
INDEX test_user_idx (user) TYPE set(0) GRANULARITY 8192
) ENGINE=MergeTree
PARTITION BY toYYYYMMDD(timestamp)
ORDER BY (timestamp, action, cityHash64(user))
SAMPLE BY cityHash64(user);
-- The first value of each row goes into the DateTime column `timestamp`.
-- NOTE(review): the integers are presumably interpreted as Unix timestamps -- confirm.
INSERT INTO test.null_lc_set_index VALUES (1550883010, 'subscribe', 'alice');
INSERT INTO test.null_lc_set_index VALUES (1550883020, 'follow', 'bob');
-- Filter on the indexed column; of the two inserted rows only ('subscribe', 'alice') matches.
SELECT action, user FROM test.null_lc_set_index WHERE user = 'alice';
-- Clean up the table used by this script.
drop table if exists test.null_lc_set_index;