Fixed reading from Array(LowCardinality) column with long block of empty array.

This commit is contained in:
Nikolai Kochetov 2019-03-29 17:06:46 +03:00
parent d10b256a8a
commit 53319e2d6e
5 changed files with 25 additions and 18 deletions

View File

@ -306,21 +306,11 @@ void ColumnLowCardinality::setSharedDictionary(const ColumnPtr & column_unique)
dictionary.setShared(column_unique);
}
/// Produce a copy of this column whose dictionary contains only the keys
/// that are actually mentioned in the index (unused dictionary entries are dropped).
ColumnLowCardinality::MutablePtr ColumnLowCardinality::compact()
{
    auto positions = idx.getPositions();
    /// Create column with new indexes and old dictionary.
    /// NOTE(review): (*std::move(positions)).mutate() follows the COW idiom — it yields a
    /// mutable positions column, presumably reusing storage when uniquely owned; confirm in COW.h.
    auto column = ColumnLowCardinality::create(getDictionary().assumeMutable(), (*std::move(positions)).mutate());
    /// Will create new dictionary.
    column->compactInplace();
    return column;
}
ColumnLowCardinality::MutablePtr ColumnLowCardinality::cutAndCompact(size_t start, size_t length) const
{
auto sub_positions = (*idx.getPositions()->cut(start, length)).mutate();
/// Create column with new indexes and old dictionary.
/// Dictionary is shared, but will be recreated after the compactInplace call.
auto column = ColumnLowCardinality::create(getDictionary().assumeMutable(), std::move(sub_positions));
/// Will create new dictionary.
column->compactInplace();

View File

@ -177,10 +177,8 @@ public:
void setSharedDictionary(const ColumnPtr & column_unique);
bool isSharedDictionary() const { return dictionary.isShared(); }
/// Create a column whose new dictionary contains only keys that are mentioned in the index.
MutablePtr compact();
/// Cut + compact.
/// Create column with new dictionary from column part.
/// Dictionary will have only keys that are mentioned in index.
MutablePtr cutAndCompact(size_t start, size_t length) const;
struct DictionaryEncodedColumn

View File

@ -195,6 +195,11 @@ struct DeserializeStateLowCardinality : public IDataType::DeserializeBinaryBulkS
ColumnPtr null_map;
UInt64 num_pending_rows = 0;
/// If dictionary should be updated.
/// Can happen if some granules were skipped while reading from MergeTree.
/// Stored in the state because, with a long block of empty arrays, we may read nothing for a while.
bool need_update_dictionary = false;
explicit DeserializeStateLowCardinality(UInt64 key_version) : key_version(key_version) {}
};
@ -686,7 +691,12 @@ void DataTypeLowCardinality::deserializeBinaryBulkWithMultipleStreams(
if (!settings.continuous_reading)
low_cardinality_state->num_pending_rows = 0;
bool first_dictionary = true;
if (!settings.continuous_reading)
{
/// Remember in the state that some granules were skipped and we need to update the dictionary.
low_cardinality_state->need_update_dictionary = true;
}
while (limit)
{
if (low_cardinality_state->num_pending_rows == 0)
@ -699,10 +709,12 @@ void DataTypeLowCardinality::deserializeBinaryBulkWithMultipleStreams(
index_type.deserialize(*indexes_stream);
if (index_type.need_global_dictionary && (!global_dictionary || index_type.need_update_dictionary || (first_dictionary && !settings.continuous_reading)))
bool need_update_dictionary =
!global_dictionary || index_type.need_update_dictionary || low_cardinality_state->need_update_dictionary;
if (index_type.need_global_dictionary && need_update_dictionary)
{
readDictionary();
first_dictionary = false;
low_cardinality_state->need_update_dictionary = false;
}
if (low_cardinality_state->index_type.has_additional_keys)

View File

@ -0,0 +1,7 @@
-- Regression test: reading an Array(LowCardinality(String)) column must not fail when a
-- long run of granules contains only empty arrays (the dictionary-update flag must persist).
drop table if exists test.lc;
create table test.lc (key UInt64, value Array(LowCardinality(String))) engine = MergeTree order by key;
-- Rows with 10000 <= number <= 100000 get an empty array, producing a long block
-- during which no dictionary data is read; other rows get [toString(number)].
insert into test.lc select number, if(number < 10000 or number > 100000, [toString(number)], emptyArrayString()) from system.numbers limit 200000;
-- Expect zero rows: every single-element value equals [toString(key)], so the
-- `not has(value, toString(key))` predicate filters everything out.
select * from test.lc where (key < 100 or key > 50000) and not has(value, toString(key)) and length(value) == 1 limit 10 settings max_block_size = 8192, max_threads = 1;
drop table if exists test.lc;