mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 16:50:48 +00:00
Fixed reading from Array(LowCardinality) column with long block of empty array.
This commit is contained in:
parent
d10b256a8a
commit
53319e2d6e
@ -306,21 +306,11 @@ void ColumnLowCardinality::setSharedDictionary(const ColumnPtr & column_unique)
|
||||
dictionary.setShared(column_unique);
|
||||
}
|
||||
|
||||
/// Builds a new ColumnLowCardinality from this column's positions and its current dictionary,
/// calls compactInplace() on the new column and returns it.
ColumnLowCardinality::MutablePtr ColumnLowCardinality::compact()
|
||||
{
|
||||
/// Take the positions (index) column of this column.
auto positions = idx.getPositions();
|
||||
/// Create column with new indexes and old dictionary.
|
||||
/// NOTE(review): presumably mutate() yields a mutable positions column for the new object
/// (possibly by copying) — confirm against the COW implementation.
auto column = ColumnLowCardinality::create(getDictionary().assumeMutable(), (*std::move(positions)).mutate());
|
||||
/// Will create new dictionary.
|
||||
column->compactInplace();
|
||||
|
||||
return column;
|
||||
}
|
||||
|
||||
ColumnLowCardinality::MutablePtr ColumnLowCardinality::cutAndCompact(size_t start, size_t length) const
|
||||
{
|
||||
auto sub_positions = (*idx.getPositions()->cut(start, length)).mutate();
|
||||
/// Create column with new indexes and old dictionary.
|
||||
/// Dictionary is shared, but will be recreated after the compactInplace call.
|
||||
auto column = ColumnLowCardinality::create(getDictionary().assumeMutable(), std::move(sub_positions));
|
||||
/// Will create new dictionary.
|
||||
column->compactInplace();
|
||||
|
@ -177,10 +177,8 @@ public:
|
||||
void setSharedDictionary(const ColumnPtr & column_unique);
|
||||
bool isSharedDictionary() const { return dictionary.isShared(); }
|
||||
|
||||
/// Create a column with a new dictionary containing only the keys that are mentioned in the index.
|
||||
MutablePtr compact();
|
||||
|
||||
/// Cut + compact.
|
||||
/// Create column with new dictionary from column part.
|
||||
/// Dictionary will have only keys that are mentioned in index.
|
||||
MutablePtr cutAndCompact(size_t start, size_t length) const;
|
||||
|
||||
struct DictionaryEncodedColumn
|
||||
|
@ -195,6 +195,11 @@ struct DeserializeStateLowCardinality : public IDataType::DeserializeBinaryBulkS
|
||||
ColumnPtr null_map;
|
||||
UInt64 num_pending_rows = 0;
|
||||
|
||||
/// If dictionary should be updated.
|
||||
/// Can happen if some granules were skipped while reading from MergeTree.
|
||||
/// We store this flag in the state because, in case of a long block of empty arrays, a call may read nothing and the update has to be deferred to a later call.
|
||||
bool need_update_dictionary = false;
|
||||
|
||||
explicit DeserializeStateLowCardinality(UInt64 key_version) : key_version(key_version) {}
|
||||
};
|
||||
|
||||
@ -686,7 +691,12 @@ void DataTypeLowCardinality::deserializeBinaryBulkWithMultipleStreams(
|
||||
if (!settings.continuous_reading)
|
||||
low_cardinality_state->num_pending_rows = 0;
|
||||
|
||||
bool first_dictionary = true;
|
||||
if (!settings.continuous_reading)
|
||||
{
|
||||
/// Remember in the state that some granules were skipped and we need to update the dictionary.
|
||||
low_cardinality_state->need_update_dictionary = true;
|
||||
}
|
||||
|
||||
while (limit)
|
||||
{
|
||||
if (low_cardinality_state->num_pending_rows == 0)
|
||||
@ -699,10 +709,12 @@ void DataTypeLowCardinality::deserializeBinaryBulkWithMultipleStreams(
|
||||
|
||||
index_type.deserialize(*indexes_stream);
|
||||
|
||||
if (index_type.need_global_dictionary && (!global_dictionary || index_type.need_update_dictionary || (first_dictionary && !settings.continuous_reading)))
|
||||
bool need_update_dictionary =
|
||||
!global_dictionary || index_type.need_update_dictionary || low_cardinality_state->need_update_dictionary;
|
||||
if (index_type.need_global_dictionary && need_update_dictionary)
|
||||
{
|
||||
readDictionary();
|
||||
first_dictionary = false;
|
||||
low_cardinality_state->need_update_dictionary = false;
|
||||
}
|
||||
|
||||
if (low_cardinality_state->index_type.has_additional_keys)
|
||||
|
@ -0,0 +1,7 @@
|
||||
-- Regression test: reading an Array(LowCardinality(String)) column from MergeTree
-- when the data contains a long run of empty arrays.
drop table if exists test.lc;
|
||||
create table test.lc (key UInt64, value Array(LowCardinality(String))) engine = MergeTree order by key;
|
||||
-- 200000 rows: keys below 10000 or above 100000 get the one-element array [toString(key)];
-- every key in between gets an empty array.
insert into test.lc select number, if(number < 10000 or number > 100000, [toString(number)], emptyArrayString()) from system.numbers limit 200000;
|
||||
-- Selects one-element arrays whose element differs from toString(key).
-- Given the insert above, a correct read yields no such rows.
select * from test.lc where (key < 100 or key > 50000) and not has(value, toString(key)) and length(value) == 1 limit 10 settings max_block_size = 8192, max_threads = 1;
|
||||
|
||||
drop table if exists test.lc;
|
||||
|
Loading…
Reference in New Issue
Block a user