Merge pull request #2911 from yandex/low-cardinality-fix-dictionary-deserialization

LowCardinality: fix dictionary deserialization
This commit is contained in:
alexey-milovidov 2018-08-21 17:18:32 +03:00 committed by GitHub
commit aebddd550d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 17 additions and 1 deletion

View File

@ -669,6 +669,7 @@ void DataTypeWithDictionary::deserializeBinaryBulkWithMultipleStreams(
}
};
bool first_dictionary = true;
while (limit)
{
if (state_with_dictionary->num_pending_rows == 0)
@ -681,8 +682,11 @@ void DataTypeWithDictionary::deserializeBinaryBulkWithMultipleStreams(
index_type.deserialize(*indexes_stream);
if (index_type.need_global_dictionary && (!global_dictionary || index_type.need_update_dictionary))
if (index_type.need_global_dictionary && (!global_dictionary || index_type.need_update_dictionary || (first_dictionary && !settings.continuous_reading)))
{
readDictionary();
first_dictionary = false;
}
if (state_with_dictionary->index_type.has_additional_keys)
readAdditionalKeys();

View File

@ -135,6 +135,9 @@ public:
InputStreamGetter getter;
SubstreamPath path;
/// True if we continue reading from the previous position in the file. False if we performed an fseek to the start of a new granule.
bool continuous_reading = true;
bool position_independent_encoding = true;
/// If not zero, may be used to avoid reallocations while reading column of String type.
double avg_value_size_hint = 0;

View File

@ -427,6 +427,7 @@ void MergeTreeReader::readData(
}
settings.getter = get_stream_getter(false);
settings.continuous_reading = continue_reading;
auto & deserialize_state = deserialize_binary_bulk_state_map[name];
type.deserializeBinaryBulkWithMultipleStreams(column, max_rows_to_read, settings, deserialize_state);
IDataType::updateAvgValueSizeHint(column, avg_value_size_hint);

View File

@ -0,0 +1 @@
499463014060 499463014060

View File

@ -0,0 +1,7 @@
set allow_experimental_low_cardinality_type = 1;
drop table if exists test.lc_dict_reading;
create table test.lc_dict_reading (val UInt64, str StringWithDictionary, pat String) engine = MergeTree order by val;
insert into test.lc_dict_reading select number, if(number < 8192 * 4, number % 100, number) as s, s from system.numbers limit 1000000;
select sum(toUInt64(str)), sum(toUInt64(pat)) from test.lc_dict_reading where val < 8129 or val > 8192 * 4;
drop table if exists test.lc_dict_reading;