diff --git a/dbms/src/DataTypes/DataTypeWithDictionary.cpp b/dbms/src/DataTypes/DataTypeWithDictionary.cpp index fc904f43a8f..8877eb820f1 100644 --- a/dbms/src/DataTypes/DataTypeWithDictionary.cpp +++ b/dbms/src/DataTypes/DataTypeWithDictionary.cpp @@ -669,6 +669,7 @@ void DataTypeWithDictionary::deserializeBinaryBulkWithMultipleStreams( } }; + bool first_dictionary = true; while (limit) { if (state_with_dictionary->num_pending_rows == 0) @@ -681,8 +682,11 @@ void DataTypeWithDictionary::deserializeBinaryBulkWithMultipleStreams( index_type.deserialize(*indexes_stream); - if (index_type.need_global_dictionary && (!global_dictionary || index_type.need_update_dictionary)) + if (index_type.need_global_dictionary && (!global_dictionary || index_type.need_update_dictionary || (first_dictionary && !settings.continuous_reading))) + { readDictionary(); + first_dictionary = false; + } if (state_with_dictionary->index_type.has_additional_keys) readAdditionalKeys(); diff --git a/dbms/src/DataTypes/IDataType.h b/dbms/src/DataTypes/IDataType.h index f0b2f267077..da55762c02f 100644 --- a/dbms/src/DataTypes/IDataType.h +++ b/dbms/src/DataTypes/IDataType.h @@ -135,6 +135,9 @@ public: InputStreamGetter getter; SubstreamPath path; + /// True if reading continues from the previous position in the file. False if an fseek was made to the start of a new granule. + bool continuous_reading = true; + bool position_independent_encoding = true; /// If not zero, may be used to avoid reallocations while reading column of String type. 
double avg_value_size_hint = 0; diff --git a/dbms/src/Storages/MergeTree/MergeTreeReader.cpp b/dbms/src/Storages/MergeTree/MergeTreeReader.cpp index 7207d81cce1..d9be484d5d6 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeReader.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeReader.cpp @@ -427,6 +427,7 @@ void MergeTreeReader::readData( } settings.getter = get_stream_getter(false); + settings.continuous_reading = continue_reading; auto & deserialize_state = deserialize_binary_bulk_state_map[name]; type.deserializeBinaryBulkWithMultipleStreams(column, max_rows_to_read, settings, deserialize_state); IDataType::updateAvgValueSizeHint(column, avg_value_size_hint); diff --git a/dbms/tests/queries/0_stateless/00688_low_cardinality_dictionary_deserialization.reference b/dbms/tests/queries/0_stateless/00688_low_cardinality_dictionary_deserialization.reference new file mode 100644 index 00000000000..cccc32b44e2 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00688_low_cardinality_dictionary_deserialization.reference @@ -0,0 +1 @@ +499463014060 499463014060 diff --git a/dbms/tests/queries/0_stateless/00688_low_cardinality_dictionary_deserialization.sql b/dbms/tests/queries/0_stateless/00688_low_cardinality_dictionary_deserialization.sql new file mode 100644 index 00000000000..44776ce30e4 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00688_low_cardinality_dictionary_deserialization.sql @@ -0,0 +1,7 @@ +set allow_experimental_low_cardinality_type = 1; +drop table if exists test.lc_dict_reading; +create table test.lc_dict_reading (val UInt64, str StringWithDictionary, pat String) engine = MergeTree order by val; +insert into test.lc_dict_reading select number, if(number < 8192 * 4, number % 100, number) as s, s from system.numbers limit 1000000; +select sum(toUInt64(str)), sum(toUInt64(pat)) from test.lc_dict_reading where val < 8129 or val > 8192 * 4; +drop table if exists test.lc_dict_reading; +