Merge pull request #71299 from Avogar/fix-dynamic-lc-dict

Fix reading of LowCardinality dictionary in Dynamic column
This commit is contained in:
Pavel Kruglov 2024-11-01 11:02:00 +00:00 committed by GitHub
commit 5288729d60
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 45 additions and 3 deletions

View File

@ -434,6 +434,14 @@ bool ISerialization::isDynamicSubcolumn(const DB::ISerialization::SubstreamPath
return false;
}
/// Tells whether the given substream path ends with a DictionaryKeys substream.
/// An empty path never matches; otherwise only the last element of the path is inspected.
bool ISerialization::isLowCardinalityDictionarySubcolumn(const DB::ISerialization::SubstreamPath & path)
{
    return !path.empty() && path.back().type == SubstreamType::DictionaryKeys;
}
ISerialization::SubstreamData ISerialization::createFromPath(const SubstreamPath & path, size_t prefix_len)
{
assert(prefix_len <= path.size());

View File

@ -463,6 +463,8 @@ public:
/// Returns true if stream with specified path corresponds to dynamic subcolumn.
static bool isDynamicSubcolumn(const SubstreamPath & path, size_t prefix_len);
/// Returns true if the path is not empty and its last substream has type DictionaryKeys.
static bool isLowCardinalityDictionarySubcolumn(const SubstreamPath & path);
protected:
template <typename State, typename StatePtr>
State * checkAndGetState(const StatePtr & state) const;

View File

@ -54,7 +54,7 @@ void SerializationLowCardinality::enumerateStreams(
.withSerializationInfo(data.serialization_info);
settings.path.back().data = dict_data;
dict_inner_serialization->enumerateStreams(settings, callback, dict_data);
callback(settings.path);
settings.path.back() = Substream::DictionaryIndexes;
settings.path.back().data = data;

View File

@ -262,7 +262,7 @@ MergeTreeReaderWide::FileStreams::iterator MergeTreeReaderWide::addStream(const
/*num_columns_in_mark=*/ 1);
auto stream_settings = settings;
stream_settings.is_low_cardinality_dictionary = substream_path.size() > 1 && substream_path[substream_path.size() - 2].type == ISerialization::Substream::Type::DictionaryKeys;
stream_settings.is_low_cardinality_dictionary = ISerialization::isLowCardinalityDictionarySubcolumn(substream_path);
auto create_stream = [&]<typename Stream>()
{

View File

@ -1,7 +1,7 @@
['{ArraySizes}','{ArrayElements, Regular}']
['{ArraySizes}','{ArrayElements, TupleElement(keys), Regular}','{ArrayElements, TupleElement(values), Regular}']
['{TupleElement(1), Regular}','{TupleElement(2), Regular}','{TupleElement(3), Regular}']
['{DictionaryKeys, Regular}','{DictionaryIndexes}']
['{DictionaryKeys}','{DictionaryIndexes}']
['{NullMap}','{NullableElements, Regular}']
['{ArraySizes}','{ArrayElements, Regular}']
['{ArraySizes}','{ArrayElements, TupleElement(keys), Regular}','{ArrayElements, TupleElement(values), Regular}']

View File

@ -0,0 +1,20 @@
12345678
12345678
12345678
12345678
12345678
12345678
12345678
12345678
12345678
12345678
12345678
12345678
12345678
12345678
12345678
12345678
12345678
12345678
12345678
12345678

View File

@ -0,0 +1,12 @@
-- Regression test: reading a LowCardinality dictionary stored inside a Dynamic column.
set allow_experimental_dynamic_type = 1;
set min_bytes_to_use_direct_io = 0;
drop table if exists test;
-- NOTE(review): min_rows_for_wide_part=1 / min_bytes_for_wide_part=1 presumably force a wide part, and
-- index_granularity=1 with 8-byte compress blocks presumably spreads the data over many marks/blocks -- confirm.
create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, index_granularity=1, use_adaptive_write_buffer_for_dynamic_subcolumns=0, max_compress_block_size=8, min_compress_block_size=8, use_compact_variant_discriminators_serialization=0;
-- Insert 20 rows; every row stores the same string '12345678' cast to LowCardinality(String) into the Dynamic column.
insert into test select number, '12345678'::LowCardinality(String) from numbers(20);
-- Read back only the LowCardinality(String) subcolumn of d, using a single thread.
select d.`LowCardinality(String)` from test settings max_threads=1;
drop table test;