mirror of https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-27 01:51:59 +00:00
fix reading of empty Nested(Array(...))
This commit is contained in:
parent d43e76b147
commit 991abde851
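The fix targets the compact-part reader: when a column of a Nested structure is missing from a part, the reader reconstructs it from the offsets of a sibling column, and for Nested(Array(...)) it previously pulled in too many offset levels. A minimal reproduction sketch under the default flatten_nested = 1 (the table and column names are hypothetical, not taken from the commit's test):

CREATE TABLE t (id UInt64, nested Nested(col1 Array(String)))
ENGINE = MergeTree ORDER BY id
SETTINGS min_bytes_for_wide_part = 1000000000; -- keep the part Compact: MergeTreeReaderCompact is the reader being fixed

INSERT INTO t (id, `nested.col1`) VALUES (1, [[], ['0'], ['0', '1']]);

-- Add a sibling after the part is written, so the existing part lacks it:
ALTER TABLE t ADD COLUMN `nested.col2` Array(Array(String));

-- Only the Nested-level offsets are shared between siblings, so the inner
-- arrays of the missing column should come back empty:
SELECT `nested.col1`, `nested.col2` FROM t;
-- [[],['0'],['0','1']]   [[],[],[]]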
@@ -242,7 +242,7 @@ IMergeTreeReader::ColumnNameLevel IMergeTreeReader::findColumnForOffsets(const N
     /// Find column that has maximal number of matching
     /// offsets columns with required_column.
-    for (const auto & part_column : data_part_info_for_read->getColumns())
+    for (const auto & part_column : Nested::convertToSubcolumns(data_part_info_for_read->getColumns()))
     {
         auto name_in_storage = Nested::extractTableName(part_column.name);
         if (name_in_storage != required_name_in_storage)
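For context: with flatten_nested = 1 a Nested column is stored as sibling Array subcolumns that share their outermost (Nested-level) offsets, which is why the search above compares table names extracted from the part's column names. A quick illustration (hypothetical table name):

CREATE TABLE nested_demo (n Nested(a String, b String)) ENGINE = MergeTree ORDER BY tuple();
DESCRIBE TABLE nested_demo;
-- n.a   Array(String)
-- n.b   Array(String)

Nested::convertToSubcolumns normalizes the part's column list into this flattened form before the search, presumably so that offsets can also be matched when a part lists a column in its collected Nested form.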
@@ -105,10 +105,10 @@ protected:
     NameSet partially_read_columns;
 
-private:
     /// Alter conversions, which must be applied on fly if required
     AlterConversionsPtr alter_conversions;
 
+private:
     /// Columns that are requested to read.
     NamesAndTypesList requested_columns;
@@ -149,11 +149,34 @@ void MergeTreeReaderCompact::fillColumnPositions()
             position.reset();
         }
 
+        /// If array of Nested column is missing in part,
+        /// we have to read its offsets if they exist.
         if (!position && is_array)
         {
-            /// If array of Nested column is missing in part,
-            /// we have to read its offsets if they exist.
-            auto name_level_for_offsets = findColumnForOffsets(column_to_read);
+            NameAndTypePair column_to_read_with_subcolumns = column_to_read;
+            auto [name_in_storage, subcolumn_name] = Nested::splitName(column_to_read.name);
+
+            /// If it is a part of Nested, we need to get the column from
+            /// storage metadata which is converted to Nested type with subcolumns.
+            /// It is required for proper counting of shared streams.
+            if (!subcolumn_name.empty())
+            {
+                /// If column is renamed get the new name from storage metadata.
+                if (alter_conversions->columnHasNewName(name_in_storage))
+                    name_in_storage = alter_conversions->getColumnNewName(name_in_storage);
+
+                if (!storage_columns_with_collected_nested)
+                    storage_columns_with_collected_nested = ColumnsDescription(
+                        Nested::collect(metadata_snapshot->getColumns().getAllPhysical()));
+
+                column_to_read_with_subcolumns = storage_columns_with_collected_nested
+                    ->getColumnOrSubcolumn(
+                        GetColumnsOptions::All,
+                        Nested::concatenateName(name_in_storage, subcolumn_name));
+            }
+
+            auto name_level_for_offsets = findColumnForOffsets(column_to_read_with_subcolumns);
 
             if (name_level_for_offsets.has_value())
             {
                 column_positions[i] = data_part_info_for_read->getColumnPosition(name_level_for_offsets->first);
@@ -162,7 +185,9 @@ void MergeTreeReaderCompact::fillColumnPositions()
             }
         }
         else
+        {
             column_positions[i] = std::move(position);
+        }
     }
 }
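Note that the new branch only runs when the position lookup failed, i.e. for parts written before the column existed; parts that do contain the column keep their stored position via the else branch. Continuing the hypothetical repro from above:

-- A part written after the ALTER stores nested.col2 itself,
-- so reading it needs no offsets fallback:
INSERT INTO t (id, `nested.col1`, `nested.col2`) VALUES (2, [['a']], [['b']]);
SELECT `nested.col1`, `nested.col2` FROM t ORDER BY id;
-- [[],['0'],['0','1']]   [[],[],[]]
-- [['a']]   [['b']]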
@@ -297,6 +322,8 @@ void MergeTreeReaderCompact::readData(
     };
 
     ISerialization::DeserializeBinaryBulkStatePtr state;
+    ISerialization::DeserializeBinaryBulkStatePtr state_for_prefix;
 
     ISerialization::DeserializeBinaryBulkSettings deserialize_settings;
     deserialize_settings.avg_value_size_hint = avg_value_size_hints[name];
@@ -306,14 +333,18 @@ void MergeTreeReaderCompact::readData(
 
-        /// In case of reading onlys offset use the correct serialization for reading of the prefix
         auto serialization = getSerializationInPart(name_type_in_storage);
-        auto serialization_for_prefix = column_for_offsets ? getSerializationInPart(*column_for_offsets) : serialization;
 
         ColumnPtr temp_column = name_type_in_storage.type->createColumn(*serialization);
 
-        deserialize_settings.getter = buffer_getter_for_prefix;
-        serialization_for_prefix->deserializeBinaryBulkStatePrefix(deserialize_settings, state);
+        if (column_for_offsets)
+        {
+            auto serialization_for_prefix = getSerializationInPart(*column_for_offsets);
+
+            deserialize_settings.getter = buffer_getter_for_prefix;
+            serialization_for_prefix->deserializeBinaryBulkStatePrefix(deserialize_settings, state_for_prefix);
+        }
 
         deserialize_settings.getter = buffer_getter;
+        serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, state);
         serialization->deserializeBinaryBulkWithMultipleStreams(temp_column, rows_to_read, deserialize_settings, state, nullptr);
 
         auto subcolumn = name_type_in_storage.type->getSubcolumn(name_and_type.getSubcolumnName(), temp_column);
@@ -328,12 +359,17 @@ void MergeTreeReaderCompact::readData(
     {
         /// In case of reading only offsets use the correct serialization for reading the prefix
         auto serialization = getSerializationInPart(name_and_type);
-        auto serialization_for_prefix = column_for_offsets ? getSerializationInPart(*column_for_offsets) : serialization;
 
-        deserialize_settings.getter = buffer_getter_for_prefix;
-        serialization_for_prefix->deserializeBinaryBulkStatePrefix(deserialize_settings, state);
+        if (column_for_offsets)
+        {
+            auto serialization_for_prefix = getSerializationInPart(*column_for_offsets);
+
+            deserialize_settings.getter = buffer_getter_for_prefix;
+            serialization_for_prefix->deserializeBinaryBulkStatePrefix(deserialize_settings, state_for_prefix);
+        }
 
         deserialize_settings.getter = buffer_getter;
+        serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, state);
         serialization->deserializeBinaryBulkWithMultipleStreams(column, rows_to_read, deserialize_settings, state, nullptr);
     }
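Both branches get the same treatment: the offsets column's prefix is now deserialized into its own state_for_prefix instead of the shared state, and the requested column's serialization always reads its own prefix. The distinction matters most for serializations with non-trivial prefixes such as LowCardinality, whose prefix carries additional dictionary-related state; a hypothetical variant of the repro that would exercise it:

ALTER TABLE t ADD COLUMN `nested.col3` Array(Array(LowCardinality(String)));
-- Deserializing nested.col3's prefix through the offsets column's state would
-- mix two unrelated serializations; with separate states this reads cleanly:
SELECT `nested.col3` FROM t WHERE id = 1;
-- [[],[],[]]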
@@ -52,6 +52,12 @@ private:
 
     MergeTreeMarksLoader marks_loader;
 
+    /// Storage columns with the separate arrays of Nested collected into columns of Nested type.
+    /// They may be needed for finding offsets of missing Nested columns in parts.
+    /// They are rarely used and are heavy to initialize, so we create them
+    /// only on demand and cache them in this field.
+    std::optional<ColumnsDescription> storage_columns_with_collected_nested;
+
     /// Positions of columns in part structure.
     using ColumnPositions = std::vector<std::optional<size_t>>;
     ColumnPositions column_positions;
@@ -85,7 +91,6 @@ private:
 
     ReadBufferFromFileBase::ProfileCallback profile_callback;
     clockid_t clock_type;
 
-    bool initialized = false;
 };
@@ -10,14 +10,14 @@
 ['0','1','2','3','4','5','6','7','8'] ['','','','','','','','','']
 [] []
 [[]] [[]]
-[[],['0']] [[],['']]
-[[],['0'],['0','1']] [[],[''],['','']]
-[[],['0'],['0','1'],['0','1','2']] [[],[''],['',''],['','','']]
-[[],['0'],['0','1'],['0','1','2'],[]] [[],[''],['',''],['','',''],[]]
-[[],['0'],['0','1'],['0','1','2'],[],['0']] [[],[''],['',''],['','',''],[],['']]
-[[],['0'],['0','1'],['0','1','2'],[],['0'],['0','1']] [[],[''],['',''],['','',''],[],[''],['','']]
-[[],['0'],['0','1'],['0','1','2'],[],['0'],['0','1'],['0','1','2']] [[],[''],['',''],['','',''],[],[''],['',''],['','','']]
-[[],['0'],['0','1'],['0','1','2'],[],['0'],['0','1'],['0','1','2'],[]] [[],[''],['',''],['','',''],[],[''],['',''],['','',''],[]]
+[[],['0']] [[],[]]
+[[],['0'],['0','1']] [[],[],[]]
+[[],['0'],['0','1'],['0','1','2']] [[],[],[],[]]
+[[],['0'],['0','1'],['0','1','2'],[]] [[],[],[],[],[]]
+[[],['0'],['0','1'],['0','1','2'],[],['0']] [[],[],[],[],[],[]]
+[[],['0'],['0','1'],['0','1','2'],[],['0'],['0','1']] [[],[],[],[],[],[],[]]
+[[],['0'],['0','1'],['0','1','2'],[],['0'],['0','1'],['0','1','2']] [[],[],[],[],[],[],[],[]]
+[[],['0'],['0','1'],['0','1','2'],[],['0'],['0','1'],['0','1','2'],[]] [[],[],[],[],[],[],[],[],[]]
 [] []
 [{}] [{}]
 [{},{'k0':0}] [{},{}]
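The reference change above is the visible effect of the fix: the second column (the Nested sibling missing from the part) now comes back with empty inner arrays instead of mirroring the sibling's inner offsets with default values. In terms of the hypothetical repro table:

SELECT `nested.col1`, `nested.col2` FROM t WHERE id = 1;
-- before the fix: [[],['0'],['0','1']]   [[],[''],['','']]
-- after the fix:  [[],['0'],['0','1']]   [[],[],[]]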