mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-01 12:01:58 +00:00
fix alter column rename
This commit is contained in:
parent
56039c8780
commit
04fd72cdef
@ -524,6 +524,7 @@ private:
|
||||
/// Map from name of column to its serialization info.
|
||||
SerializationInfoByName serialization_infos;
|
||||
|
||||
/// Serializations for all columns and subcolumns by their names.
|
||||
SerializationByName serializations;
|
||||
|
||||
/// Columns description for more convenient access
|
||||
|
@ -40,6 +40,8 @@ IMergeTreeReader::IMergeTreeReader(
|
||||
, metadata_snapshot(metadata_snapshot_)
|
||||
, all_mark_ranges(all_mark_ranges_)
|
||||
, alter_conversions(storage.getAlterConversionsForPart(data_part))
|
||||
/// For wide parts convert plain arrays of Nested to subcolumns
|
||||
/// to allow using a shared offset column from cache.
|
||||
, requested_columns(isWidePart(data_part) ? Nested::convertToSubcolumns(columns_) : columns_)
|
||||
, part_columns(isWidePart(data_part) ? Nested::collect(data_part->getColumns()) : data_part->getColumns())
|
||||
{
|
||||
|
@ -75,7 +75,10 @@ protected:
|
||||
/// Stores states for IDataType::deserializeBinaryBulk
|
||||
DeserializeBinaryBulkStateMap deserialize_binary_bulk_state_map;
|
||||
|
||||
/// Actual column names and types of columns in part,
|
||||
/// which may differ from table metadata.
|
||||
NamesAndTypes columns_to_read;
|
||||
/// Actual serialization of columns in part.
|
||||
Serializations serializations;
|
||||
|
||||
UncompressedCache * uncompressed_cache;
|
||||
|
@ -583,7 +583,7 @@ void MergeTreeDataPartWriterWide::finishDataSerialization(bool sync)
|
||||
{
|
||||
if (column.type->isValueRepresentedByNumber()
|
||||
&& !column.type->haveSubtypes()
|
||||
&& data_part->getSerialization(columnn.name)->getKind() == ISerialization::Kind::DEFAULT)
|
||||
&& data_part->getSerialization(column.name)->getKind() == ISerialization::Kind::DEFAULT)
|
||||
{
|
||||
validateColumnOfFixedSize(column);
|
||||
}
|
||||
|
@ -459,8 +459,21 @@ static NameToNameVector collectFilesForRenames(
|
||||
const MutationCommands & commands_for_removes,
|
||||
const String & mrk_extension)
|
||||
{
|
||||
/// Collect counts for shared streams of different columns. As an example, Nested columns have shared stream with array sizes.
|
||||
std::unordered_map<String, size_t> stream_counts;
|
||||
for (const auto & column : source_part->getColumns())
|
||||
{
|
||||
if (auto serialization = source_part->tryGetSerialization(column.name))
|
||||
{
|
||||
serialization->enumerateStreams(
|
||||
[&](const ISerialization::SubstreamPath & substream_path)
|
||||
{
|
||||
++stream_counts[ISerialization::getFileNameForStream(column, substream_path)];
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
NameToNameVector rename_vector;
|
||||
NameSet renamed_streams;
|
||||
|
||||
/// Remove old data
|
||||
for (const auto & command : commands_for_removes)
|
||||
@ -483,6 +496,22 @@ static NameToNameVector collectFilesForRenames(
|
||||
if (source_part->checksums.has(command.column_name + ".proj"))
|
||||
rename_vector.emplace_back(command.column_name + ".proj", "");
|
||||
}
|
||||
else if (command.type == MutationCommand::Type::DROP_COLUMN)
|
||||
{
|
||||
ISerialization::StreamCallback callback = [&](const ISerialization::SubstreamPath & substream_path)
|
||||
{
|
||||
String stream_name = ISerialization::getFileNameForStream({command.column_name, command.data_type}, substream_path);
|
||||
/// Delete files if they are no longer shared with another column.
|
||||
if (--stream_counts[stream_name] == 0)
|
||||
{
|
||||
rename_vector.emplace_back(stream_name + ".bin", "");
|
||||
rename_vector.emplace_back(stream_name + mrk_extension, "");
|
||||
}
|
||||
};
|
||||
|
||||
if (auto serialization = source_part->tryGetSerialization(command.column_name))
|
||||
serialization->enumerateStreams(callback);
|
||||
}
|
||||
else if (command.type == MutationCommand::Type::RENAME_COLUMN)
|
||||
{
|
||||
String escaped_name_from = escapeForFileName(command.column_name);
|
||||
@ -495,7 +524,6 @@ static NameToNameVector collectFilesForRenames(
|
||||
|
||||
if (stream_from != stream_to)
|
||||
{
|
||||
renamed_streams.insert(stream_from);
|
||||
rename_vector.emplace_back(stream_from + ".bin", stream_to + ".bin");
|
||||
rename_vector.emplace_back(stream_from + mrk_extension, stream_to + mrk_extension);
|
||||
}
|
||||
@ -504,42 +532,39 @@ static NameToNameVector collectFilesForRenames(
|
||||
if (auto serialization = source_part->tryGetSerialization(command.column_name))
|
||||
serialization->enumerateStreams(callback);
|
||||
}
|
||||
}
|
||||
else if (command.type == MutationCommand::Type::READ_COLUMN)
|
||||
{
|
||||
/// Remove files for streams that exist in source_part,
|
||||
/// but were removed in new_part by MODIFY COLUMN from
|
||||
/// type with higher number of streams (e.g. LowCardinality -> String).
|
||||
|
||||
auto collect_all_stream_names = [&](const auto & data_part)
|
||||
auto collect_stream_names = [&](const auto & data_part)
|
||||
{
|
||||
NameSet res;
|
||||
for (const auto & column : data_part->getColumns())
|
||||
{
|
||||
if (auto serialization = data_part->tryGetSerialization(column.name))
|
||||
if (auto serialization = data_part->tryGetSerialization(command.column_name))
|
||||
{
|
||||
serialization->enumerateStreams(
|
||||
[&](const ISerialization::SubstreamPath & substream_path)
|
||||
{
|
||||
res.insert(ISerialization::getFileNameForStream(column.name, substream_path));
|
||||
res.insert(ISerialization::getFileNameForStream(command.column_name, substream_path));
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
};
|
||||
|
||||
/// Remove files for streams that exist in source part,
|
||||
/// but were removed in new_part by DROP COLUMN
|
||||
/// or MODIFY COLUMN from type with higher number of streams
|
||||
/// (e.g. LowCardinality -> String).
|
||||
|
||||
auto old_streams = collect_all_stream_names(source_part);
|
||||
auto new_streams = collect_all_stream_names(new_part);
|
||||
auto old_streams = collect_stream_names(source_part);
|
||||
auto new_streams = collect_stream_names(new_part);
|
||||
|
||||
for (const auto & old_stream : old_streams)
|
||||
{
|
||||
if (!new_streams.contains(old_stream) && !renamed_streams.contains(old_stream))
|
||||
if (!new_streams.contains(old_stream))
|
||||
{
|
||||
rename_vector.emplace_back(old_stream + ".bin", "");
|
||||
rename_vector.emplace_back(old_stream + mrk_extension, "");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return rename_vector;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user