From 20d40b8bce03d4a5a4f2fba9859857ff268fc463 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 3 Apr 2020 13:40:46 +0300 Subject: [PATCH] Refactor code add comments --- dbms/Storages/ColumnsDescription.h | 3 ++- dbms/Storages/MergeTree/MergeTreeData.h | 14 ++++++++++++++ .../MergeTree/MergeTreeReaderCompact.cpp | 16 +++++++--------- dbms/Storages/MergeTree/MergeTreeReaderWide.cpp | 16 ++++++++++------ .../MergeTree/ReplicatedMergeTreeQueue.h | 4 +++- 5 files changed, 36 insertions(+), 17 deletions(-) diff --git a/dbms/Storages/ColumnsDescription.h b/dbms/Storages/ColumnsDescription.h index 3acac8d014d..72399a70128 100644 --- a/dbms/Storages/ColumnsDescription.h +++ b/dbms/Storages/ColumnsDescription.h @@ -57,7 +57,8 @@ public: /// `column_name` can be a Nested column name; void remove(const String & column_name); - /// TODO(alesap) + /// Rename column. column_from and column_to cannot be nested columns. + /// TODO add ability to rename nested columns void rename(const String & column_from, const String & column_to); void flattenNested(); /// TODO: remove, insert already flattened Nested columns. diff --git a/dbms/Storages/MergeTree/MergeTreeData.h b/dbms/Storages/MergeTree/MergeTreeData.h index 120ebd2e467..125a90d26e0 100644 --- a/dbms/Storages/MergeTree/MergeTreeData.h +++ b/dbms/Storages/MergeTree/MergeTreeData.h @@ -125,9 +125,18 @@ public: STRONG_TYPEDEF(String, PartitionID) + /// Alter conversions which should be applied on-fly for part. Build from of + /// the most recent mutation commands for part. Now we have only rename_map + /// here (from ALTER_RENAME) command, because for all other type of alters + /// we can deduce conversions for part from difference between + /// part->getColumns() and storage->getColumns(). struct AlterConversions { + /// Rename map new_name -> old_name std::unordered_map rename_map; + + bool isColumnRenamed(const String & new_name) const { return rename_map.count(new_name) > 0; } + String getColumnOldName(const String & new_name) const { return rename_map.at(new_name); } }; struct LessDataPart @@ -652,6 +661,7 @@ public: /// Reserves 0 bytes ReservationPtr makeEmptyReservationOnLargestDisk() { return getStoragePolicy()->makeEmptyReservationOnLargestDisk(); } + /// Return alter conversions for part which must be applied on fly. AlterConversions getAlterConversionsForPart(const MergeTreeDataPartPtr part) const; MergeTreeDataFormatVersion format_version; @@ -915,6 +925,10 @@ protected: /// mechanisms for parts locking virtual bool partIsAssignedToBackgroundOperation(const DataPartPtr & part) const = 0; + /// Return most recent mutations commands for part which weren't applied + /// Used to receive AlterConversions for part and apply them on fly. This + /// method has different implementations for replicated and non replicated + /// MergeTree because they store mutations in different way. virtual MutationCommands getFirtsAlterMutationCommandsForPart(const DataPartPtr & part) const = 0; /// Moves part to specified space, used in ALTER ... MOVE ... queries bool movePartsToSpace(const DataPartsVector & parts, SpacePtr space); diff --git a/dbms/Storages/MergeTree/MergeTreeReaderCompact.cpp b/dbms/Storages/MergeTree/MergeTreeReaderCompact.cpp index a59a87386a5..e4f7275f4a5 100644 --- a/dbms/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/dbms/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -81,12 +81,10 @@ MergeTreeReaderCompact::MergeTreeReaderCompact( const auto & [name, type] = *name_and_type; auto position = data_part->getColumnPosition(name); - if (!position) + if (!position && alter_conversions.isColumnRenamed(name)) { - auto renamed_it = alter_conversions.rename_map.find(name); - - if (renamed_it != alter_conversions.rename_map.end()) - position = data_part->getColumnPosition(renamed_it->second); + String old_name = alter_conversions.getColumnOldName(name); + position = data_part->getColumnPosition(old_name); } if (!position && typeid_cast(type.get())) @@ -136,11 +134,11 @@ size_t MergeTreeReaderCompact::readRows(size_t from_mark, bool continue_reading, auto [name, type] = *name_and_type; - if (alter_conversions.rename_map.count(name)) + if (alter_conversions.isColumnRenamed(name)) { - String original_name = alter_conversions.rename_map[name]; - if (!data_part->getColumnPosition(name) && data_part->getColumnPosition(original_name)) - name = original_name; + String old_name = alter_conversions.getColumnOldName(name); + if (!data_part->getColumnPosition(name) && data_part->getColumnPosition(old_name)) + name = old_name; } auto & column = mutable_columns[pos]; diff --git a/dbms/Storages/MergeTree/MergeTreeReaderWide.cpp b/dbms/Storages/MergeTree/MergeTreeReaderWide.cpp index ae60f8c1733..ad676b4db03 100644 --- a/dbms/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/dbms/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -52,12 +52,16 @@ MergeTreeReaderWide::MergeTreeReaderWide( } else { - auto renamed_it = alter_conversions.rename_map.find(column.name); - if (renamed_it != alter_conversions.rename_map.end() - && columns_from_part.count(renamed_it->second)) - addStreams(renamed_it->second, *columns_from_part[renamed_it->second], profile_callback_, clock_type_); + if (alter_conversions.isColumnRenamed(column.name)) + { + String old_name = alter_conversions.getColumnOldName(column.name); + if (columns_from_part.count(old_name)) + addStreams(old_name, *columns_from_part[old_name], profile_callback_, clock_type_); + } else + { addStreams(column.name, *column.type, profile_callback_, clock_type_); + } } } } @@ -90,9 +94,9 @@ size_t MergeTreeReaderWide::readRows(size_t from_mark, bool continue_reading, si for (size_t pos = 0; pos < num_columns; ++pos, ++name_and_type) { String name = name_and_type->name; - if (alter_conversions.rename_map.count(name)) + if (alter_conversions.isColumnRenamed(name)) { - String original_name = alter_conversions.rename_map[name]; + String original_name = alter_conversions.getColumnOldName(name); if (!columns_from_part.count(name) && columns_from_part.count(original_name)) name = original_name; } diff --git a/dbms/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/dbms/Storages/MergeTree/ReplicatedMergeTreeQueue.h index 6bb3ebea4da..fcb3dfb4b86 100644 --- a/dbms/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/dbms/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -331,7 +331,9 @@ public: MutationCommands getMutationCommands(const MergeTreeData::DataPartPtr & part, Int64 desired_mutation_version) const; - /// TODO(alesap) + /// Return mutation commands for part with smallest mutation version bigger + /// than data part version. Used when we apply alter commands on fly, + /// without actual data modification on disk. MutationCommands getFirstAlterMutationCommandsForPart(const MergeTreeData::DataPartPtr & part) const; /// Mark finished mutations as done. If the function needs to be called again at some later time