2019-08-19 14:40:12 +00:00
# include <Storages/MergeTree/MergeTreePartsMover.h>
# include <Storages/MergeTree/MergeTreeData.h>
2019-10-10 16:30:30 +00:00
2019-08-19 14:40:12 +00:00
# include <set>
# include <boost/algorithm/string/join.hpp>
namespace DB
{
2019-09-02 11:35:53 +00:00
/// Error codes used in this file. They are only declared here (`extern`); the definitions live elsewhere.
/// ABORTED is thrown below when moving of parts has been cancelled.
namespace ErrorCodes
{
extern const int ABORTED ;
}
2019-08-19 14:40:12 +00:00
namespace
{
2019-09-02 11:35:53 +00:00
2019-08-20 17:16:32 +00:00
/// Contains the minimal number of the heaviest parts whose total size on disk is not less than the required size.
2019-09-10 11:21:59 +00:00
/// If the total size of all added parts is not enough, then it contains all of them.
2019-08-19 14:40:12 +00:00
class LargestPartsWithRequiredSize
{
struct PartsSizeOnDiskComparator
{
2019-09-02 11:35:53 +00:00
bool operator ( ) ( const MergeTreeData : : DataPartPtr & f , const MergeTreeData : : DataPartPtr & s ) const
2019-08-19 14:40:12 +00:00
{
2019-09-10 11:21:59 +00:00
/// If parts have equal sizes, than order them by names (names are unique)
2020-03-23 13:32:02 +00:00
UInt64 first_part_size = f - > getBytesOnDisk ( ) ;
UInt64 second_part_size = s - > getBytesOnDisk ( ) ;
return std : : tie ( first_part_size , f - > name ) < std : : tie ( second_part_size , s - > name ) ;
2019-08-19 14:40:12 +00:00
}
} ;
std : : set < MergeTreeData : : DataPartPtr , PartsSizeOnDiskComparator > elems ;
UInt64 required_size_sum ;
UInt64 current_size_sum = 0 ;
public :
2020-03-18 03:27:32 +00:00
explicit LargestPartsWithRequiredSize ( UInt64 required_sum_size_ ) : required_size_sum ( required_sum_size_ ) { }
2019-08-19 14:40:12 +00:00
void add ( MergeTreeData : : DataPartPtr part )
{
if ( current_size_sum < required_size_sum )
{
elems . emplace ( part ) ;
2020-03-23 13:32:02 +00:00
current_size_sum + = part - > getBytesOnDisk ( ) ;
2019-08-19 14:40:12 +00:00
return ;
}
/// Adding smaller element
2020-03-23 13:32:02 +00:00
if ( ! elems . empty ( ) & & ( * elems . begin ( ) ) - > getBytesOnDisk ( ) > = part - > getBytesOnDisk ( ) )
2019-08-19 14:40:12 +00:00
return ;
elems . emplace ( part ) ;
2020-03-23 13:32:02 +00:00
current_size_sum + = part - > getBytesOnDisk ( ) ;
2019-08-19 14:40:12 +00:00
2019-10-31 10:40:11 +00:00
removeRedundantElements ( ) ;
}
/// Weaken requirements on size
2019-12-05 11:15:47 +00:00
void decreaseRequiredSizeAndRemoveRedundantParts ( UInt64 size_decrease )
2019-10-31 10:40:11 +00:00
{
required_size_sum - = std : : min ( size_decrease , required_size_sum ) ;
removeRedundantElements ( ) ;
2019-08-19 14:40:12 +00:00
}
2019-09-10 11:21:59 +00:00
/// Returns parts ordered by size
2019-08-19 14:40:12 +00:00
MergeTreeData : : DataPartsVector getAccumulatedParts ( )
{
MergeTreeData : : DataPartsVector res ;
for ( const auto & elem : elems )
res . push_back ( elem ) ;
return res ;
}
2019-10-31 10:40:11 +00:00
private :
void removeRedundantElements ( )
{
2020-03-23 13:32:02 +00:00
while ( ! elems . empty ( ) & & ( current_size_sum - ( * elems . begin ( ) ) - > getBytesOnDisk ( ) > = required_size_sum ) )
2019-10-31 10:40:11 +00:00
{
2020-03-23 13:32:02 +00:00
current_size_sum - = ( * elems . begin ( ) ) - > getBytesOnDisk ( ) ;
2019-10-31 10:40:11 +00:00
elems . erase ( elems . begin ( ) ) ;
}
}
2019-08-19 14:40:12 +00:00
} ;
}
2019-09-05 15:53:23 +00:00
bool MergeTreePartsMover : : selectPartsForMove (
2019-08-19 14:40:12 +00:00
MergeTreeMovingParts & parts_to_move ,
2019-09-06 15:09:20 +00:00
const AllowedMovingPredicate & can_move ,
const std : : lock_guard < std : : mutex > & /* moving_parts_lock */ )
2019-08-19 14:40:12 +00:00
{
2019-12-12 21:34:57 +00:00
unsigned parts_to_move_by_policy_rules = 0 ;
unsigned parts_to_move_by_ttl_rules = 0 ;
double parts_to_move_total_size_bytes = 0.0 ;
2019-12-18 16:27:49 +00:00
MergeTreeData : : DataPartsVector data_parts = data - > getDataPartsVector ( ) ;
2019-08-19 14:40:12 +00:00
if ( data_parts . empty ( ) )
return false ;
2019-11-27 09:39:44 +00:00
std : : unordered_map < DiskPtr , LargestPartsWithRequiredSize > need_to_move ;
2020-01-09 14:50:34 +00:00
const auto policy = data - > getStoragePolicy ( ) ;
2019-08-19 14:40:12 +00:00
const auto & volumes = policy - > getVolumes ( ) ;
2020-03-09 02:55:28 +00:00
if ( ! volumes . empty ( ) )
2019-08-19 14:40:12 +00:00
{
2019-10-31 10:40:11 +00:00
/// Do not check last volume
for ( size_t i = 0 ; i ! = volumes . size ( ) - 1 ; + + i )
2019-08-19 14:40:12 +00:00
{
2020-05-09 21:24:15 +00:00
for ( const auto & disk : volumes [ i ] - > getDisks ( ) )
2019-10-31 10:40:11 +00:00
{
UInt64 required_maximum_available_space = disk - > getTotalSpace ( ) * policy - > getMoveFactor ( ) ;
UInt64 unreserved_space = disk - > getUnreservedSpace ( ) ;
2019-08-19 14:40:12 +00:00
2019-10-31 10:40:11 +00:00
if ( unreserved_space < required_maximum_available_space )
need_to_move . emplace ( disk , required_maximum_available_space - unreserved_space ) ;
}
2019-08-19 14:40:12 +00:00
}
}
2019-11-29 07:00:43 +00:00
time_t time_of_move = time ( nullptr ) ;
2020-09-02 12:16:12 +00:00
auto metadata_snapshot = data - > getInMemoryMetadataPtr ( ) ;
2019-08-19 14:40:12 +00:00
for ( const auto & part : data_parts )
{
2019-09-02 11:35:53 +00:00
String reason ;
2019-11-29 07:00:43 +00:00
/// Don't report message to log, because logging is excessive.
2019-09-02 11:35:53 +00:00
if ( ! can_move ( part , & reason ) )
2019-08-19 14:40:12 +00:00
continue ;
2019-12-18 16:41:11 +00:00
2020-09-07 07:59:14 +00:00
auto ttl_entry = selectTTLDescriptionForTTLInfos ( metadata_snapshot - > getMoveTTLs ( ) , part - > ttl_infos . moves_ttl , time_of_move , true ) ;
2020-09-02 12:16:12 +00:00
2020-05-09 21:24:15 +00:00
auto to_insert = need_to_move . find ( part - > volume - > getDisk ( ) ) ;
2019-12-12 11:10:18 +00:00
ReservationPtr reservation ;
2020-01-15 08:28:48 +00:00
if ( ttl_entry )
2019-11-29 07:00:43 +00:00
{
2020-09-18 15:41:14 +00:00
auto destination = data - > getDestinationForMoveTTL ( * ttl_entry ) ;
2020-05-25 17:07:14 +00:00
if ( destination & & ! data - > isPartInTTLDestination ( * ttl_entry , * part ) )
2020-09-18 15:41:14 +00:00
reservation = data - > tryReserveSpace ( part - > getBytesOnDisk ( ) , data - > getDestinationForMoveTTL ( * ttl_entry ) ) ;
2019-11-29 07:00:43 +00:00
}
2019-12-05 08:14:47 +00:00
if ( reservation ) /// Found reservation by TTL rule.
2019-10-22 07:55:36 +00:00
{
2019-10-31 10:40:11 +00:00
parts_to_move . emplace_back ( part , std : : move ( reservation ) ) ;
/// If table TTL rule satisfies on this part, won't apply policy rules on it.
/// In order to not over-move, we need to "release" required space on this disk,
/// possibly to zero.
if ( to_insert ! = need_to_move . end ( ) )
2019-10-22 07:55:36 +00:00
{
2020-03-23 13:32:02 +00:00
to_insert - > second . decreaseRequiredSizeAndRemoveRedundantParts ( part - > getBytesOnDisk ( ) ) ;
2019-10-22 07:55:36 +00:00
}
2019-12-12 21:34:57 +00:00
+ + parts_to_move_by_ttl_rules ;
2020-03-23 13:32:02 +00:00
parts_to_move_total_size_bytes + = part - > getBytesOnDisk ( ) ;
2019-10-22 07:55:36 +00:00
}
2019-10-31 10:40:11 +00:00
else
2019-10-22 07:55:36 +00:00
{
2019-10-31 10:40:11 +00:00
if ( to_insert ! = need_to_move . end ( ) )
to_insert - > second . add ( part ) ;
2019-10-22 07:55:36 +00:00
}
2019-08-19 14:40:12 +00:00
}
for ( auto & & move : need_to_move )
{
2019-10-22 07:55:36 +00:00
auto min_volume_index = policy - > getVolumeIndexByDisk ( move . first ) + 1 ;
2019-08-19 14:40:12 +00:00
for ( auto & & part : move . second . getAccumulatedParts ( ) )
{
2020-03-23 13:32:02 +00:00
auto reservation = policy - > reserve ( part - > getBytesOnDisk ( ) , min_volume_index ) ;
2019-08-19 14:40:12 +00:00
if ( ! reservation )
{
2019-11-29 07:00:43 +00:00
/// Next parts to move from this disk has greater size and same min volume index.
/// There are no space for them.
/// But it can be possible to move data from other disks.
2019-08-19 14:40:12 +00:00
break ;
}
parts_to_move . emplace_back ( part , std : : move ( reservation ) ) ;
2019-12-12 21:34:57 +00:00
+ + parts_to_move_by_policy_rules ;
2020-03-23 13:32:02 +00:00
parts_to_move_total_size_bytes + = part - > getBytesOnDisk ( ) ;
2019-08-19 14:40:12 +00:00
}
}
2019-12-12 21:38:05 +00:00
if ( ! parts_to_move . empty ( ) )
{
2021-04-15 18:00:16 +00:00
LOG_DEBUG ( log , " Selected {} parts to move according to storage policy rules and {} parts according to TTL rules, {} total " , parts_to_move_by_policy_rules , parts_to_move_by_ttl_rules , ReadableSize ( parts_to_move_total_size_bytes ) ) ;
2019-12-12 21:38:05 +00:00
return true ;
}
else
return false ;
2019-08-19 14:40:12 +00:00
}
2019-09-03 11:32:25 +00:00
/// Makes a copy of `moving_part.part` on the disk of `moving_part.reserved_space`, inside the
/// `directory_to_move` directory, and returns a new part object created over that copy with its
/// columns, checksums and indexes loaded.
///
/// Throws Exception with code ABORTED if moves have been cancelled.
MergeTreeData::DataPartPtr MergeTreePartsMover::clonePart(const MergeTreeMoveEntry & moving_part) const
{
    if (moves_blocker.isCancelled())
        throw Exception(" Cancelled moving parts. ", ErrorCodes::ABORTED);

    auto settings = data->getSettings();
    auto part = moving_part.part;
    auto disk = moving_part.reserved_space->getDisk();
    LOG_DEBUG(log, " Cloning part {} from {} to {} ", part->name, part->volume->getDisk()->getName(), disk->getName());

    const String directory_to_move = " moving ";
    if (disk->supportZeroCopyReplication() && settings->allow_remote_fs_zero_copy_replication)
    {
        /// Try zero-copy replication and fallback to default copy if it's not possible
        part->assertOnDisk();
        String path_to_clone = fs::path(data->getRelativeDataPath()) / directory_to_move / " ";
        String relative_path = part->relative_path;
        if (disk->exists(path_to_clone + relative_path))
        {
            /// The path is passed as an argument, not concatenated into the format string,
            /// so that braces in it cannot be taken for placeholders.
            LOG_WARNING(log, " Path {} already exists. Will remove it and clone again. ", fullPath(disk, path_to_clone + relative_path));
            disk->removeRecursive(fs::path(path_to_clone) / relative_path / " ");
        }
        disk->createDirectories(path_to_clone);
        bool is_fetched = data->tryToFetchIfShared(*part, disk, fs::path(path_to_clone) / part->name);
        if (!is_fetched)
            part->volume->getDisk()->copy(fs::path(data->getRelativeDataPath()) / relative_path / " ", disk, path_to_clone);
        /// NOTE(review): the marker file is removed through the source disk object and directly under
        /// `path_to_clone`, whether or not the part was fetched - confirm that this disk and path are intended.
        part->volume->getDisk()->removeFileIfExists(fs::path(path_to_clone) / IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME);
    }
    else
    {
        part->makeCloneOnDisk(disk, directory_to_move);
    }

    /// The cloned part lives on a volume that consists of the destination disk only.
    auto single_disk_volume = std::make_shared<SingleDiskVolume>(" volume_ " + part->name, disk, 0);
    MergeTreeData::MutableDataPartPtr cloned_part =
        data->createPart(part->name, single_disk_volume, fs::path(directory_to_move) / part->name);
    LOG_TRACE(log, " Part {} was cloned to {} ", part->name, cloned_part->getFullPath());

    cloned_part->loadColumnsChecksumsIndexes(true, true);
    return cloned_part;
}
2019-09-03 11:32:25 +00:00
void MergeTreePartsMover : : swapClonedPart ( const MergeTreeData : : DataPartPtr & cloned_part ) const
2019-08-19 14:40:12 +00:00
{
2019-09-03 11:32:25 +00:00
if ( moves_blocker . isCancelled ( ) )
throw Exception ( " Cancelled moving parts. " , ErrorCodes : : ABORTED ) ;
2019-09-02 11:35:53 +00:00
2019-09-05 13:12:29 +00:00
auto active_part = data - > getActiveContainingPart ( cloned_part - > name ) ;
2019-09-02 11:35:53 +00:00
2019-09-10 11:21:59 +00:00
/// It's ok, because we don't block moving parts for merges or mutations
2019-09-03 11:32:25 +00:00
if ( ! active_part | | active_part - > name ! = cloned_part - > name )
2019-09-10 08:56:27 +00:00
{
2020-05-23 22:24:01 +00:00
LOG_INFO ( log , " Failed to swap {}. Active part doesn't exist. Possible it was merged or mutated. Will remove copy on path '{}'. " , cloned_part - > name , cloned_part - > getFullPath ( ) ) ;
2019-09-10 08:56:27 +00:00
return ;
}
2019-08-19 14:40:12 +00:00
2020-01-16 11:52:43 +00:00
/// Don't remove new directory but throw an error because it may contain part which is currently in use.
2020-01-16 06:44:02 +00:00
cloned_part - > renameTo ( active_part - > name , false ) ;
2020-01-16 11:52:43 +00:00
2019-09-10 11:21:59 +00:00
/// TODO what happen if server goes down here?
2019-09-05 13:12:29 +00:00
data - > swapActivePart ( cloned_part ) ;
2019-09-05 15:53:23 +00:00
2020-05-23 22:24:01 +00:00
LOG_TRACE ( log , " Part {} was moved to {} " , cloned_part - > name , cloned_part - > getFullPath ( ) ) ;
2019-08-19 14:40:12 +00:00
}
}