ClickHouse/src/Storages/MergeTree/MergeTreePartsMover.cpp

260 lines
9.4 KiB
C++
Raw Normal View History

2019-08-19 14:40:12 +00:00
#include <Storages/MergeTree/MergeTreePartsMover.h>
#include <Storages/MergeTree/MergeTreeData.h>
2019-10-10 16:30:30 +00:00
2019-08-19 14:40:12 +00:00
#include <set>
#include <boost/algorithm/string/join.hpp>
namespace DB
{
2019-09-02 11:35:53 +00:00
/// Error codes referenced in this file. They are only declared here (extern);
/// ABORTED is thrown below when a move is cancelled via moves_blocker.
namespace ErrorCodes
{
extern const int ABORTED;
}
2019-08-19 14:40:12 +00:00
namespace
{
2019-09-02 11:35:53 +00:00
2019-08-20 17:16:32 +00:00
/// Contains minimal number of heaviest parts, which sum size on disk is greater than required.
2019-09-10 11:21:59 +00:00
/// If there are not enough summary size, than contains all parts.
2019-08-19 14:40:12 +00:00
class LargestPartsWithRequiredSize
{
struct PartsSizeOnDiskComparator
{
2019-09-02 11:35:53 +00:00
bool operator()(const MergeTreeData::DataPartPtr & f, const MergeTreeData::DataPartPtr & s) const
2019-08-19 14:40:12 +00:00
{
2019-09-10 11:21:59 +00:00
/// If parts have equal sizes, than order them by names (names are unique)
2020-03-23 13:32:02 +00:00
UInt64 first_part_size = f->getBytesOnDisk();
UInt64 second_part_size = s->getBytesOnDisk();
return std::tie(first_part_size, f->name) < std::tie(second_part_size, s->name);
2019-08-19 14:40:12 +00:00
}
};
std::set<MergeTreeData::DataPartPtr, PartsSizeOnDiskComparator> elems;
UInt64 required_size_sum;
UInt64 current_size_sum = 0;
public:
2020-03-18 03:27:32 +00:00
explicit LargestPartsWithRequiredSize(UInt64 required_sum_size_) : required_size_sum(required_sum_size_) {}
2019-08-19 14:40:12 +00:00
void add(MergeTreeData::DataPartPtr part)
{
if (current_size_sum < required_size_sum)
{
elems.emplace(part);
2020-03-23 13:32:02 +00:00
current_size_sum += part->getBytesOnDisk();
2019-08-19 14:40:12 +00:00
return;
}
/// Adding smaller element
2020-03-23 13:32:02 +00:00
if (!elems.empty() && (*elems.begin())->getBytesOnDisk() >= part->getBytesOnDisk())
2019-08-19 14:40:12 +00:00
return;
elems.emplace(part);
2020-03-23 13:32:02 +00:00
current_size_sum += part->getBytesOnDisk();
2019-08-19 14:40:12 +00:00
removeRedundantElements();
}
/// Weaken requirements on size
2019-12-05 11:15:47 +00:00
void decreaseRequiredSizeAndRemoveRedundantParts(UInt64 size_decrease)
{
required_size_sum -= std::min(size_decrease, required_size_sum);
removeRedundantElements();
2019-08-19 14:40:12 +00:00
}
2019-09-10 11:21:59 +00:00
/// Returns parts ordered by size
2019-08-19 14:40:12 +00:00
MergeTreeData::DataPartsVector getAccumulatedParts()
{
MergeTreeData::DataPartsVector res;
for (const auto & elem : elems)
res.push_back(elem);
return res;
}
private:
void removeRedundantElements()
{
2020-03-23 13:32:02 +00:00
while (!elems.empty() && (current_size_sum - (*elems.begin())->getBytesOnDisk() >= required_size_sum))
{
2020-03-23 13:32:02 +00:00
current_size_sum -= (*elems.begin())->getBytesOnDisk();
elems.erase(elems.begin());
}
}
2019-08-19 14:40:12 +00:00
};
}
2019-09-05 15:53:23 +00:00
bool MergeTreePartsMover::selectPartsForMove(
2019-08-19 14:40:12 +00:00
MergeTreeMovingParts & parts_to_move,
2019-09-06 15:09:20 +00:00
const AllowedMovingPredicate & can_move,
const std::lock_guard<std::mutex> & /* moving_parts_lock */)
2019-08-19 14:40:12 +00:00
{
unsigned parts_to_move_by_policy_rules = 0;
unsigned parts_to_move_by_ttl_rules = 0;
double parts_to_move_total_size_bytes = 0.0;
MergeTreeData::DataPartsVector data_parts = data->getDataPartsVector();
2019-08-19 14:40:12 +00:00
if (data_parts.empty())
return false;
2019-11-27 09:39:44 +00:00
std::unordered_map<DiskPtr, LargestPartsWithRequiredSize> need_to_move;
const auto policy = data->getStoragePolicy();
2019-08-19 14:40:12 +00:00
const auto & volumes = policy->getVolumes();
2020-03-09 02:55:28 +00:00
if (!volumes.empty())
2019-08-19 14:40:12 +00:00
{
/// Do not check last volume
for (size_t i = 0; i != volumes.size() - 1; ++i)
2019-08-19 14:40:12 +00:00
{
for (const auto & disk : volumes[i]->getDisks())
{
UInt64 required_maximum_available_space = disk->getTotalSpace() * policy->getMoveFactor();
UInt64 unreserved_space = disk->getUnreservedSpace();
2019-08-19 14:40:12 +00:00
if (unreserved_space < required_maximum_available_space)
need_to_move.emplace(disk, required_maximum_available_space - unreserved_space);
}
2019-08-19 14:40:12 +00:00
}
}
time_t time_of_move = time(nullptr);
2020-09-02 12:16:12 +00:00
auto metadata_snapshot = data->getInMemoryMetadataPtr();
2019-08-19 14:40:12 +00:00
for (const auto & part : data_parts)
{
2019-09-02 11:35:53 +00:00
String reason;
/// Don't report message to log, because logging is excessive.
2019-09-02 11:35:53 +00:00
if (!can_move(part, &reason))
2019-08-19 14:40:12 +00:00
continue;
2020-09-07 07:59:14 +00:00
auto ttl_entry = selectTTLDescriptionForTTLInfos(metadata_snapshot->getMoveTTLs(), part->ttl_infos.moves_ttl, time_of_move, true);
2020-09-02 12:16:12 +00:00
auto to_insert = need_to_move.find(part->volume->getDisk());
2019-12-12 11:10:18 +00:00
ReservationPtr reservation;
if (ttl_entry)
{
2020-09-18 15:41:14 +00:00
auto destination = data->getDestinationForMoveTTL(*ttl_entry);
2020-05-25 17:07:14 +00:00
if (destination && !data->isPartInTTLDestination(*ttl_entry, *part))
2020-09-18 15:41:14 +00:00
reservation = data->tryReserveSpace(part->getBytesOnDisk(), data->getDestinationForMoveTTL(*ttl_entry));
}
if (reservation) /// Found reservation by TTL rule.
{
parts_to_move.emplace_back(part, std::move(reservation));
/// If table TTL rule satisfies on this part, won't apply policy rules on it.
/// In order to not over-move, we need to "release" required space on this disk,
/// possibly to zero.
if (to_insert != need_to_move.end())
{
2020-03-23 13:32:02 +00:00
to_insert->second.decreaseRequiredSizeAndRemoveRedundantParts(part->getBytesOnDisk());
}
++parts_to_move_by_ttl_rules;
2020-03-23 13:32:02 +00:00
parts_to_move_total_size_bytes += part->getBytesOnDisk();
}
else
{
if (to_insert != need_to_move.end())
to_insert->second.add(part);
}
2019-08-19 14:40:12 +00:00
}
for (auto && move : need_to_move)
{
auto min_volume_index = policy->getVolumeIndexByDisk(move.first) + 1;
2019-08-19 14:40:12 +00:00
for (auto && part : move.second.getAccumulatedParts())
{
2020-03-23 13:32:02 +00:00
auto reservation = policy->reserve(part->getBytesOnDisk(), min_volume_index);
2019-08-19 14:40:12 +00:00
if (!reservation)
{
/// Next parts to move from this disk has greater size and same min volume index.
/// There are no space for them.
/// But it can be possible to move data from other disks.
2019-08-19 14:40:12 +00:00
break;
}
parts_to_move.emplace_back(part, std::move(reservation));
++parts_to_move_by_policy_rules;
2020-03-23 13:32:02 +00:00
parts_to_move_total_size_bytes += part->getBytesOnDisk();
2019-08-19 14:40:12 +00:00
}
}
2019-12-12 21:38:05 +00:00
if (!parts_to_move.empty())
{
LOG_DEBUG(log, "Selected {} parts to move according to storage policy rules and {} parts according to TTL rules, {} total", parts_to_move_by_policy_rules, parts_to_move_by_ttl_rules, ReadableSize(parts_to_move_total_size_bytes));
2019-12-12 21:38:05 +00:00
return true;
}
else
return false;
2019-08-19 14:40:12 +00:00
}
2019-09-03 11:32:25 +00:00
/// Makes a copy of the part on the disk reserved for it, inside the "moving" directory of the table,
/// and returns a data part object created over that copy.
///
/// If the destination disk supports zero-copy replication and the corresponding setting is enabled,
/// first tries to fetch the part as shared; otherwise (or if that fails) the data is copied.
///
/// @param moving_part  part to move together with the space reservation on the destination disk.
/// @return the cloned part with columns, checksums and indexes loaded.
/// @throws Exception with ErrorCodes::ABORTED if moves are cancelled.
MergeTreeData::DataPartPtr MergeTreePartsMover::clonePart(const MergeTreeMoveEntry & moving_part) const
{
    if (moves_blocker.isCancelled())
        throw Exception("Cancelled moving parts.", ErrorCodes::ABORTED);

    auto settings = data->getSettings();
    auto part = moving_part.part;
    auto disk = moving_part.reserved_space->getDisk();
    LOG_DEBUG(log, "Cloning part {} from {} to {}", part->name, part->volume->getDisk()->getName(), disk->getName());

    const String directory_to_move = "moving";
    if (disk->supportZeroCopyReplication() && settings->allow_remote_fs_zero_copy_replication)
    {
        /// Try zero-copy replication and fallback to default copy if it's not possible
        part->assertOnDisk();
        String path_to_clone = fs::path(data->getRelativeDataPath()) / directory_to_move / "";
        String relative_path = part->relative_path;
        if (disk->exists(path_to_clone + relative_path))
        {
            /// A leftover from a previous attempt: start from scratch.
            LOG_WARNING(log, "Path {} already exists. Will remove it and clone again.", fullPath(disk, path_to_clone + relative_path));
            disk->removeRecursive(fs::path(path_to_clone) / relative_path / "");
        }
        disk->createDirectories(path_to_clone);
        bool is_fetched = data->tryToFetchIfShared(*part, disk, fs::path(path_to_clone) / part->name);
        if (!is_fetched)
            part->volume->getDisk()->copy(fs::path(data->getRelativeDataPath()) / relative_path / "", disk, path_to_clone);
        /// The marker file is removed regardless of whether the part was fetched or copied.
        /// NOTE(review): this goes through the source part's disk with a path under path_to_clone - confirm it is intended.
        part->volume->getDisk()->removeFileIfExists(fs::path(path_to_clone) / IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME);
    }
    else
    {
        part->makeCloneOnDisk(disk, directory_to_move);
    }

    /// The cloned part lives on exactly one disk, so wrap that disk into a single-disk volume.
    auto single_disk_volume = std::make_shared<SingleDiskVolume>("volume_" + part->name, disk, 0);
    MergeTreeData::MutableDataPartPtr cloned_part =
        data->createPart(part->name, single_disk_volume, fs::path(directory_to_move) / part->name);
    LOG_TRACE(log, "Part {} was cloned to {}", part->name, cloned_part->getFullPath());

    cloned_part->loadColumnsChecksumsIndexes(true, true);
    return cloned_part;
}
2019-09-03 11:32:25 +00:00
/// Replaces the currently active part with its clone made by clonePart.
/// Does nothing (apart from logging) if the active part with the same name no longer exists.
///
/// @param cloned_part  the copy of the part located in the temporary directory on the destination disk.
/// @throws Exception with ErrorCodes::ABORTED if moves are cancelled.
void MergeTreePartsMover::swapClonedPart(const MergeTreeData::DataPartPtr & cloned_part) const
{
    if (moves_blocker.isCancelled())
        throw Exception("Cancelled moving parts.", ErrorCodes::ABORTED);

    auto active_part = data->getActiveContainingPart(cloned_part->name);

    /// The swap only makes sense when exactly the same part (by name) is still the active one.
    const bool original_is_still_active = active_part && active_part->name == cloned_part->name;

    /// It's ok, because we don't block moving parts for merges or mutations
    if (!original_is_still_active)
    {
        LOG_INFO(log, "Failed to swap {}. Active part doesn't exist. Possible it was merged or mutated. Will remove copy on path '{}'.", cloned_part->name, cloned_part->getFullPath());
        return;
    }

    /// Don't remove new directory but throw an error because it may contain part which is currently in use.
    cloned_part->renameTo(active_part->name, false);

    /// TODO what happen if server goes down here?
    data->swapActivePart(cloned_part);

    LOG_TRACE(log, "Part {} was moved to {}", cloned_part->name, cloned_part->getFullPath());
}
}