2019-08-19 14:40:12 +00:00
|
|
|
#include <Storages/MergeTree/MergeTreePartsMover.h>
|
|
|
|
#include <Storages/MergeTree/MergeTreeData.h>
|
2019-10-10 16:30:30 +00:00
|
|
|
#include <Storages/MergeTree/MergeTreeDataPartFactory.h>
|
|
|
|
|
2019-08-19 14:40:12 +00:00
|
|
|
#include <set>
|
|
|
|
#include <boost/algorithm/string/join.hpp>
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2019-09-02 11:35:53 +00:00
|
|
|
namespace ErrorCodes
|
|
|
|
{
|
|
|
|
extern const int ABORTED;
|
2019-09-03 11:32:25 +00:00
|
|
|
extern const int NO_SUCH_DATA_PART;
|
2019-09-10 11:21:59 +00:00
|
|
|
extern const int LOGICAL_ERROR;
|
2019-09-02 11:35:53 +00:00
|
|
|
}
|
|
|
|
|
2019-08-19 14:40:12 +00:00
|
|
|
namespace
|
|
|
|
{
|
2019-09-02 11:35:53 +00:00
|
|
|
|
2019-08-20 17:16:32 +00:00
|
|
|
/// Contains minimal number of heaviest parts, which sum size on disk is greater than required.
|
2019-09-10 11:21:59 +00:00
|
|
|
/// If there are not enough summary size, than contains all parts.
|
2019-08-19 14:40:12 +00:00
|
|
|
class LargestPartsWithRequiredSize
|
|
|
|
{
|
|
|
|
struct PartsSizeOnDiskComparator
|
|
|
|
{
|
2019-09-02 11:35:53 +00:00
|
|
|
bool operator()(const MergeTreeData::DataPartPtr & f, const MergeTreeData::DataPartPtr & s) const
|
2019-08-19 14:40:12 +00:00
|
|
|
{
|
2019-09-10 11:21:59 +00:00
|
|
|
/// If parts have equal sizes, than order them by names (names are unique)
|
|
|
|
return std::tie(f->bytes_on_disk, f->name) < std::tie(s->bytes_on_disk, s->name);
|
2019-08-19 14:40:12 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
std::set<MergeTreeData::DataPartPtr, PartsSizeOnDiskComparator> elems;
|
|
|
|
UInt64 required_size_sum;
|
|
|
|
UInt64 current_size_sum = 0;
|
|
|
|
|
|
|
|
public:
|
2019-09-02 11:35:53 +00:00
|
|
|
LargestPartsWithRequiredSize(UInt64 required_sum_size_) : required_size_sum(required_sum_size_) {}
|
2019-08-19 14:40:12 +00:00
|
|
|
|
|
|
|
void add(MergeTreeData::DataPartPtr part)
|
|
|
|
{
|
|
|
|
if (current_size_sum < required_size_sum)
|
|
|
|
{
|
|
|
|
elems.emplace(part);
|
|
|
|
current_size_sum += part->bytes_on_disk;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Adding smaller element
|
|
|
|
if (!elems.empty() && (*elems.begin())->bytes_on_disk >= part->bytes_on_disk)
|
|
|
|
return;
|
|
|
|
|
|
|
|
elems.emplace(part);
|
|
|
|
current_size_sum += part->bytes_on_disk;
|
|
|
|
|
|
|
|
while (!elems.empty() && (current_size_sum - (*elems.begin())->bytes_on_disk >= required_size_sum))
|
|
|
|
{
|
|
|
|
current_size_sum -= (*elems.begin())->bytes_on_disk;
|
|
|
|
elems.erase(elems.begin());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-09-10 11:21:59 +00:00
|
|
|
/// Returns parts ordered by size
|
2019-08-19 14:40:12 +00:00
|
|
|
MergeTreeData::DataPartsVector getAccumulatedParts()
|
|
|
|
{
|
|
|
|
MergeTreeData::DataPartsVector res;
|
|
|
|
for (const auto & elem : elems)
|
|
|
|
res.push_back(elem);
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2019-09-05 15:53:23 +00:00
|
|
|
bool MergeTreePartsMover::selectPartsForMove(
|
2019-08-19 14:40:12 +00:00
|
|
|
MergeTreeMovingParts & parts_to_move,
|
2019-09-06 15:09:20 +00:00
|
|
|
const AllowedMovingPredicate & can_move,
|
|
|
|
const std::lock_guard<std::mutex> & /* moving_parts_lock */)
|
2019-08-19 14:40:12 +00:00
|
|
|
{
|
2019-10-10 16:30:30 +00:00
|
|
|
auto data_parts = data->getDataPartsVector();
|
2019-08-19 14:40:12 +00:00
|
|
|
|
|
|
|
if (data_parts.empty())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
std::unordered_map<DiskSpace::DiskPtr, LargestPartsWithRequiredSize> need_to_move;
|
2019-09-05 13:12:29 +00:00
|
|
|
const auto & policy = data->getStoragePolicy();
|
2019-08-19 14:40:12 +00:00
|
|
|
const auto & volumes = policy->getVolumes();
|
|
|
|
|
|
|
|
/// Do not check if policy has one volume
|
|
|
|
if (volumes.size() == 1)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/// Do not check last volume
|
|
|
|
for (size_t i = 0; i != volumes.size() - 1; ++i)
|
|
|
|
{
|
|
|
|
for (const auto & disk : volumes[i]->disks)
|
|
|
|
{
|
2019-09-10 08:56:27 +00:00
|
|
|
UInt64 required_available_space = disk->getTotalSpace() * policy->getMoveFactor();
|
|
|
|
UInt64 unreserved_space = disk->getUnreservedSpace();
|
2019-08-19 14:40:12 +00:00
|
|
|
|
2019-09-10 08:56:27 +00:00
|
|
|
if (required_available_space > unreserved_space)
|
|
|
|
need_to_move.emplace(disk, required_available_space - unreserved_space);
|
2019-08-19 14:40:12 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for (const auto & part : data_parts)
|
|
|
|
{
|
2019-09-02 11:35:53 +00:00
|
|
|
String reason;
|
2019-09-10 11:21:59 +00:00
|
|
|
/// Don't report message to log, because logging is excessive
|
2019-09-02 11:35:53 +00:00
|
|
|
if (!can_move(part, &reason))
|
2019-08-19 14:40:12 +00:00
|
|
|
continue;
|
2019-10-10 16:30:30 +00:00
|
|
|
|
2019-08-19 14:40:12 +00:00
|
|
|
auto to_insert = need_to_move.find(part->disk);
|
|
|
|
if (to_insert != need_to_move.end())
|
|
|
|
to_insert->second.add(part);
|
|
|
|
}
|
|
|
|
|
|
|
|
for (auto && move : need_to_move)
|
|
|
|
{
|
2019-09-10 11:21:59 +00:00
|
|
|
auto min_volume_priority = policy->getVolumeIndexByDisk(move.first) + 1;
|
2019-08-19 14:40:12 +00:00
|
|
|
for (auto && part : move.second.getAccumulatedParts())
|
|
|
|
{
|
|
|
|
auto reservation = policy->reserve(part->bytes_on_disk, min_volume_priority);
|
|
|
|
if (!reservation)
|
|
|
|
{
|
|
|
|
/// Next parts to move from this disk has greater size and same min volume priority
|
|
|
|
/// There are no space for them
|
|
|
|
/// But it can be possible to move data from other disks
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
parts_to_move.emplace_back(part, std::move(reservation));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return !parts_to_move.empty();
|
|
|
|
}
|
|
|
|
|
2019-09-03 11:32:25 +00:00
|
|
|
/// Makes a detached copy of the part described by `moving_part` using its reserved space,
/// creates a part object for that copy, loads its columns, checksums and indexes, and returns it.
/// Throws ABORTED if moves are cancelled.
MergeTreeData::DataPartPtr MergeTreePartsMover::clonePart(const MergeTreeMoveEntry & moving_part) const
{
    if (moves_blocker.isCancelled())
        throw Exception("Cancelled moving parts.", ErrorCodes::ABORTED);

    const auto & source_part = moving_part.part;
    const auto & reservation = moving_part.reserved_space;

    LOG_TRACE(log, "Cloning part " << source_part->name);
    source_part->makeCloneOnDiskDetached(reservation);

    /// The part object refers to the copy by its relative path inside "detached/"
    MergeTreeData::MutableDataPartPtr cloned_part = createPart(
        *data,
        reservation->getDisk(),
        source_part->name,
        "detached/" + source_part->name);

    LOG_TRACE(log, "Part " << source_part->name << " was cloned to " << cloned_part->getFullPath());

    cloned_part->loadColumnsChecksumsIndexes(true, true);
    return cloned_part;
}
|
|
|
|
|
|
|
|
|
2019-09-03 11:32:25 +00:00
|
|
|
/// Replaces the active part with its clone: renames the clone to the active part's name
/// and swaps it into the set of active parts.
/// Does nothing except logging if an active part with exactly the same name no longer exists.
/// Throws ABORTED if moves are cancelled.
void MergeTreePartsMover::swapClonedPart(const MergeTreeData::DataPartPtr & cloned_part) const
{
    if (moves_blocker.isCancelled())
        throw Exception("Cancelled moving parts.", ErrorCodes::ABORTED);

    auto active_part = data->getActiveContainingPart(cloned_part->name);

    /// Merges and mutations are not blocked while a part is being moved,
    /// so it is ok if the original part has been replaced in the meantime.
    const bool original_still_active = active_part && active_part->name == cloned_part->name;
    if (!original_still_active)
    {
        LOG_INFO(log, "Failed to swap " << cloned_part->name << ". Active part doesn't exist."
            << " Possible it was merged or mutated. Will remove copy on path '" << cloned_part->getFullPath() << "'.");
        return;
    }

    cloned_part->renameTo(active_part->name);

    /// TODO: what happens if the server goes down here?
    data->swapActivePart(cloned_part);

    LOG_TRACE(log, "Part " << cloned_part->name << " was moved to " << cloned_part->getFullPath());
}
|
|
|
|
|
|
|
|
}
|