allow PreCommitted parts to contain each other, clean up code

This commit is contained in:
Alexey Zatelepin 2018-02-19 18:31:43 +03:00
parent 958a6f0999
commit 5bc6bd55b1
4 changed files with 249 additions and 295 deletions

View File

@ -108,8 +108,8 @@ MergeTreeData::MergeTreeData(
full_path(full_path_),
broken_part_callback(broken_part_callback_),
log_name(database_name + "." + table_name), log(&Logger::get(log_name + " (Data)")),
data_parts_by_name(data_parts_indexes.get<TagByName>()),
data_parts_by_state_and_name(data_parts_indexes.get<TagByStateAndName>())
data_parts_by_info(data_parts_indexes.get<TagByInfo>()),
data_parts_by_state_and_info(data_parts_indexes.get<TagByStateAndInfo>())
{
merging_params.check(columns);
@ -418,7 +418,7 @@ Int64 MergeTreeData::getMaxDataPartIndex()
std::lock_guard<std::mutex> lock_all(data_parts_mutex);
Int64 max_block_id = 0;
for (const DataPartPtr & part : data_parts_by_name)
for (const DataPartPtr & part : data_parts_by_info)
max_block_id = std::max(max_block_id, part->info.max_block);
return max_block_id;
@ -552,11 +552,11 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
if (data_parts_indexes.size() >= 2)
{
/// Now all parts are committed, so data_parts_by_state_and_name == committed_parts_range
auto prev_jt = data_parts_by_state_and_name.begin();
/// Now all parts are committed, so data_parts_by_state_and_info == committed_parts_range
auto prev_jt = data_parts_by_state_and_info.begin();
auto curr_jt = std::next(prev_jt);
auto deactivate_part = [&] (DataPartIteratorByStateAndName it)
auto deactivate_part = [&] (DataPartIteratorByStateAndInfo it)
{
(*it)->remove_time = (*it)->modification_time;
modifyPartState(it, DataPartState::Outdated);
@ -564,7 +564,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
(*prev_jt)->assertState({DataPartState::Committed});
while (curr_jt != data_parts_by_state_and_name.end() && (*curr_jt)->state == DataPartState::Committed)
while (curr_jt != data_parts_by_state_and_info.end() && (*curr_jt)->state == DataPartState::Committed)
{
/// Don't consider data parts belonging to different partitions.
if ((*curr_jt)->info.partition_id != (*prev_jt)->info.partition_id)
@ -664,7 +664,7 @@ MergeTreeData::DataPartsVector MergeTreeData::grabOldParts()
return res;
time_t now = time(nullptr);
std::vector<DataPartIteratorByStateAndName> parts_to_delete;
std::vector<DataPartIteratorByStateAndInfo> parts_to_delete;
{
std::lock_guard<std::mutex> lock_parts(data_parts_mutex);
@ -674,7 +674,7 @@ MergeTreeData::DataPartsVector MergeTreeData::grabOldParts()
{
const DataPartPtr & part = *it;
if (part.unique() && /// Grab only parts that is not using by anyone (SELECTs for example)
if (part.unique() && /// Grab only parts that are not used by anyone (SELECTs for example).
part->remove_time < now &&
now - part->remove_time > settings.old_parts_lifetime.totalSeconds())
{
@ -716,9 +716,9 @@ void MergeTreeData::removePartsFinally(const MergeTreeData::DataPartsVector & pa
/// TODO: use data_parts iterators instead of pointers
for (auto & part : parts)
{
auto it = data_parts_by_name.find(part->info);
if (it == data_parts_by_name.end())
throw Exception("Deleting data part " + part->name + " is not exist", ErrorCodes::LOGICAL_ERROR);
auto it = data_parts_by_info.find(part->info);
if (it == data_parts_by_info.end())
throw Exception("Deleting data part " + part->name + " doesn't exist", ErrorCodes::LOGICAL_ERROR);
(*it)->assertState({DataPartState::Deleting});
@ -1362,6 +1362,58 @@ MergeTreeData::AlterDataPartTransaction::~AlterDataPartTransaction()
}
MergeTreeData::DataPartsVector MergeTreeData::getActivePartsToReplace(
const MergeTreePartInfo & new_part_info,
DataPartPtr & out_covering_part,
std::lock_guard<std::mutex> & /* data_parts_lock */) const
{
/// Parts contained in the part are consecutive in data_parts, intersecting the insertion place for the part itself.
auto it_middle = data_parts_by_state_and_info.lower_bound(DataPartStateAndInfo(DataPartState::Committed, new_part_info));
auto committed_parts_range = getDataPartsStateRange(DataPartState::Committed);
/// Go to the left.
DataPartIteratorByStateAndInfo begin = it_middle;
while (begin != committed_parts_range.begin())
{
auto prev = std::prev(begin);
if (!new_part_info.contains((*prev)->info))
{
if ((*prev)->info.contains(new_part_info))
{
out_covering_part = *prev;
return {};
}
break;
}
begin = prev;
}
/// Go to the right.
DataPartIteratorByStateAndInfo end = it_middle;
while (end != committed_parts_range.end())
{
if ((*end)->info == new_part_info)
throw Exception("Unexpected duplicate part " + (*end)->getNameWithState() + ". It is a bug.", ErrorCodes::LOGICAL_ERROR);
if (!new_part_info.contains((*end)->info))
{
if ((*end)->info.contains(new_part_info))
{
out_covering_part = *end;
return {};
}
break;
}
++end;
}
return DataPartsVector{begin, end};
}
void MergeTreeData::renameTempPartAndAdd(MutableDataPartPtr & part, SimpleIncrement * increment, Transaction * out_transaction)
{
auto removed = renameTempPartAndReplace(part, increment, out_transaction);
@ -1375,184 +1427,91 @@ void MergeTreeData::renameTempPartAndAdd(MutableDataPartPtr & part, SimpleIncrem
MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace(
MutableDataPartPtr & part, SimpleIncrement * increment, Transaction * out_transaction)
{
if (out_transaction && out_transaction->data)
throw Exception("Using the same MergeTreeData::Transaction for overlapping transactions is invalid", ErrorCodes::LOGICAL_ERROR);
if (out_transaction && out_transaction->data && out_transaction->data != this)
throw Exception("The same MergeTreeData::Transaction cannot be used for different tables",
ErrorCodes::LOGICAL_ERROR);
std::lock_guard<std::mutex> lock(data_parts_mutex);
part->assertState({DataPartState::Temporary});
MergeTreePartInfo part_info = part->info;
String part_name;
DataPartsVector replaced_parts;
std::vector<DataPartIteratorByStateAndName> replaced_iterators;
if (DataPartPtr existing_part_in_partition = getAnyPartInPartition(part->info.partition_id, lock))
{
std::unique_lock<std::mutex> lock(data_parts_mutex);
if (DataPartPtr existing_part_in_partition = getAnyPartInPartition(part->info.partition_id, lock))
{
if (part->partition.value != existing_part_in_partition->partition.value)
throw Exception(
"Partition value mismatch between two parts with the same partition ID. Existing part: "
+ existing_part_in_partition->name + ", newly added part: " + part->name,
ErrorCodes::CORRUPTED_DATA);
}
/** It is important that obtaining new block number and adding that block to parts set is done atomically.
* Otherwise there is race condition - merge of blocks could happen in interval that doesn't yet contain new part.
*/
if (increment)
part_info.min_block = part_info.max_block = increment->get();
if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING)
part_name = part_info.getPartNameV0(part->getMinDate(), part->getMaxDate());
else
part_name = part_info.getPartName();
LOG_TRACE(log, "Renaming temporary part " << part->relative_path << " to " << part_name << ".");
auto it_duplicate = data_parts_by_name.find(part_info);
if (it_duplicate != data_parts_by_name.end())
{
String message = "Part " + (*it_duplicate)->getNameWithState() + " already exists";
if ((*it_duplicate)->checkState({DataPartState::Outdated, DataPartState::Deleting}))
{
throw Exception(message + ", but it will be deleted soon", ErrorCodes::PART_IS_TEMPORARILY_LOCKED);
}
throw Exception(message, ErrorCodes::DUPLICATE_DATA_PART);
}
/// Check that part is not covered and doesn't cover other in-progress parts, it makes sense only for Replicated* engines
if (out_transaction)
{
auto check_coverage = [&part_info, &part_name] (const DataPartPtr & part)
{
if (part_info.contains(part->info))
throw Exception("Cannot add part " + part_name + " covering pre-committed part " + part->name, ErrorCodes::PART_IS_TEMPORARILY_LOCKED);
else if (part->info.contains(part_info))
throw Exception("Cannot add part " + part_name + " covered by pre-committed part " + part->name, ErrorCodes::PART_IS_TEMPORARILY_LOCKED);
};
auto it_middle = data_parts_by_state_and_name.lower_bound(DataPartStateAndInfo(DataPartState::PreCommitted, part_info));
auto precommitted_parts_range = getDataPartsStateRange(DataPartState::PreCommitted);
for (auto it = it_middle; it != precommitted_parts_range.begin();)
{
--it;
check_coverage(*it);
}
for (auto it = it_middle; it != precommitted_parts_range.end();)
{
check_coverage(*it);
++it;
}
}
/// Is the part covered by some other part?
DataPartPtr covering_part;
auto it_middle = data_parts_by_state_and_name.lower_bound(DataPartStateAndInfo(DataPartState::Committed, part_info));
/// Parts contained in the part are consecutive in data_parts, intersecting the insertion place for the part itself.
auto committed_parts_range = getDataPartsStateRange(DataPartState::Committed);
/// Go to the left.
for (auto it = it_middle; it != committed_parts_range.begin();)
{
--it;
if (!part_info.contains((*it)->info))
{
if ((*it)->info.contains(part_info))
covering_part = *it;
break;
}
replaced_iterators.push_back(it);
}
/// Parts must be in ascending order.
std::reverse(replaced_iterators.begin(), replaced_iterators.end());
/// Go to the right.
for (auto it = it_middle; it != committed_parts_range.end();)
{
if ((*it)->name == part_name)
throw Exception("Unexpected duplicate part " + (*it)->getNameWithState() + ". It is a bug.", ErrorCodes::LOGICAL_ERROR);
if (!part_info.contains((*it)->info))
{
if ((*it)->info.contains(part_info))
covering_part = *it;
break;
}
replaced_iterators.push_back(it);
++it;
}
if (covering_part)
{
LOG_WARNING(log, "Tried to add obsolete part " << part_name << " covered by " << covering_part->getNameWithState());
/// It is a temporary part, we want to delete it from filesystem immediately
/// Other fields remain the same
part->remove_time = time(nullptr);
part->is_temp = true;
/// Nothing to commit or rollback
if (out_transaction)
{
out_transaction->data = this;
out_transaction->parts_to_add_on_rollback = {};
out_transaction->parts_to_remove_on_rollback = {};
}
/// We replaced nothing
return {};
}
/// All checks are passed. Now we can rename the part on disk.
/// So, we maintain invariant: if a non-temporary part in filesystem then it is in data_parts
///
/// Ordinary MergeTree engines (they don't use out_transaction) commit parts immediately,
/// whereas ReplicatedMergeTree uses intermediate PreCommitted state
part->name = part_name;
part->info = part_info;
part->is_temp = false;
part->state = (out_transaction) ? DataPartState::PreCommitted : DataPartState::Committed;
part->renameTo(part_name);
data_parts_indexes.insert(part);
replaced_parts.reserve(replaced_iterators.size());
for (auto it_replacing_part : replaced_iterators)
replaced_parts.emplace_back(*it_replacing_part);
if (!out_transaction)
{
addPartContributionToColumnSizes(part);
auto current_time = time(nullptr);
for (auto it_replacing_part : replaced_iterators)
{
(*it_replacing_part)->remove_time = current_time;
modifyPartState(it_replacing_part, DataPartState::Outdated);
removePartContributionToColumnSizes(*it_replacing_part);
}
}
else
{
out_transaction->data = this;
out_transaction->parts_to_add_on_rollback = replaced_parts;
out_transaction->parts_to_remove_on_rollback = {part};
}
if (part->partition.value != existing_part_in_partition->partition.value)
throw Exception(
"Partition value mismatch between two parts with the same partition ID. Existing part: "
+ existing_part_in_partition->name + ", newly added part: " + part->name,
ErrorCodes::CORRUPTED_DATA);
}
return replaced_parts;
/** It is important that obtaining new block number and adding that block to parts set is done atomically.
* Otherwise there is race condition - merge of blocks could happen in interval that doesn't yet contain new part.
*/
if (increment)
part_info.min_block = part_info.max_block = increment->get();
if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING)
part_name = part_info.getPartNameV0(part->getMinDate(), part->getMaxDate());
else
part_name = part_info.getPartName();
LOG_TRACE(log, "Renaming temporary part " << part->relative_path << " to " << part_name << ".");
auto it_duplicate = data_parts_by_info.find(part_info);
if (it_duplicate != data_parts_by_info.end())
{
String message = "Part " + (*it_duplicate)->getNameWithState() + " already exists";
if ((*it_duplicate)->checkState({DataPartState::Outdated, DataPartState::Deleting}))
throw Exception(message + ", but it will be deleted soon", ErrorCodes::PART_IS_TEMPORARILY_LOCKED);
throw Exception(message, ErrorCodes::DUPLICATE_DATA_PART);
}
DataPartPtr covering_part;
DataPartsVector covered_parts = getActivePartsToReplace(part_info, covering_part, lock);
if (covering_part)
{
LOG_WARNING(log, "Tried to add obsolete part " << part_name << " covered by " << covering_part->getNameWithState());
return {};
}
/// All checks are passed. Now we can rename the part on disk.
/// So, we maintain invariant: if a non-temporary part in filesystem then it is in data_parts
///
/// If out_transaction is null, we commit the part to the active set immediately, else add it to the transaction.
part->name = part_name;
part->info = part_info;
part->is_temp = false;
part->state = DataPartState::PreCommitted;
part->renameTo(part_name);
auto part_it = data_parts_indexes.insert(part).first;
if (out_transaction)
{
out_transaction->data = this;
out_transaction->precommitted_parts.insert(part);
}
else
{
auto current_time = time(nullptr);
for (const DataPartPtr & covered_part : covered_parts)
{
covered_part->remove_time = current_time;
modifyPartState(covered_part, DataPartState::Outdated);
removePartContributionToColumnSizes(covered_part);
}
modifyPartState(part_it, DataPartState::Committed);
addPartContributionToColumnSizes(part);
}
return covered_parts;
}
void MergeTreeData::removePartsFromWorkingSet(const DataPartsVector & remove, bool clear_without_timeout)
@ -1561,7 +1520,7 @@ void MergeTreeData::removePartsFromWorkingSet(const DataPartsVector & remove, bo
for (auto & part : remove)
{
if (!data_parts_by_name.count(part->info))
if (!data_parts_by_info.count(part->info))
throw Exception("Part " + part->getNameWithState() + " not found in data_parts", ErrorCodes::LOGICAL_ERROR);
part->assertState({DataPartState::PreCommitted, DataPartState::Committed, DataPartState::Outdated});
@ -1586,8 +1545,8 @@ void MergeTreeData::renameAndDetachPart(const DataPartPtr & part_to_detach, cons
std::lock_guard<std::mutex> lock(data_parts_mutex);
auto it_part = data_parts_by_name.find(part_to_detach->info);
if (it_part == data_parts_by_name.end())
auto it_part = data_parts_by_info.find(part_to_detach->info);
if (it_part == data_parts_by_info.end())
throw Exception("No such data part " + part_to_detach->getNameWithState(), ErrorCodes::NO_SUCH_DATA_PART);
/// What if part_to_detach is reference to *it_part? Make a new owner just in case.
@ -1619,16 +1578,16 @@ void MergeTreeData::renameAndDetachPart(const DataPartPtr & part_to_detach, cons
return state == DataPartState::Committed || state == DataPartState::Outdated;
};
auto update_error = [&] (DataPartIteratorByAndName it)
auto update_error = [&] (DataPartIteratorByInfo it)
{
error = true;
error_parts += (*it)->getNameWithState() + " ";
};
auto it_middle = data_parts_by_name.lower_bound(part->info);
auto it_middle = data_parts_by_info.lower_bound(part->info);
/// Restore the leftmost part covered by the part
if (it_middle != data_parts_by_name.begin())
if (it_middle != data_parts_by_info.begin())
{
auto it = std::prev(it_middle);
@ -1654,7 +1613,7 @@ void MergeTreeData::renameAndDetachPart(const DataPartPtr & part_to_detach, cons
error = true;
/// Restore "right" parts
for (auto it = it_middle; it != data_parts_by_name.end() && part->contains(**it); ++it)
for (auto it = it_middle; it != data_parts_by_info.end() && part->contains(**it); ++it)
{
if ((*it)->info.min_block < pos)
continue;
@ -1776,7 +1735,7 @@ MergeTreeData::DataPartPtr MergeTreeData::getActiveContainingPart(const String &
auto committed_parts_range = getDataPartsStateRange(DataPartState::Committed);
/// The part can be covered only by the previous or the next one in data_parts.
auto it = data_parts_by_state_and_name.lower_bound(DataPartStateAndInfo(DataPartState::Committed, part_info));
auto it = data_parts_by_state_and_info.lower_bound(DataPartStateAndInfo(DataPartState::Committed, part_info));
if (it != committed_parts_range.end())
{
@ -1803,8 +1762,8 @@ MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const String & part_na
std::lock_guard<std::mutex> lock(data_parts_mutex);
auto it = data_parts_by_name.find(part_info);
if (it == data_parts_by_name.end())
auto it = data_parts_by_info.find(part_info);
if (it == data_parts_by_info.end())
return nullptr;
for (auto state : valid_states)
@ -2067,7 +2026,7 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, const Context
String partition_id = partition.getID(*this);
{
std::unique_lock<std::mutex> data_parts_lock(data_parts_mutex);
std::lock_guard<std::mutex> data_parts_lock(data_parts_mutex);
DataPartPtr existing_part_in_partition = getAnyPartInPartition(partition_id, data_parts_lock);
if (existing_part_in_partition && existing_part_in_partition->partition.value != partition.value)
{
@ -2115,7 +2074,7 @@ MergeTreeData::DataPartsVector MergeTreeData::getAllDataPartsVector(MergeTreeDat
DataPartsVector res;
{
std::lock_guard<std::mutex> lock(data_parts_mutex);
res.assign(data_parts_by_name.begin(), data_parts_by_name.end());
res.assign(data_parts_by_info.begin(), data_parts_by_info.end());
if (out_states != nullptr)
{
@ -2153,14 +2112,14 @@ MergeTreeData::DataPartsVector MergeTreeData::getDataPartsVector() const
}
MergeTreeData::DataPartPtr MergeTreeData::getAnyPartInPartition(
const String & partition_id, std::unique_lock<std::mutex> & /*data_parts_lock*/)
const String & partition_id, std::lock_guard<std::mutex> & /*data_parts_lock*/)
{
auto min_block = std::numeric_limits<Int64>::min();
MergeTreePartInfo dummy_part_info(partition_id, min_block, min_block, 0);
auto it = data_parts_by_state_and_name.lower_bound(DataPartStateAndInfo(DataPartState::Committed, dummy_part_info));
auto it = data_parts_by_state_and_info.lower_bound(DataPartStateAndInfo(DataPartState::Committed, dummy_part_info));
if (it != data_parts_by_state_and_name.end() && (*it)->state == DataPartState::Committed && (*it)->info.partition_id == partition_id)
if (it != data_parts_by_state_and_info.end() && (*it)->state == DataPartState::Committed && (*it)->info.partition_id == partition_id)
return *it;
return nullptr;
@ -2171,79 +2130,60 @@ void MergeTreeData::Transaction::rollback()
if (!isEmpty())
{
std::stringstream ss;
if (!parts_to_remove_on_rollback.empty())
{
ss << " Removing parts:";
for (const auto & part : parts_to_remove_on_rollback)
ss << " " << part->relative_path;
ss << ".";
}
if (!parts_to_add_on_rollback.empty())
{
ss << " Adding parts: ";
for (const auto & part : parts_to_add_on_rollback)
ss << " " << part->relative_path;
ss << ".";
}
ss << " Removing parts:";
for (const auto & part : precommitted_parts)
ss << " " << part->relative_path;
ss << ".";
LOG_DEBUG(data->log, "Undoing transaction." << ss.str());
/// PreCommitted -> Outdated
replaceParts(DataPartState::Outdated, DataPartState::Committed, true);
data->removePartsFromWorkingSet(
DataPartsVector(precommitted_parts.begin(), precommitted_parts.end()),
/* clear_without_timeout = */ true);
}
clear();
}
void MergeTreeData::Transaction::commit()
MergeTreeData::DataPartsVector MergeTreeData::Transaction::commit()
{
DataPartsVector total_covered_parts;
if (!isEmpty())
{
/// PreCommitted -> Committed, Committed -> Outdated
replaceParts(DataPartState::Committed, DataPartState::Outdated, false);
std::lock_guard<std::mutex> data_parts_lock(data->data_parts_mutex);
auto current_time = time(nullptr);
for (const DataPartPtr & part : precommitted_parts)
{
DataPartPtr covering_part;
DataPartsVector covered_parts = data->getActivePartsToReplace(part->info, covering_part, data_parts_lock);
if (covering_part)
{
LOG_WARNING(data->log, "Tried to commit obsolete part " << part->name
<< " covered by " << covering_part->getNameWithState());
part->remove_time = 0; /// The part will be removed without waiting for old_parts_lifetime seconds.
data->modifyPartState(part, DataPartState::Outdated);
}
else
{
total_covered_parts.insert(total_covered_parts.end(), covered_parts.begin(), covered_parts.end());
for (const DataPartPtr & covered_part : covered_parts)
{
covered_part->remove_time = current_time;
data->modifyPartState(covered_part, DataPartState::Outdated);
data->removePartContributionToColumnSizes(covered_part);
}
data->modifyPartState(part, DataPartState::Committed);
data->addPartContributionToColumnSizes(part);
}
}
}
clear();
}
void MergeTreeData::Transaction::replaceParts(MergeTreeData::DataPartState move_precommitted_to,
MergeTreeData::DataPartState move_committed_to, bool remove_without_delay)
{
auto & committed_parts = parts_to_add_on_rollback;
auto & precommitted_parts = parts_to_remove_on_rollback;
/// TODO: also make sense to activate CleanupThread's cv
auto remove_time = (remove_without_delay) ? 0 : time(nullptr);
{
std::lock_guard<std::mutex> lock(data->data_parts_mutex);
for (auto & part : committed_parts)
part->assertState({DataPartState::Committed});
for (auto & part : precommitted_parts)
part->assertState({DataPartState::PreCommitted});
/// If it is rollback then do nothing, else make it Outdated and remove their size contribution
if (move_committed_to != DataPartState::Committed)
{
for (const DataPartPtr & part : committed_parts)
{
data->modifyPartState(part, move_committed_to);
part->remove_time = remove_time;
data->removePartContributionToColumnSizes(part);
}
}
/// If it is rollback just change state to Outdated, else change state to Committed and add their size contribution
for (auto & part : precommitted_parts)
{
data->modifyPartState(part, move_precommitted_to);
if (move_precommitted_to == DataPartState::Committed)
data->addPartContributionToColumnSizes(part);
else
part->remove_time = remove_time;
}
}
return total_covered_parts;
}
bool MergeTreeData::isPrimaryKeyColumn(const ASTPtr &node) const

View File

@ -145,20 +145,24 @@ public:
using DataParts = std::set<DataPartPtr, LessDataPart>;
using DataPartsVector = std::vector<DataPartPtr>;
/// Some operations on the set of parts return a Transaction object.
/// Auxiliary object to add a set of parts into the working set in two steps:
/// * First, as PreCommitted parts (the parts are ready, but not yet in the active set).
/// * Next, if commit() is called, the parts are added to the active set and the parts that are
/// covered by them are marked Outdated.
/// If neither commit() nor rollback() was called, the destructor rollbacks the operation.
class Transaction : private boost::noncopyable
{
public:
Transaction() {}
void commit();
/// Return parts marked Obsolete as a result of the transaction commit.
DataPartsVector commit();
void rollback();
bool isEmpty() const
{
return parts_to_add_on_rollback.empty() && parts_to_remove_on_rollback.empty();
return precommitted_parts.empty();
}
~Transaction()
@ -172,23 +176,18 @@ public:
tryLogCurrentException("~MergeTreeData::Transaction");
}
}
private:
friend class MergeTreeData;
MergeTreeData * data = nullptr;
/// What to do on rollback.
DataPartsVector parts_to_remove_on_rollback;
DataPartsVector parts_to_add_on_rollback;
DataParts precommitted_parts;
void clear()
{
data = nullptr;
parts_to_remove_on_rollback.clear();
parts_to_add_on_rollback.clear();
precommitted_parts.clear();
}
void replaceParts(DataPartState move_precommitted_to, DataPartState move_committed_to, bool remove_without_delay);
};
/// An object that stores the names of temporary files created in the part directory during ALTER of its
@ -368,14 +367,17 @@ public:
/// If until is non-null, wake up from the sleep earlier if the event happened.
void delayInsertIfNeeded(Poco::Event * until = nullptr);
/// Renames temporary part to a permanent part and adds it to the working set.
/// If increment != nullptr, part index is determing using increment. Otherwise part index remains unchanged.
/// Renames temporary part to a permanent part and adds it to the parts set.
/// It is assumed that the part does not intersect with existing parts.
/// If out_transaction != nullptr, sets it to an object allowing to rollback part addition (but not the renaming).
/// If increment != nullptr, part index is determing using increment. Otherwise part index remains unchanged.
/// If out_transaction != nullptr, adds the part in the PreCommitted state (the part will be added to the
/// active set later with out_transaction->commit()).
/// Else, commits the part immediately.
void renameTempPartAndAdd(MutableDataPartPtr & part, SimpleIncrement * increment = nullptr, Transaction * out_transaction = nullptr);
/// The same as renameTempPartAndAdd but the part can intersect existing parts.
/// Deletes and returns all parts covered by the added part (in ascending order).
/// The same as renameTempPartAndAdd but the block range of the part can contain existing parts.
/// Returns all parts covered by the added part (in ascending order).
/// If out_transaction == nullptr, marks covered parts as Outdated.
DataPartsVector renameTempPartAndReplace(
MutableDataPartPtr & part, SimpleIncrement * increment = nullptr, Transaction * out_transaction = nullptr);
@ -573,8 +575,8 @@ private:
/// Work with data parts
struct TagByName{};
struct TagByStateAndName{};
struct TagByInfo{};
struct TagByStateAndInfo{};
static const MergeTreePartInfo & dataPartPtrToInfo(const DataPartPtr & part)
{
@ -588,14 +590,14 @@ private:
using DataPartsIndexes = boost::multi_index_container<DataPartPtr,
boost::multi_index::indexed_by<
/// Index by Name
/// Index by Info
boost::multi_index::ordered_unique<
boost::multi_index::tag<TagByName>,
boost::multi_index::tag<TagByInfo>,
boost::multi_index::global_fun<const DataPartPtr &, const MergeTreePartInfo &, dataPartPtrToInfo>
>,
/// Index by (State, Name), is used to obtain ordered slices of parts with the same state
/// Index by (State, Info), is used to obtain ordered slices of parts with the same state
boost::multi_index::ordered_unique<
boost::multi_index::tag<TagByStateAndName>,
boost::multi_index::tag<TagByStateAndInfo>,
boost::multi_index::global_fun<const DataPartPtr &, DataPartStateAndInfo, dataPartPtrToStateAndInfo>,
LessStateDataPart
>
@ -605,16 +607,16 @@ private:
/// Current set of data parts.
mutable std::mutex data_parts_mutex;
DataPartsIndexes data_parts_indexes;
DataPartsIndexes::index<TagByName>::type & data_parts_by_name;
DataPartsIndexes::index<TagByStateAndName>::type & data_parts_by_state_and_name;
DataPartsIndexes::index<TagByInfo>::type & data_parts_by_info;
DataPartsIndexes::index<TagByStateAndInfo>::type & data_parts_by_state_and_info;
using DataPartIteratorByAndName = DataPartsIndexes::index<TagByName>::type::iterator;
using DataPartIteratorByStateAndName = DataPartsIndexes::index<TagByStateAndName>::type::iterator;
using DataPartIteratorByInfo = DataPartsIndexes::index<TagByInfo>::type::iterator;
using DataPartIteratorByStateAndInfo = DataPartsIndexes::index<TagByStateAndInfo>::type::iterator;
boost::iterator_range<DataPartIteratorByStateAndName> getDataPartsStateRange(DataPartState state) const
boost::iterator_range<DataPartIteratorByStateAndInfo> getDataPartsStateRange(DataPartState state) const
{
auto begin = data_parts_by_state_and_name.lower_bound(state, LessStateDataPart());
auto end = data_parts_by_state_and_name.upper_bound(state, LessStateDataPart());
auto begin = data_parts_by_state_and_info.lower_bound(state, LessStateDataPart());
auto end = data_parts_by_state_and_info.upper_bound(state, LessStateDataPart());
return {begin, end};
}
@ -623,25 +625,25 @@ private:
return [state] (const DataPartPtr & part) { part->state = state; };
}
void modifyPartState(DataPartIteratorByStateAndName it, DataPartState state)
void modifyPartState(DataPartIteratorByStateAndInfo it, DataPartState state)
{
if (!data_parts_by_state_and_name.modify(it, getStateModifier(state)))
if (!data_parts_by_state_and_info.modify(it, getStateModifier(state)))
throw Exception("Can't modify " + (*it)->getNameWithState(), ErrorCodes::LOGICAL_ERROR);
}
void modifyPartState(DataPartIteratorByAndName it, DataPartState state)
void modifyPartState(DataPartIteratorByInfo it, DataPartState state)
{
if (!data_parts_by_state_and_name.modify(data_parts_indexes.project<TagByStateAndName>(it), getStateModifier(state)))
if (!data_parts_by_state_and_info.modify(data_parts_indexes.project<TagByStateAndInfo>(it), getStateModifier(state)))
throw Exception("Can't modify " + (*it)->getNameWithState(), ErrorCodes::LOGICAL_ERROR);
}
void modifyPartState(const DataPartPtr & part, DataPartState state)
{
auto it = data_parts_by_name.find(part->info);
if (it == data_parts_by_name.end() || (*it).get() != part.get())
throw Exception("Part " + part->name + " is not exists", ErrorCodes::LOGICAL_ERROR);
auto it = data_parts_by_info.find(part->info);
if (it == data_parts_by_info.end() || (*it).get() != part.get())
throw Exception("Part " + part->name + " doesn't exist", ErrorCodes::LOGICAL_ERROR);
if (!data_parts_by_state_and_name.modify(data_parts_indexes.project<TagByStateAndName>(it), getStateModifier(state)))
if (!data_parts_by_state_and_info.modify(data_parts_indexes.project<TagByStateAndInfo>(it), getStateModifier(state)))
throw Exception("Can't modify " + (*it)->getNameWithState(), ErrorCodes::LOGICAL_ERROR);
}
@ -672,7 +674,14 @@ private:
void removePartContributionToColumnSizes(const DataPartPtr & part);
/// If there is no part in the partition with ID `partition_id`, returns empty ptr. Should be called under the lock.
DataPartPtr getAnyPartInPartition(const String & partition_id, std::unique_lock<std::mutex> & data_parts_lock);
DataPartPtr getAnyPartInPartition(const String & partition_id, std::lock_guard<std::mutex> & data_parts_lock);
/// Return parts in the Committed set that are covered by the new_part_info or the part that covers it.
/// Will check that the new part doesn't already exist and that it doesn't intersect existing part.
DataPartsVector getActivePartsToReplace(
const MergeTreePartInfo & new_part_info,
DataPartPtr & out_covering_part,
std::lock_guard<std::mutex> & data_parts_lock) const;
/// Checks whether the column is in the primary key.
bool isPrimaryKeyColumn(const ASTPtr &node) const;

View File

@ -29,6 +29,11 @@ struct MergeTreePartInfo
< std::forward_as_tuple(rhs.partition_id, rhs.min_block, rhs.max_block, rhs.level);
}
bool operator==(const MergeTreePartInfo & rhs) const
{
return !(*this < rhs || rhs < *this);
}
/// Contains another part (obtained after merging another part with some other)
bool contains(const MergeTreePartInfo & rhs) const
{

View File

@ -2259,13 +2259,13 @@ bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const Strin
checkPartAndAddToZooKeeper(part, ops, part_name);
MergeTreeData::Transaction transaction;
replaced_parts = data.renameTempPartAndReplace(part, nullptr, &transaction);
data.renameTempPartAndReplace(part, nullptr, &transaction);
/// Do not commit if the part is obsolete
if (!transaction.isEmpty())
{
getZooKeeper()->multi(ops);
transaction.commit();
replaced_parts = transaction.commit();
}
/** If a quorum is tracked for this part, you must update it.