Do not remove detached parts in Transaction::rollback

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
This commit is contained in:
Azat Khuzhin 2024-04-05 10:24:30 +02:00
parent 3675c27fe9
commit 6f522c1d61
5 changed files with 55 additions and 14 deletions

View File

@ -59,6 +59,16 @@ std::string DataPartStorageOnDiskBase::getRelativePath() const
return fs::path(root_path) / part_dir / "";
}
std::string DataPartStorageOnDiskBase::getParentDirectory() const
{
/// Cut last "/" if it exists (it shouldn't). Otherwise fs::path behave differently.
fs::path part_dir_without_slash = part_dir.ends_with("/") ? part_dir.substr(0, part_dir.size() - 1) : part_dir;
if (part_dir_without_slash.has_parent_path())
return part_dir_without_slash.parent_path();
return "";
}
std::optional<String> DataPartStorageOnDiskBase::getRelativePathForPrefix(LoggerPtr log, const String & prefix, bool detached, bool broken) const
{
assert(!broken || detached);
@ -674,9 +684,9 @@ void DataPartStorageOnDiskBase::remove(
if (!has_delete_prefix)
{
if (part_dir_without_slash.has_parent_path())
auto parent_path = getParentDirectory();
if (!parent_path.empty())
{
auto parent_path = part_dir_without_slash.parent_path();
if (parent_path == MergeTreeData::DETACHED_DIR_NAME)
throw Exception(
ErrorCodes::LOGICAL_ERROR,
@ -684,7 +694,7 @@ void DataPartStorageOnDiskBase::remove(
part_dir,
root_path);
part_dir_without_slash = parent_path / ("delete_tmp_" + std::string{part_dir_without_slash.filename()});
part_dir_without_slash = fs::path(parent_path) / ("delete_tmp_" + std::string{part_dir_without_slash.filename()});
}
else
{

View File

@ -20,6 +20,7 @@ public:
std::string getRelativePath() const override;
std::string getPartDirectory() const override;
std::string getFullRootPath() const override;
std::string getParentDirectory() const override;
Poco::Timestamp getLastModified() const override;
UInt64 calculateTotalSizeOnDisk() const override;

View File

@ -96,11 +96,12 @@ public:
virtual MergeTreeDataPartStorageType getType() const = 0;
/// Methods to get path components of a data part.
virtual std::string getFullPath() const = 0; /// '/var/lib/clickhouse/data/database/table/moving/all_1_5_1'
virtual std::string getRelativePath() const = 0; /// 'database/table/moving/all_1_5_1'
virtual std::string getPartDirectory() const = 0; /// 'all_1_5_1'
virtual std::string getFullRootPath() const = 0; /// '/var/lib/clickhouse/data/database/table/moving'
/// Can add it if needed /// 'database/table/moving'
virtual std::string getFullPath() const = 0; /// '/var/lib/clickhouse/data/database/table/moving/all_1_5_1'
virtual std::string getRelativePath() const = 0; /// 'database/table/moving/all_1_5_1'
virtual std::string getPartDirectory() const = 0; /// 'all_1_5_1'
virtual std::string getFullRootPath() const = 0; /// '/var/lib/clickhouse/data/database/table/moving'
virtual std::string getParentDirectory() const = 0; /// '' (or 'detached' for 'detached/all_1_5_1')
/// Can add it if needed /// 'database/table/moving'
/// virtual std::string getRelativeRootPath() const = 0;
/// Get a storage for projection.

View File

@ -4083,9 +4083,9 @@ void MergeTreeData::removePartsFromWorkingSet(MergeTreeTransaction * txn, const
resetObjectColumnsFromActiveParts(acquired_lock);
}
void MergeTreeData::removePartsFromWorkingSetImmediatelyAndSetTemporaryState(const DataPartsVector & remove)
void MergeTreeData::removePartsFromWorkingSetImmediatelyAndSetTemporaryState(const DataPartsVector & remove, DataPartsLock * acquired_lock)
{
auto lock = lockParts();
auto lock = (acquired_lock) ? DataPartsLock() : lockParts();
for (const auto & part : remove)
{
@ -6635,16 +6635,41 @@ void MergeTreeData::Transaction::rollback(DataPartsLock * lock)
{
if (!isEmpty())
{
for (const auto & part : precommitted_parts)
part->version.creation_csn.store(Tx::RolledBackCSN);
/// Remove detached parts from working set.
///
/// It is possible to have detached parts here, only when rename (in
/// commit()) of detached parts had been broken (i.e. during ATTACH),
/// i.e. the part itself is broken.
DataPartsVector detached_precommitted_parts;
for (auto it = precommitted_parts.begin(); it != precommitted_parts.end();)
{
const auto & part = *it;
if (part->getDataPartStorage().getParentDirectory() == DETACHED_DIR_NAME)
{
detached_precommitted_parts.push_back(part);
it = precommitted_parts.erase(it);
}
else
++it;
}
WriteBufferFromOwnString buf;
buf << "Removing parts:";
for (const auto & part : precommitted_parts)
buf << " " << part->getDataPartStorage().getPartDirectory();
buf << ".";
if (!detached_precommitted_parts.empty())
{
buf << " Rollbacking parts state to temporary and removing from working set:";
for (const auto & part : detached_precommitted_parts)
buf << " " << part->getDataPartStorage().getPartDirectory();
buf << ".";
}
LOG_DEBUG(data.log, "Undoing transaction {}. {}", getTID(), buf.str());
for (const auto & part : precommitted_parts)
part->version.creation_csn.store(Tx::RolledBackCSN);
/// It would be much better with TSA...
auto our_lock = (lock) ? DataPartsLock() : data.lockParts();
@ -6663,6 +6688,10 @@ void MergeTreeData::Transaction::rollback(DataPartsLock * lock)
}
else
{
data.removePartsFromWorkingSetImmediatelyAndSetTemporaryState(
detached_precommitted_parts,
&our_lock);
data.removePartsFromWorkingSet(txn,
DataPartsVector(precommitted_parts.begin(), precommitted_parts.end()),
/* clear_without_timeout = */ true, &our_lock);

View File

@ -610,7 +610,7 @@ public:
/// Remove parts from working set immediately (without wait for background
/// process). Transfer part state to temporary. Have very limited usage only
/// for new parts which aren't already present in table.
void removePartsFromWorkingSetImmediatelyAndSetTemporaryState(const DataPartsVector & remove);
void removePartsFromWorkingSetImmediatelyAndSetTemporaryState(const DataPartsVector & remove, DataPartsLock * acquired_lock = nullptr);
/// Removes parts from the working set parts.
/// Parts in add must already be in data_parts with PreActive, Active, or Outdated states.