mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-30 11:32:03 +00:00
Try fix 'Directory tmp_merge_<part_name>' already exists (#32201)
* try fix 'directory tmp_merge_<part_name>' already exists * fix * fix * fix Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
This commit is contained in:
parent
9867d75fec
commit
25427719d4
@ -424,52 +424,59 @@ void IMergeTreeDataPart::setColumns(const NamesAndTypesList & new_columns)
|
|||||||
|
|
||||||
void IMergeTreeDataPart::removeIfNeeded()
|
void IMergeTreeDataPart::removeIfNeeded()
|
||||||
{
|
{
|
||||||
if (state == State::DeleteOnDestroy || is_temp)
|
if (!is_temp && state != State::DeleteOnDestroy)
|
||||||
|
return;
|
||||||
|
|
||||||
|
try
|
||||||
{
|
{
|
||||||
try
|
auto path = getFullRelativePath();
|
||||||
{
|
|
||||||
auto path = getFullRelativePath();
|
|
||||||
|
|
||||||
if (!volume->getDisk()->exists(path))
|
if (!volume->getDisk()->exists(path))
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (is_temp)
|
||||||
|
{
|
||||||
|
String file_name = fileName(relative_path);
|
||||||
|
|
||||||
|
if (file_name.empty())
|
||||||
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "relative_path {} of part {} is invalid or not set", relative_path, name);
|
||||||
|
|
||||||
|
if (!startsWith(file_name, "tmp") && !endsWith(file_name, ".tmp_proj"))
|
||||||
|
{
|
||||||
|
LOG_ERROR(
|
||||||
|
storage.log,
|
||||||
|
"~DataPart() should remove part {} but its name doesn't start with \"tmp\" or end with \".tmp_proj\". Too "
|
||||||
|
"suspicious, keeping the part.",
|
||||||
|
path);
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (is_temp)
|
|
||||||
{
|
|
||||||
String file_name = fileName(relative_path);
|
|
||||||
|
|
||||||
if (file_name.empty())
|
|
||||||
throw Exception("relative_path " + relative_path + " of part " + name + " is invalid or not set", ErrorCodes::LOGICAL_ERROR);
|
|
||||||
|
|
||||||
if (!startsWith(file_name, "tmp") && !endsWith(file_name, ".tmp_proj"))
|
|
||||||
{
|
|
||||||
LOG_ERROR(
|
|
||||||
storage.log,
|
|
||||||
"~DataPart() should remove part {} but its name doesn't start with \"tmp\" or end with \".tmp_proj\". Too "
|
|
||||||
"suspicious, keeping the part.",
|
|
||||||
path);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (parent_part)
|
|
||||||
{
|
|
||||||
std::optional<bool> keep_shared_data = keepSharedDataInDecoupledStorage();
|
|
||||||
if (!keep_shared_data.has_value())
|
|
||||||
return;
|
|
||||||
projectionRemove(parent_part->getFullRelativePath(), *keep_shared_data);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
remove();
|
|
||||||
|
|
||||||
if (state == State::DeleteOnDestroy)
|
|
||||||
{
|
|
||||||
LOG_TRACE(storage.log, "Removed part from old location {}", path);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch (...)
|
|
||||||
|
if (parent_part)
|
||||||
{
|
{
|
||||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
std::optional<bool> keep_shared_data = keepSharedDataInDecoupledStorage();
|
||||||
|
if (!keep_shared_data.has_value())
|
||||||
|
return;
|
||||||
|
projectionRemove(parent_part->getFullRelativePath(), *keep_shared_data);
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
remove();
|
||||||
|
|
||||||
|
if (state == State::DeleteOnDestroy)
|
||||||
|
{
|
||||||
|
LOG_TRACE(storage.log, "Removed part from old location {}", path);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (...)
|
||||||
|
{
|
||||||
|
/// FIXME If part it temporary, then directory will not be removed for 1 day (temporary_directories_lifetime).
|
||||||
|
/// If it's tmp_merge_<part_name> or tmp_fetch_<part_name>,
|
||||||
|
/// then all future attempts to execute part producing operation will fail with "directory already exists".
|
||||||
|
/// Seems like it's especially important for remote disks, because removal may fail due to network issues.
|
||||||
|
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||||
|
assert(!is_temp);
|
||||||
|
assert(state != State::DeleteOnDestroy);
|
||||||
|
assert(state != State::Temporary);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1157,14 +1164,17 @@ void IMergeTreeDataPart::remove() const
|
|||||||
* And a race condition can happen that will lead to "File not found" error here.
|
* And a race condition can happen that will lead to "File not found" error here.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/// NOTE We rename part to delete_tmp_<relative_path> instead of delete_tmp_<name> to avoid race condition
|
||||||
|
/// when we try to remove two parts with the same name, but different relative paths,
|
||||||
|
/// for example all_1_2_1 (in Deleting state) and tmp_merge_all_1_2_1 (in Temporary state).
|
||||||
fs::path from = fs::path(storage.relative_data_path) / relative_path;
|
fs::path from = fs::path(storage.relative_data_path) / relative_path;
|
||||||
fs::path to = fs::path(storage.relative_data_path) / ("delete_tmp_" + name);
|
fs::path to = fs::path(storage.relative_data_path) / ("delete_tmp_" + relative_path);
|
||||||
// TODO directory delete_tmp_<name> is never removed if server crashes before returning from this function
|
// TODO directory delete_tmp_<name> is never removed if server crashes before returning from this function
|
||||||
|
|
||||||
auto disk = volume->getDisk();
|
auto disk = volume->getDisk();
|
||||||
if (disk->exists(to))
|
if (disk->exists(to))
|
||||||
{
|
{
|
||||||
LOG_WARNING(storage.log, "Directory {} (to which part must be renamed before removing) already exists. Most likely this is due to unclean restart. Removing it.", fullPath(disk, to));
|
LOG_WARNING(storage.log, "Directory {} (to which part must be renamed before removing) already exists. Most likely this is due to unclean restart or race condition. Removing it.", fullPath(disk, to));
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
disk->removeSharedRecursive(fs::path(to) / "", *keep_shared_data);
|
disk->removeSharedRecursive(fs::path(to) / "", *keep_shared_data);
|
||||||
|
@ -198,6 +198,7 @@ public:
|
|||||||
mutable std::atomic<time_t> remove_time { std::numeric_limits<time_t>::max() };
|
mutable std::atomic<time_t> remove_time { std::numeric_limits<time_t>::max() };
|
||||||
|
|
||||||
/// If true, the destructor will delete the directory with the part.
|
/// If true, the destructor will delete the directory with the part.
|
||||||
|
/// FIXME Why do we need this flag? What's difference from Temporary and DeleteOnDestroy state? Can we get rid of this?
|
||||||
bool is_temp = false;
|
bool is_temp = false;
|
||||||
|
|
||||||
/// If true it means that there are no ZooKeeper node for this part, so it should be deleted only from filesystem
|
/// If true it means that there are no ZooKeeper node for this part, so it should be deleted only from filesystem
|
||||||
|
@ -1373,9 +1373,6 @@ void StorageReplicatedMergeTree::checkPartChecksumsAndAddCommitOps(const zkutil:
|
|||||||
const auto storage_settings_ptr = getSettings();
|
const auto storage_settings_ptr = getSettings();
|
||||||
String part_path = fs::path(replica_path) / "parts" / part_name;
|
String part_path = fs::path(replica_path) / "parts" / part_name;
|
||||||
|
|
||||||
//ops.emplace_back(zkutil::makeCheckRequest(
|
|
||||||
// zookeeper_path + "/columns", expected_columns_version));
|
|
||||||
|
|
||||||
if (storage_settings_ptr->use_minimalistic_part_header_in_zookeeper)
|
if (storage_settings_ptr->use_minimalistic_part_header_in_zookeeper)
|
||||||
{
|
{
|
||||||
ops.emplace_back(zkutil::makeCreateRequest(
|
ops.emplace_back(zkutil::makeCreateRequest(
|
||||||
@ -1421,6 +1418,7 @@ MergeTreeData::DataPartsVector StorageReplicatedMergeTree::checkPartChecksumsAnd
|
|||||||
Coordination::Requests new_ops;
|
Coordination::Requests new_ops;
|
||||||
for (const String & part_path : absent_part_paths_on_replicas)
|
for (const String & part_path : absent_part_paths_on_replicas)
|
||||||
{
|
{
|
||||||
|
/// NOTE Create request may fail with ZNONODE if replica is being dropped, we will throw an exception
|
||||||
new_ops.emplace_back(zkutil::makeCreateRequest(part_path, "", zkutil::CreateMode::Persistent));
|
new_ops.emplace_back(zkutil::makeCreateRequest(part_path, "", zkutil::CreateMode::Persistent));
|
||||||
new_ops.emplace_back(zkutil::makeRemoveRequest(part_path, -1));
|
new_ops.emplace_back(zkutil::makeRemoveRequest(part_path, -1));
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user