adding more comments

This commit is contained in:
Sema Checherinda 2023-05-12 13:00:56 +02:00
parent 66e3712dca
commit 5706e05757
6 changed files with 27 additions and 2 deletions

View File

@ -919,6 +919,9 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDisk(
new_data_part->version.setCreationTID(Tx::PrehistoricTID, nullptr); new_data_part->version.setCreationTID(Tx::PrehistoricTID, nullptr);
new_data_part->is_temp = true; new_data_part->is_temp = true;
/// In case of replicated merge tree with zero copy replication
/// Here Clickhouse claims that this new part can be deleted in temporary state without unlocking the blobs
/// The blobs have to stay intact, this temporary part does not own them and does not share them yet.
new_data_part->remove_tmp_policy = IMergeTreeDataPart::BlobsRemovalPolicyForTemporaryParts::PRESERVE_BLOBS; new_data_part->remove_tmp_policy = IMergeTreeDataPart::BlobsRemovalPolicyForTemporaryParts::PRESERVE_BLOBS;
new_data_part->modification_time = time(nullptr); new_data_part->modification_time = time(nullptr);
new_data_part->loadColumnsChecksumsIndexes(true, false); new_data_part->loadColumnsChecksumsIndexes(true, false);

View File

@ -218,14 +218,20 @@ public:
/// FIXME Why do we need this flag? What's difference from Temporary and DeleteOnDestroy state? Can we get rid of this? /// FIXME Why do we need this flag? What's difference from Temporary and DeleteOnDestroy state? Can we get rid of this?
bool is_temp = false; bool is_temp = false;
/// This type and the field remove_tmp_policy are used as a hint
/// to help avoid communication with keeper when a temporary part is being deleted.
/// The common procedure is to send an unlock request to keeper to release the references to the blobs,
/// and then, following keeper's answer, decide whether to remove or preserve the blobs of that part in s3.
/// However, in some special cases Clickhouse can make the decision without asking keeper.
enum class BlobsRemovalPolicyForTemporaryParts enum class BlobsRemovalPolicyForTemporaryParts
{ {
/// decision about removing blobs is determined by keeper, the common case
ASK_KEEPER, ASK_KEEPER,
/// is set when Clickhouse is sure that the blobs in the part belong only to it, other replicas have not seen them yet
REMOVE_BLOBS, REMOVE_BLOBS,
/// is set when Clickhouse is sure that the blobs belong to another replica and the current replica has not locked them on s3 yet
PRESERVE_BLOBS, PRESERVE_BLOBS,
}; };
/// This field is used by replicated merge tree with zero copy replication.
/// Usually the data has to be unlocked in keeper unless explicitly stated otherwise.
BlobsRemovalPolicyForTemporaryParts remove_tmp_policy = BlobsRemovalPolicyForTemporaryParts::ASK_KEEPER; BlobsRemovalPolicyForTemporaryParts remove_tmp_policy = BlobsRemovalPolicyForTemporaryParts::ASK_KEEPER;
/// If true it means that there are no ZooKeeper node for this part, so it should be deleted only from filesystem /// If true it means that there are no ZooKeeper node for this part, so it should be deleted only from filesystem

View File

@ -209,6 +209,9 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare()
global_ctx->new_data_part->uuid = global_ctx->future_part->uuid; global_ctx->new_data_part->uuid = global_ctx->future_part->uuid;
global_ctx->new_data_part->partition.assign(global_ctx->future_part->getPartition()); global_ctx->new_data_part->partition.assign(global_ctx->future_part->getPartition());
global_ctx->new_data_part->is_temp = global_ctx->parent_part == nullptr; global_ctx->new_data_part->is_temp = global_ctx->parent_part == nullptr;
/// In case of replicated merge tree with zero copy replication
/// Here Clickhouse claims that this new part can be deleted in temporary state without unlocking the blobs
/// The blobs have to be removed along with the part, this temporary part owns them and does not share them yet.
global_ctx->new_data_part->remove_tmp_policy = IMergeTreeDataPart::BlobsRemovalPolicyForTemporaryParts::REMOVE_BLOBS; global_ctx->new_data_part->remove_tmp_policy = IMergeTreeDataPart::BlobsRemovalPolicyForTemporaryParts::REMOVE_BLOBS;
ctx->need_remove_expired_values = false; ctx->need_remove_expired_values = false;

View File

@ -8282,6 +8282,9 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::createEmptyPart(
new_data_part->minmax_idx = std::move(minmax_idx); new_data_part->minmax_idx = std::move(minmax_idx);
new_data_part->is_temp = true; new_data_part->is_temp = true;
/// In case of replicated merge tree with zero copy replication
/// Here Clickhouse claims that this new part can be deleted in temporary state without unlocking the blobs
/// The blobs have to be removed along with the part, this temporary part owns them and does not share them yet.
new_data_part->remove_tmp_policy = IMergeTreeDataPart::BlobsRemovalPolicyForTemporaryParts::REMOVE_BLOBS; new_data_part->remove_tmp_policy = IMergeTreeDataPart::BlobsRemovalPolicyForTemporaryParts::REMOVE_BLOBS;
auto new_data_part_storage = new_data_part->getDataPartStoragePtr(); auto new_data_part_storage = new_data_part->getDataPartStoragePtr();

View File

@ -469,6 +469,9 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl(
new_data_part->partition = std::move(partition); new_data_part->partition = std::move(partition);
new_data_part->minmax_idx = std::move(minmax_idx); new_data_part->minmax_idx = std::move(minmax_idx);
new_data_part->is_temp = true; new_data_part->is_temp = true;
/// In case of replicated merge tree with zero copy replication
/// Here Clickhouse claims that this new part can be deleted in temporary state without unlocking the blobs
/// The blobs have to be removed along with the part, this temporary part owns them and does not share them yet.
new_data_part->remove_tmp_policy = IMergeTreeDataPart::BlobsRemovalPolicyForTemporaryParts::REMOVE_BLOBS; new_data_part->remove_tmp_policy = IMergeTreeDataPart::BlobsRemovalPolicyForTemporaryParts::REMOVE_BLOBS;
SyncGuardPtr sync_guard; SyncGuardPtr sync_guard;

View File

@ -1838,6 +1838,9 @@ bool MutateTask::prepare()
if (!isWidePart(ctx->source_part) || !isFullPartStorage(ctx->source_part->getDataPartStorage()) if (!isWidePart(ctx->source_part) || !isFullPartStorage(ctx->source_part->getDataPartStorage())
|| (ctx->interpreter && ctx->interpreter->isAffectingAllColumns())) || (ctx->interpreter && ctx->interpreter->isAffectingAllColumns()))
{ {
/// In case of replicated merge tree with zero copy replication
/// Here Clickhouse claims that this new part can be deleted in temporary state without unlocking the blobs
/// The blobs have to be removed along with the part, this temporary part owns them and does not share them yet.
ctx->new_data_part->remove_tmp_policy = IMergeTreeDataPart::BlobsRemovalPolicyForTemporaryParts::REMOVE_BLOBS; ctx->new_data_part->remove_tmp_policy = IMergeTreeDataPart::BlobsRemovalPolicyForTemporaryParts::REMOVE_BLOBS;
task = std::make_unique<MutateAllPartColumnsTask>(ctx); task = std::make_unique<MutateAllPartColumnsTask>(ctx);
@ -1867,6 +1870,10 @@ bool MutateTask::prepare()
ctx->for_file_renames, ctx->for_file_renames,
ctx->mrk_extension); ctx->mrk_extension);
/// In case of replicated merge tree with zero copy replication:
/// here Clickhouse has to follow the common procedure when deleting the new part in temporary state.
/// Some of the files within the blobs are shared with the source part, some belong only to the new part.
/// Keeper has to be asked with an unlock request to release the references to the blobs.
ctx->new_data_part->remove_tmp_policy = IMergeTreeDataPart::BlobsRemovalPolicyForTemporaryParts::ASK_KEEPER; ctx->new_data_part->remove_tmp_policy = IMergeTreeDataPart::BlobsRemovalPolicyForTemporaryParts::ASK_KEEPER;
task = std::make_unique<MutateSomePartColumnsTask>(ctx); task = std::make_unique<MutateSomePartColumnsTask>(ctx);