fix race between executeMetadataAlter and initializeReplication

This commit is contained in:
Alexander Tokmakov 2023-06-27 19:41:24 +02:00
parent dc0dc227e0
commit 1d47783e85
5 changed files with 28 additions and 1 deletions

View File

@ -1293,6 +1293,16 @@ void DatabaseReplicated::commitAlterTable(const StorageID & table_id,
assert(checkDigestValid(query_context));
}
bool DatabaseReplicated::canExecuteReplicatedMetadataAlter() const
{
/// ReplicatedMergeTree may call commitAlterTable from its background threads when executing ALTER_METADATA entries.
/// It may update the metadata digest (both locally and in ZooKeeper)
/// before DatabaseReplicatedDDLWorker::initializeReplication() has finished.
/// We should not update metadata until the database is initialized.
return ddl_worker && ddl_worker->isCurrentlyActive();
}
void DatabaseReplicated::detachTablePermanently(ContextPtr local_context, const String & table_name)
{
auto txn = local_context->getZooKeeperMetadataTransaction();

View File

@ -48,6 +48,8 @@ public:
/// then it will be executed on all replicas.
BlockIO tryEnqueueReplicatedDDL(const ASTPtr & query, ContextPtr query_context, bool internal) override;
bool canExecuteReplicatedMetadataAlter() const override;
bool hasReplicationThread() const override { return true; }
void stopReplication() override;

View File

@ -91,12 +91,12 @@ void DatabaseReplicatedDDLWorker::initializeReplication()
if (zookeeper->tryGet(database->replica_path + "/digest", digest_str))
{
digest = parse<UInt64>(digest_str);
LOG_TRACE(log, "Metadata digest in ZooKeeper: {}", digest);
std::lock_guard lock{database->metadata_mutex};
local_digest = database->tables_metadata_digest;
}
else
{
LOG_WARNING(log, "Did not find digest in ZooKeeper, creating it");
/// Database was created by old ClickHouse versions, let's create the node
std::lock_guard lock{database->metadata_mutex};
digest = local_digest = database->tables_metadata_digest;
@ -104,6 +104,9 @@ void DatabaseReplicatedDDLWorker::initializeReplication()
zookeeper->create(database->replica_path + "/digest", digest_str, zkutil::CreateMode::Persistent);
}
LOG_TRACE(log, "Trying to initialize replication: our_log_ptr={}, max_log_ptr={}, local_digest={}, zk_digest={}",
our_log_ptr, max_log_ptr, local_digest, digest);
bool is_new_replica = our_log_ptr == 0;
bool lost_according_to_log_ptr = our_log_ptr + logs_to_keep < max_log_ptr;
bool lost_according_to_digest = database->db_settings.check_consistency && local_digest != digest;

View File

@ -254,6 +254,9 @@ public:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{}: alterTable() is not supported", getEngineName());
}
/// Special method for ReplicatedMergeTree and DatabaseReplicated
virtual bool canExecuteReplicatedMetadataAlter() const { return true; }
/// Returns time of table's metadata change, 0 if there is no corresponding metadata file.
virtual time_t getObjectMetadataModificationTime(const String & /*name*/) const
{

View File

@ -1448,6 +1448,15 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry(
LOG_TRACE(LogToStr(out_postpone_reason, log), fmt_string, entry.znode_name, entry.alter_version, head_alter);
return false;
}
auto database_name = storage.getStorageID().database_name;
auto database = DatabaseCatalog::instance().getDatabase(database_name);
if (!database->canExecuteReplicatedMetadataAlter())
{
LOG_TRACE(LogToStr(out_postpone_reason, log), "Cannot execute alter metadata {} with version {} "
"because database {} cannot process metadata alters now", entry.znode_name, entry.alter_version, database_name);
return false;
}
}
/// If this MUTATE_PART is part of alter modify/drop query, than we have to execute them one by one