From 39c88c74e84b1109015ad5b80e164bf57799e3ba Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Sat, 22 Oct 2022 22:31:17 +0800 Subject: [PATCH] check whether last manually created snapshot is done --- contrib/NuRaft | 2 +- docs/en/operations/clickhouse-keeper.md | 8 +++++++- src/Coordination/FourLetterCommand.cpp | 7 ++++++- src/Coordination/FourLetterCommand.h | 14 ++++++++++++++ src/Coordination/KeeperDispatcher.h | 6 ++++++ src/Coordination/KeeperServer.cpp | 16 +++++++++++++++- src/Coordination/KeeperServer.h | 6 ++++++ .../test_keeper_four_word_command/test.py | 9 ++++++--- 8 files changed, 61 insertions(+), 7 deletions(-) diff --git a/contrib/NuRaft b/contrib/NuRaft index 1be805e7cb2..e4e746a24eb 160000 --- a/contrib/NuRaft +++ b/contrib/NuRaft @@ -1 +1 @@ -Subproject commit 1be805e7cb2494aa8170015493474379b0362dfc +Subproject commit e4e746a24eb56861a86f3672771e3308d8c40722 diff --git a/docs/en/operations/clickhouse-keeper.md b/docs/en/operations/clickhouse-keeper.md index 03eddd4f6ed..66b4685bff5 100644 --- a/docs/en/operations/clickhouse-keeper.md +++ b/docs/en/operations/clickhouse-keeper.md @@ -309,12 +309,18 @@ Sessions with Ephemerals (1): /clickhouse/task_queue/ddl ``` -- `csnp`: Schedule a snapshot creation task. Return `"Snapshot creation scheduled."` if successfully scheduled or Fail to scheduled snapshot creation.` if failed. +- `csnp`: Schedule a snapshot creation task. Return `Snapshot creation scheduled.` if successfully scheduled or `Fail to scheduled snapshot creation task.` if failed. ``` Snapshot creation scheduled. ``` +- `snpd`: Whether the last successfully scheduled snapshot creation is done. Return `Yes` if true or `No` if false. + +``` +Yes +``` + ## [experimental] Migration from ZooKeeper {#migration-from-zookeeper} Seamlessly migration from ZooKeeper to ClickHouse Keeper is impossible you have to stop your ZooKeeper cluster, convert data and start ClickHouse Keeper. 
`clickhouse-keeper-converter` tool allows converting ZooKeeper logs and snapshots to ClickHouse Keeper snapshot. It works only with ZooKeeper > 3.4. Steps for migration: diff --git a/src/Coordination/FourLetterCommand.cpp b/src/Coordination/FourLetterCommand.cpp index 70009703c5a..3d1077ea84c 100644 --- a/src/Coordination/FourLetterCommand.cpp +++ b/src/Coordination/FourLetterCommand.cpp @@ -477,7 +477,12 @@ String ApiVersionCommand::run() String CreateSnapshotCommand::run() { - return keeper_dispatcher.createSnapshot() ? "Snapshot creation scheduled." : "Fail to scheduled snapshot creation."; + return keeper_dispatcher.createSnapshot() ? "Snapshot creation scheduled." : "Fail to scheduled snapshot creation task."; +} + +String CheckSnapshotDoneCommand::run() +{ + return keeper_dispatcher.snapshotDone() ? "Yes" : "No"; } } diff --git a/src/Coordination/FourLetterCommand.h b/src/Coordination/FourLetterCommand.h index 5001a750d66..28f1d7f153f 100644 --- a/src/Coordination/FourLetterCommand.h +++ b/src/Coordination/FourLetterCommand.h @@ -340,4 +340,18 @@ struct CreateSnapshotCommand : public IFourLetterCommand String run() override; ~CreateSnapshotCommand() override = default; }; + +/// Check whether the last manually scheduled snapshot creation is done; replies "Yes" or "No" +struct CheckSnapshotDoneCommand : public IFourLetterCommand +{ + explicit CheckSnapshotDoneCommand(KeeperDispatcher & keeper_dispatcher_) + : IFourLetterCommand(keeper_dispatcher_) + { + } + + String name() override { return "snpd"; } + String run() override; + ~CheckSnapshotDoneCommand() override = default; +}; + } diff --git a/src/Coordination/KeeperDispatcher.h b/src/Coordination/KeeperDispatcher.h index 79212ea3040..48681957c13 100644 --- a/src/Coordination/KeeperDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -209,6 +209,12 @@ public: { return server->createSnapshot(); } + + /// Whether the last manually created snapshot is done + bool snapshotDone() + { + return 
server->snapshotDone(); + } }; } diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index e0186927b54..87ebea0b4ab 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -114,6 +114,7 @@ KeeperServer::KeeperServer( , is_recovering(config.getBool("keeper_server.force_recovery", false)) , keeper_context{std::make_shared<KeeperContext>()} , create_snapshot_on_exit(config.getBool("keeper_server.create_snapshot_on_exit", true)) + , last_manual_snapshot_log_idx(0) { if (coordination_settings->quorum_reads) LOG_WARNING(log, "Quorum reads enabled, Keeper will work slower."); @@ -908,7 +909,20 @@ Keeper4LWInfo KeeperServer::getPartiallyFilled4LWInfo() const bool KeeperServer::createSnapshot() { - return raft_instance->create_snapshot(); + std::lock_guard lock(snapshot_mutex); + if (raft_instance->create_snapshot()) + { + last_manual_snapshot_log_idx = raft_instance->get_last_snapshot_idx(); + LOG_INFO(log, "Successfully schedule a keeper snapshot creation task at log index {}", last_manual_snapshot_log_idx); + return true; + } + return false; +} + +bool KeeperServer::snapshotDone() +{ + std::lock_guard lock(snapshot_mutex); + return last_manual_snapshot_log_idx != 0 && last_manual_snapshot_log_idx == raft_instance->get_last_snapshot_idx(); } } diff --git a/src/Coordination/KeeperServer.h b/src/Coordination/KeeperServer.h index ec832199387..11e3b75d127 100644 --- a/src/Coordination/KeeperServer.h +++ b/src/Coordination/KeeperServer.h @@ -66,6 +66,10 @@ private: const bool create_snapshot_on_exit; + /// Used to check whether the previous manually created snapshot is complete. 
+ uint64_t last_manual_snapshot_log_idx; + std::mutex snapshot_mutex; + public: KeeperServer( const KeeperConfigurationAndSettingsPtr & settings_, @@ -133,6 +137,8 @@ public: bool waitConfigurationUpdate(const ConfigUpdateAction & task); bool createSnapshot(); + + bool snapshotDone(); }; } diff --git a/tests/integration/test_keeper_four_word_command/test.py b/tests/integration/test_keeper_four_word_command/test.py index 2b2343757bb..bfe0b2a96e4 100644 --- a/tests/integration/test_keeper_four_word_command/test.py +++ b/tests/integration/test_keeper_four_word_command/test.py @@ -598,7 +598,7 @@ def test_cmd_wchp(started_cluster): destroy_zk_client(zk) -def test_cmd_csnp(started_cluster): +def test_cmd_snapshot(started_cluster): zk = None try: wait_nodes() @@ -607,7 +607,10 @@ def test_cmd_csnp(started_cluster): zk = get_fake_zk(node1.name, timeout=30.0) - data = send_4lw_cmd(cmd="csnp") - assert data == "Snapshot creation scheduled." + schedule_reply = send_4lw_cmd(cmd="csnp") + assert schedule_reply == "Snapshot creation scheduled." + + done_reply = send_4lw_cmd(cmd="snpd") + assert done_reply in ("Yes", "No") finally: destroy_zk_client(zk)