check whether last manually created snapshot is done

This commit is contained in:
JackyWoo 2022-10-22 22:31:17 +08:00
parent bf291790b7
commit 39c88c74e8
8 changed files with 61 additions and 7 deletions

2
contrib/NuRaft vendored

@ -1 +1 @@
Subproject commit 1be805e7cb2494aa8170015493474379b0362dfc
Subproject commit e4e746a24eb56861a86f3672771e3308d8c40722

View File

@ -309,12 +309,18 @@ Sessions with Ephemerals (1):
/clickhouse/task_queue/ddl
```
- `csnp`: Schedule a snapshot creation task. Return `"Snapshot creation scheduled."` if successfully scheduled or `Fail to scheduled snapshot creation.` if failed.
- `csnp`: Schedule a snapshot creation task. Return `Snapshot creation scheduled.` if successfully scheduled or `Fail to scheduled snapshot creation.` if failed.
```
Snapshot creation scheduled.
```
- `snpd`: Whether the last successfully scheduled snapshot creation is done. Return `Yes` if true or `No` if false.
```
Yes
```
## [experimental] Migration from ZooKeeper {#migration-from-zookeeper}
Seamless migration from ZooKeeper to ClickHouse Keeper is impossible: you have to stop your ZooKeeper cluster, convert the data, and start ClickHouse Keeper. The `clickhouse-keeper-converter` tool converts ZooKeeper logs and snapshots to a ClickHouse Keeper snapshot. It works only with ZooKeeper > 3.4. Steps for migration:

View File

@ -477,7 +477,12 @@ String ApiVersionCommand::run()
/// Handler for the `csnp` four-letter command.
/// Asks the dispatcher to schedule a snapshot and reports the outcome as plain text:
/// "Snapshot creation scheduled." when createSnapshot() returns true, otherwise the failure text.
String CreateSnapshotCommand::run()
{
    /// Exactly one return statement; the failure wording is the one documented for `csnp`.
    return keeper_dispatcher.createSnapshot() ? "Snapshot creation scheduled." : "Fail to scheduled snapshot creation.";
}
/// Handler for the `snpd` four-letter command.
/// Returns "Yes" when keeper_dispatcher.snapshotDone() is true and "No" otherwise,
/// which is the reply format documented for `snpd`.
String CheckSnapshotDoneCommand::run()
{
    /// "No" covers every case where snapshotDone() is false; it does not mean that scheduling failed.
    return keeper_dispatcher.snapshotDone() ? "Yes" : "No";
}
}

View File

@ -340,4 +340,18 @@ struct CreateSnapshotCommand : public IFourLetterCommand
String run() override;
~CreateSnapshotCommand() override = default;
};
/// Four-letter command `snpd`: checks whether the last manual snapshot is done.
/// The reply text is produced by run(), which is defined out of line.
struct CheckSnapshotDoneCommand : public IFourLetterCommand
{
    explicit CheckSnapshotDoneCommand(KeeperDispatcher & keeper_dispatcher_) : IFourLetterCommand(keeper_dispatcher_) {}

    /// Command code under which this handler is known.
    String name() override { return "snpd"; }

    String run() override;

    ~CheckSnapshotDoneCommand() override = default;
};
}

View File

@ -209,6 +209,12 @@ public:
{
return server->createSnapshot();
}
/// Tells whether the last manually created snapshot is done.
/// Pure delegation: the answer comes from server->snapshotDone().
bool snapshotDone()
{
    const bool is_done = server->snapshotDone();
    return is_done;
}
};
}

View File

@ -114,6 +114,7 @@ KeeperServer::KeeperServer(
, is_recovering(config.getBool("keeper_server.force_recovery", false))
, keeper_context{std::make_shared<KeeperContext>()}
, create_snapshot_on_exit(config.getBool("keeper_server.create_snapshot_on_exit", true))
, last_manual_snapshot_log_idx(0)
{
if (coordination_settings->quorum_reads)
LOG_WARNING(log, "Quorum reads enabled, Keeper will work slower.");
@ -908,7 +909,20 @@ Keeper4LWInfo KeeperServer::getPartiallyFilled4LWInfo() const
/// Schedule a manual snapshot through raft_instance.
/// On success, records raft_instance->get_last_snapshot_idx() in last_manual_snapshot_log_idx
/// (under snapshot_mutex) so that snapshotDone() has something to compare against, and returns true.
/// Returns false, leaving the recorded index untouched, when raft_instance refuses to schedule.
bool KeeperServer::createSnapshot()
{
    /// Held for the whole call so scheduling and recording the index are not interleaved with snapshotDone().
    std::lock_guard lock(snapshot_mutex);

    if (!raft_instance->create_snapshot())
        return false;

    /// NOTE(review): this reads get_last_snapshot_idx() right after scheduling. Confirm it yields the index
    /// of the snapshot just scheduled rather than the previously completed one; otherwise snapshotDone()
    /// could report completion too early.
    last_manual_snapshot_log_idx = raft_instance->get_last_snapshot_idx();
    LOG_INFO(log, "Successfully schedule a keeper snapshot creation task at log index {}", last_manual_snapshot_log_idx);
    return true;
}
/// Whether the last manually scheduled snapshot is done.
/// True only when a manual snapshot index has been recorded (non-zero) and it equals
/// raft_instance->get_last_snapshot_idx(). The check runs under snapshot_mutex.
bool KeeperServer::snapshotDone()
{
    std::lock_guard guard(snapshot_mutex);

    /// Zero is the constructor's initial value: nothing has been recorded, so raft is not queried.
    if (last_manual_snapshot_log_idx == 0)
        return false;

    return last_manual_snapshot_log_idx == raft_instance->get_last_snapshot_idx();
}
}

View File

@ -66,6 +66,10 @@ private:
const bool create_snapshot_on_exit;
/// Used to check whether the previous manually created snapshot is complete.
/// Initialized to 0 in the constructor; createSnapshot() assigns it from
/// raft_instance->get_last_snapshot_idx(), and snapshotDone() treats 0 as "not done".
uint64_t last_manual_snapshot_log_idx;
/// Locked by both createSnapshot() and snapshotDone() around their use of last_manual_snapshot_log_idx.
std::mutex snapshot_mutex;
public:
KeeperServer(
const KeeperConfigurationAndSettingsPtr & settings_,
@ -133,6 +137,8 @@ public:
bool waitConfigurationUpdate(const ConfigUpdateAction & task);
/// Schedule a manual snapshot via raft_instance; returns true if it was scheduled, false otherwise.
bool createSnapshot();
/// Returns true when a manual snapshot index has been recorded (non-zero) and it equals
/// raft_instance->get_last_snapshot_idx().
bool snapshotDone();
};
}

View File

@ -598,7 +598,7 @@ def test_cmd_wchp(started_cluster):
destroy_zk_client(zk)
def test_cmd_csnp(started_cluster):
def test_cmd_snapshot(started_cluster):
zk = None
try:
wait_nodes()
@ -607,7 +607,10 @@ def test_cmd_csnp(started_cluster):
zk = get_fake_zk(node1.name, timeout=30.0)
data = send_4lw_cmd(cmd="csnp")
assert data == "Snapshot creation scheduled."
create = send_4lw_cmd(cmd="csnp")
assert create == "Snapshot creation scheduled."
check = send_4lw_cmd(cmd="snpd")
assert (check == "Yes" or check == "No")
finally:
destroy_zk_client(zk)