Merge pull request #43026 from JackyWoo/keeper_manually_assign_leader

Keeper support manually assigning leader
This commit is contained in:
Antonio Andelic 2022-11-21 09:30:00 +01:00 committed by GitHub
commit 3cb202a63b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 105 additions and 17 deletions

View File

@ -57,7 +57,7 @@ Internal coordination settings are located in the `<keeper_server>.<coordination
- `auto_forwarding` — Allow to forward write requests from followers to the leader (default: true).
- `shutdown_timeout` — Wait to finish internal connections and shutdown (ms) (default: 5000).
- `startup_timeout` — If the server doesn't connect to other quorum participants in the specified timeout it will terminate (ms) (default: 30000).
- `four_letter_word_white_list` — White list of 4lw commands (default: `conf,cons,crst,envi,ruok,srst,srvr,stat,wchc,wchs,dirs,mntr,isro`).
- `four_letter_word_white_list` — White list of 4lw commands (default: `conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif,rqld`).
Quorum configuration is located in the `<keeper_server>.<raft_configuration>` section and contains the server descriptions.
@ -126,7 +126,7 @@ clickhouse keeper --config /etc/your_path_to_config/config.xml
ClickHouse Keeper also provides 4lw commands which are almost the same as in ZooKeeper. Each command is composed of four letters, such as `mntr`, `stat`, etc. Some of the more interesting commands: `stat` gives general information about the server and connected clients, while `srvr` and `cons` give extended details on the server and connections respectively.
The 4lw commands has a white list configuration `four_letter_word_white_list` which has default value `conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif`.
The 4lw commands have a white list configuration `four_letter_word_white_list`, which has the default value `conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif,rqld`.
You can issue the commands to ClickHouse Keeper via telnet or nc, at the client port.
@ -328,6 +328,12 @@ target_committed_log_idx 101
last_snapshot_idx 50
```
- `rqld`: Request to become the new leader. Returns `Sent leadership request to leader.` if the request was sent, or `Failed to send leadership request to leader.` if it was not. Note that if the node is already the leader, the outcome is the same as if the request had been sent.
```
Sent leadership request to leader.
```
## Migration from ZooKeeper {#migration-from-zookeeper}
Seamless migration from ZooKeeper to ClickHouse Keeper is impossible: you have to stop your ZooKeeper cluster, convert the data, and start ClickHouse Keeper. The `clickhouse-keeper-converter` tool converts ZooKeeper logs and snapshots to a ClickHouse Keeper snapshot. It works only with ZooKeeper > 3.4. Steps for migration:

View File

@ -36,7 +36,7 @@ void CoordinationSettings::loadFromConfig(const String & config_elem, const Poco
}
const String KeeperConfigurationAndSettings::DEFAULT_FOUR_LETTER_WORD_CMD = "conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif";
const String KeeperConfigurationAndSettings::DEFAULT_FOUR_LETTER_WORD_CMD = "conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif,rqld";
KeeperConfigurationAndSettings::KeeperConfigurationAndSettings()
: server_id(NOT_EXIST)

View File

@ -142,6 +142,9 @@ void FourLetterCommandFactory::registerCommands(KeeperDispatcher & keeper_dispat
FourLetterCommandPtr log_info_command = std::make_shared<LogInfoCommand>(keeper_dispatcher);
factory.registerCommand(log_info_command);
FourLetterCommandPtr request_leader_command = std::make_shared<RequestLeaderCommand>(keeper_dispatcher);
factory.registerCommand(request_leader_command);
factory.initializeAllowList(keeper_dispatcher);
factory.setInitialize(true);
}
@ -507,4 +510,9 @@ String LogInfoCommand::run()
return ret.str();
}
/// Runs the `rqld` command: calls requestLeader() on the dispatcher and
/// maps its boolean result to the textual reply returned to the caller.
String RequestLeaderCommand::run()
{
    if (keeper_dispatcher.requestLeader())
        return "Sent leadership request to leader.";

    return "Failed to send leadership request to leader.";
}
}

View File

@ -364,4 +364,17 @@ struct LogInfoCommand : public IFourLetterCommand
~LogInfoCommand() override = default;
};
/// Four-letter command `rqld`: request that this node become the leader.
/// The textual reply is produced by run(), defined out of line.
struct RequestLeaderCommand : public IFourLetterCommand
{
    explicit RequestLeaderCommand(KeeperDispatcher & keeper_dispatcher_) : IFourLetterCommand(keeper_dispatcher_) {}

    ~RequestLeaderCommand() override = default;

    /// Four-letter identifier under which the command is registered.
    String name() override
    {
        return "rqld";
    }

    /// Executes the command and returns the reply text.
    String run() override;
};
}

View File

@ -215,6 +215,12 @@ public:
{
return server->getKeeperLogInfo();
}
/// Request to be leader. Forwards to the underlying server and returns
/// whatever the server's requestLeader() reports.
bool requestLeader()
{
    const bool request_result = server->requestLeader();
    return request_result;
}
};
}

View File

@ -932,4 +932,9 @@ KeeperLogInfo KeeperServer::getKeeperLogInfo()
return log_info;
}
/// Returns true right away when isLeader() already holds; otherwise returns
/// the result of raft_instance->request_leadership().
bool KeeperServer::requestLeader()
{
    /// Already the leader: nothing to request, report success.
    if (isLeader())
        return true;

    return raft_instance->request_leadership();
}
}

View File

@ -135,6 +135,8 @@ public:
uint64_t createSnapshot();
KeeperLogInfo getKeeperLogInfo();
bool requestLeader();
};
}

View File

@ -39,3 +39,15 @@ def wait_until_quorum_lost(cluster, node, port=9181):
def wait_nodes(cluster, nodes):
for node in nodes:
wait_until_connected(cluster, node)
def is_leader(cluster, node, port=9181):
    """Return True if the `stat` 4lw reply from `node` contains "Mode: leader"."""
    return "Mode: leader" in send_4lw_cmd(cluster, node, "stat", port)
def get_leader(cluster, nodes):
    """Return the first node in `nodes` for which is_leader() is true.

    Nodes are probed in order and probing stops at the first match.
    Raises Exception when no node reports itself as leader.
    """
    leader = next(
        (candidate for candidate in nodes if is_leader(cluster, candidate)), None
    )
    if leader is None:
        raise Exception("No leader in Keeper cluster.")
    return leader

View File

@ -33,7 +33,7 @@
<id>3</id>
<hostname>node3</hostname>
<port>9234</port>
<can_become_leader>false</can_become_leader>
<can_become_leader>true</can_become_leader>
<start_as_follower>true</start_as_follower>
<priority>1</priority>
</server>

View File

@ -33,7 +33,7 @@
<id>3</id>
<hostname>node3</hostname>
<port>9234</port>
<can_become_leader>false</can_become_leader>
<can_become_leader>true</can_become_leader>
<start_as_follower>true</start_as_follower>
<priority>1</priority>
</server>

View File

@ -33,7 +33,7 @@
<id>3</id>
<hostname>node3</hostname>
<port>9234</port>
<can_become_leader>false</can_become_leader>
<can_become_leader>true</can_become_leader>
<start_as_follower>true</start_as_follower>
<priority>1</priority>
</server>

View File

@ -148,10 +148,11 @@ def test_cmd_mntr(started_cluster):
wait_nodes()
clear_znodes()
leader = keeper_utils.get_leader(cluster, [node1, node2, node3])
# reset stat first
reset_node_stats(node1)
reset_node_stats(leader)
zk = get_fake_zk(node1.name, timeout=30.0)
zk = get_fake_zk(leader.name, timeout=30.0)
do_some_action(
zk,
create_cnt=10,
@ -162,7 +163,7 @@ def test_cmd_mntr(started_cluster):
delete_cnt=2,
)
data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="mntr")
data = keeper_utils.send_4lw_cmd(cluster, leader, cmd="mntr")
# print(data.decode())
reader = csv.reader(data.split("\n"), delimiter="\t")
@ -307,12 +308,13 @@ def test_cmd_srvr(started_cluster):
wait_nodes()
clear_znodes()
reset_node_stats(node1)
leader = keeper_utils.get_leader(cluster, [node1, node2, node3])
reset_node_stats(leader)
zk = get_fake_zk(node1.name, timeout=30.0)
zk = get_fake_zk(leader.name, timeout=30.0)
do_some_action(zk, create_cnt=10)
data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="srvr")
data = keeper_utils.send_4lw_cmd(cluster, leader, cmd="srvr")
print("srvr output -------------------------------------")
print(data)
@ -329,7 +331,7 @@ def test_cmd_srvr(started_cluster):
assert result["Received"] == "10"
assert result["Sent"] == "10"
assert int(result["Connections"]) == 1
assert int(result["Zxid"]) > 14
assert int(result["Zxid"]) > 10
assert result["Mode"] == "leader"
assert result["Node count"] == "13"
@ -342,13 +344,15 @@ def test_cmd_stat(started_cluster):
try:
wait_nodes()
clear_znodes()
reset_node_stats(node1)
reset_conn_stats(node1)
zk = get_fake_zk(node1.name, timeout=30.0)
leader = keeper_utils.get_leader(cluster, [node1, node2, node3])
reset_node_stats(leader)
reset_conn_stats(leader)
zk = get_fake_zk(leader.name, timeout=30.0)
do_some_action(zk, create_cnt=10)
data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="stat")
data = keeper_utils.send_4lw_cmd(cluster, leader, cmd="stat")
print("stat output -------------------------------------")
print(data)
@ -604,6 +608,10 @@ def test_cmd_csnp(started_cluster):
wait_nodes()
zk = get_fake_zk(node1.name, timeout=30.0)
data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="csnp")
print("csnp output -------------------------------------")
print(data)
try:
int(data)
assert True
@ -623,7 +631,10 @@ def test_cmd_lgif(started_cluster):
do_some_action(zk, create_cnt=100)
data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="lgif")
print("lgif output -------------------------------------")
print(data)
reader = csv.reader(data.split("\n"), delimiter="\t")
result = {}
@ -641,3 +652,28 @@ def test_cmd_lgif(started_cluster):
assert int(result["last_snapshot_idx"]) >= 1
finally:
destroy_zk_client(zk)
def test_cmd_rqld(started_cluster):
    """Send `rqld` to node1 and node3 in turn and check each one ends up leader."""
    wait_nodes()

    # Upper bound on one-second polls while waiting for the leadership change.
    max_polls = 30

    # node2 is skipped: the original test notes that it cannot be leader.
    for node in [node1, node3]:
        reply = keeper_utils.send_4lw_cmd(cluster, node, cmd="rqld")
        assert reply == "Sent leadership request to leader."

        print("rqld output -------------------------------------")
        print(reply)

        if not keeper_utils.is_leader(cluster, node):
            # Poll until the node reports leader mode or the poll budget runs out.
            # TODO: polling with a fixed budget is not a strict way to wait.
            polls = 0
            while not keeper_utils.is_leader(cluster, node) and polls < max_polls:
                time.sleep(1)
                polls += 1

            if polls == max_polls:
                print(
                    node.name
                    + " does not become leader after 30s, maybe there is something wrong."
                )

        assert keeper_utils.is_leader(cluster, node)