Merge pull request #33602 from ClickHouse/interpret_keeper_error_in_our_client

Make the ZooKeeper client better interpret a Keeper server connection rejection
This commit is contained in:
alesapin 2022-01-14 11:32:27 +03:00 committed by GitHub
commit d3a93f5d7e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 19 additions and 3 deletions

View File

@ -39,6 +39,7 @@ std::string toString(OpNum op_num);
OpNum getOpNum(int32_t raw_op_num);
/// Protocol version exchanged in a normal handshake: the server writes it when it
/// accepts the connection and the client expects to read exactly this value.
static constexpr int32_t ZOOKEEPER_PROTOCOL_VERSION = 0;
/// Special protocol version that clickhouse-keeper writes in its handshake instead of
/// ZOOKEEPER_PROTOCOL_VERSION to tell the client that it is not ready to serve it.
/// The client reports it as a connection loss rather than a marshalling error,
/// which gives faster failover than a plain connection drop.
static constexpr int32_t KEEPER_PROTOCOL_VERSION_CONNECTION_REJECT = 42;
/// Length of the client handshake message (presumably in bytes, without the
/// trailing read-only flag -- TODO confirm against the code that sends it).
static constexpr int32_t CLIENT_HANDSHAKE_LENGTH = 44;
/// Length of the client handshake message when the read-only flag is included
/// (presumably one extra byte -- TODO confirm).
static constexpr int32_t CLIENT_HANDSHAKE_LENGTH_WITH_READONLY = 45;
/// Length value the server writes as the first field of its handshake response.
static constexpr int32_t SERVER_HANDSHAKE_LENGTH = 36;

View File

@ -489,7 +489,15 @@ void ZooKeeper::receiveHandshake()
read(protocol_version_read);
if (protocol_version_read != ZOOKEEPER_PROTOCOL_VERSION)
throw Exception("Unexpected protocol version: " + DB::toString(protocol_version_read), Error::ZMARSHALLINGERROR);
{
/// Special way to tell a client that server is not ready to serve it.
/// It's better for faster failover than just connection drop.
/// Implemented in clickhouse-keeper.
if (protocol_version_read == KEEPER_PROTOCOL_VERSION_CONNECTION_REJECT)
throw Exception("Keeper server rejected the connection during the handshake. Possibly it's overloaded, doesn't see leader or stale", Error::ZCONNECTIONLOSS);
else
throw Exception("Unexpected protocol version: " + DB::toString(protocol_version_read), Error::ZMARSHALLINGERROR);
}
read(timeout);
if (timeout != session_timeout.totalMilliseconds())

View File

@ -227,9 +227,16 @@ void KeeperTCPHandler::sendHandshake(bool has_leader)
{
Coordination::write(Coordination::SERVER_HANDSHAKE_LENGTH, *out);
if (has_leader)
{
Coordination::write(Coordination::ZOOKEEPER_PROTOCOL_VERSION, *out);
else /// Specially ignore connections if we are not leader, client will throw exception
Coordination::write(42, *out);
}
else
{
/// Ignore connections if we are not leader, client will throw exception
/// and reconnect to another replica faster. The ClickHouse client provides
/// a clear message for this protocol version.
Coordination::write(Coordination::KEEPER_PROTOCOL_VERSION_CONNECTION_REJECT, *out);
}
Coordination::write(static_cast<int32_t>(session_timeout.totalMilliseconds()), *out);
Coordination::write(session_id, *out);