ClickHouse/tests/integration/test_zookeeper_config_load_balancing/test.py

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

189 lines
6.1 KiB
Python
Raw Normal View History

import logging
import re
2024-06-22 23:58:28 +00:00
import time
2024-09-27 10:19:39 +00:00
2021-10-21 07:46:34 +00:00
import pytest
2024-09-27 10:19:39 +00:00
2021-10-21 07:46:34 +00:00
from helpers.cluster import ClickHouseCluster
from helpers.keeper_utils import ss_established
2022-03-23 12:39:07 +00:00
from helpers.network import PartitionManager
2024-06-22 23:58:28 +00:00
from helpers.test_tools import assert_eq_with_retry
2021-10-21 07:46:34 +00:00
2022-03-24 11:23:14 +00:00
# Test cluster; its ZooKeeper client configuration is taken from the
# load-balancing config shipped next to this test.
cluster = ClickHouseCluster(
    __file__,
    zookeeper_config_path="configs/zookeeper_load_balancing.xml",
)
2021-10-21 07:46:34 +00:00
2022-03-21 14:55:01 +00:00
# Hostnames are deliberately 3 letters + digit ("nod1" vs "zoo1") so that
# getHostNameDifference("nod1", "zoo1") behaves as the tests expect.
node1, node2, node3 = (
    cluster.add_instance(
        hostname,
        with_zookeeper=True,
        main_configs=["configs/zookeeper_load_balancing.xml"],
    )
    for hostname in ("nod1", "nod2", "nod3")
)

# nod4 is the only instance started with the second load-balancing config.
node4 = cluster.add_instance(
    "nod4",
    with_zookeeper=True,
    main_configs=["configs/zookeeper_load_balancing2.xml"],
)
# One pattern per ZooKeeper host (zoo1..zoo3). Each matches a connection line
# that names the zooN-1 container and the "_default:2181" endpoint, with an
# optional "gw<N>" component in both places.
zk1_re, zk2_re, zk3_re = (
    re.compile(
        rf"testzookeeperconfigloadbalancing-(gw\d+-)?zoo{zoo_number}-1"
        r".*testzookeeperconfigloadbalancing(-gw\d+)?_default:2181"
    )
    for zoo_number in (1, 2, 3)
)
2022-03-22 18:05:37 +00:00
2022-03-21 13:22:15 +00:00
def change_balancing(old, new, reload=True):
    """Switch the ZooKeeper load-balancing policy on node1, node2 and node3.

    Replaces ``<zookeeper_load_balancing>{old}<`` with
    ``<zookeeper_load_balancing>{new}<`` in each node's
    ``zookeeper_load_balancing.xml``. When ``reload`` is true, each node then
    runs a ``select`` of the old/new names followed by
    ``system reload config``.
    """
    config_path = "/etc/clickhouse-server/config.d/zookeeper_load_balancing.xml"
    tag_template = "<zookeeper_load_balancing>{}<"
    before = tag_template.format(old)
    after = tag_template.format(new)

    for instance in (node1, node2, node3):
        instance.replace_in_config(config_path, before, after)
        if not reload:
            continue
        # NOTE(review): this select presumably just leaves a marker of the
        # transition on the server side - confirm.
        instance.query(f"select '{old}', '{new}'")
        instance.query("system reload config")
2021-10-21 07:46:34 +00:00
2021-11-02 07:40:05 +00:00
@pytest.fixture(scope="module")
def started_cluster():
    """Module-scoped fixture that starts the cluster and yields it to tests.

    The cluster is shut down when the module's tests finish, and also when
    ``cluster.start()`` itself raises.
    """
    try:
        cluster.start()
        yield cluster
    finally:
        cluster.shutdown()
def test_first_or_random(started_cluster):
    """Check the ``first_or_random`` policy.

    After switching from ``random``, each of node1..node3 must report exactly
    one line from the ``ss_established`` command, and that line must match
    ``zk1_re``.
    """
    try:
        change_balancing("random", "first_or_random")
        for instance in (node1, node2, node3):
            raw_output = instance.exec_in_container(
                ss_established, privileged=True, user="root"
            )
            established = raw_output.strip().split("\n")
            logging.debug("Established connections for 2181:\n%s", established)
            assert len(established) == 1
            assert zk1_re.search(established[0])
    finally:
        # Put the config file back to "random"; no reload is requested here.
        change_balancing("first_or_random", "random", reload=False)
2022-03-21 13:22:15 +00:00
def test_in_order(started_cluster):
    """Check the ``in_order`` policy.

    After switching from ``random``, each of node1..node3 must report exactly
    one line from the ``ss_established`` command, and that line must match
    ``zk1_re``.
    """
    try:
        change_balancing("random", "in_order")
        for instance in (node1, node2, node3):
            raw_output = instance.exec_in_container(
                ss_established, privileged=True, user="root"
            )
            established = raw_output.strip().split("\n")
            logging.debug("Established connections for 2181:\n%s", established)
            assert len(established) == 1
            assert zk1_re.search(established[0])
    finally:
        # Put the config file back to "random"; no reload is requested here.
        change_balancing("in_order", "random", reload=False)
2021-11-02 07:40:05 +00:00
2022-03-21 13:22:15 +00:00
def test_nearest_hostname(started_cluster):
    """Check the ``nearest_hostname`` policy.

    After switching from ``random``, nodN must report exactly one line from
    the ``ss_established`` command, and that line must match the pattern for
    zooN (node1 -> zk1_re, node2 -> zk2_re, node3 -> zk3_re).
    """
    instances = (node1, node2, node3)
    expected_patterns = (zk1_re, zk2_re, zk3_re)
    try:
        change_balancing("random", "nearest_hostname")
        for instance, pattern in zip(instances, expected_patterns):
            raw_output = instance.exec_in_container(
                ss_established, privileged=True, user="root"
            )
            established = raw_output.strip().split("\n")
            logging.debug("Established connections for 2181:\n%s", established)
            assert len(established) == 1
            assert pattern.search(established[0])
    finally:
        # Put the config file back to "random"; no reload is requested here.
        change_balancing("nearest_hostname", "random", reload=False)
2022-03-21 13:22:15 +00:00
def test_hostname_levenshtein_distance(started_cluster):
    """Check the ``hostname_levenshtein_distance`` policy.

    After switching from ``random``, nodN must report exactly one line from
    the ``ss_established`` command, and that line must match the pattern for
    zooN (node1 -> zk1_re, node2 -> zk2_re, node3 -> zk3_re).
    """
    expectations = [(node1, zk1_re), (node2, zk2_re), (node3, zk3_re)]
    try:
        change_balancing("random", "hostname_levenshtein_distance")
        for instance, pattern in expectations:
            raw_output = instance.exec_in_container(
                ss_established, privileged=True, user="root"
            )
            established = raw_output.strip().split("\n")
            logging.debug("Established connections for 2181:\n%s", established)
            assert len(established) == 1
            assert pattern.search(established[0])
    finally:
        # Put the config file back to "random"; no reload is requested here.
        change_balancing("hostname_levenshtein_distance", "random", reload=False)
2022-03-21 13:22:15 +00:00
def test_round_robin(started_cluster):
    """Check the ``round_robin`` policy.

    For each of node1..node3: read the current connection index from
    ``system.zookeeper_connection``, reject traffic from the node to the
    matching ``zoo<index + 1>`` host, and wait until the reported index
    becomes ``(index + 1) % 3``. Network rules are healed after every node
    and again on exit.
    """
    index_query = "select index from system.zookeeper_connection"
    pm = PartitionManager()
    try:
        change_balancing("random", "round_robin")
        for instance in (node1, node2, node3):
            current = int(instance.query(index_query).strip())
            expected = (current + 1) % 3

            # The connection index is 0-based while host names are 1-based.
            reject_rule = {
                "source": instance.ip_address,
                "destination": cluster.get_instance_ip(f"zoo{current + 1}"),
                "action": "REJECT --reject-with tcp-reset",
            }
            pm._add_rule(reject_rule)

            assert_eq_with_retry(instance, index_query, f"{expected}\n")
            pm.heal_all()
    finally:
        pm.heal_all()
        # Put the config file back to "random"; no reload is requested here.
        change_balancing("round_robin", "random", reload=False)
2024-06-22 23:58:28 +00:00
def test_az(started_cluster):
    """Check availability-zone fallback and return on node4.

    While traffic from node4 to zoo2 is rejected, node4 must report an
    availability zone other than ``az2`` and keep its session for at least
    the 5 seconds slept. Once the network is healed, the reported zone must
    become ``az2``.
    """
    az_query = "select availability_zone from system.zookeeper_connection"
    pm = PartitionManager()
    try:
        # make sure it disconnects from the optimal node
        block_zoo2 = {
            "source": node4.ip_address,
            "destination": cluster.get_instance_ip("zoo2"),
            "action": "REJECT --reject-with tcp-reset",
        }
        pm._add_rule(block_zoo2)

        node4.query_with_retry("select * from system.zookeeper where path='/'")
        current_az = node4.query(az_query)
        assert current_az != "az2\n"

        # fallback_session_lifetime.max is 1 second, but it shouldn't drop
        # current session until the node becomes available
        time.sleep(5)  # this is fine
        session_uptime = int(node4.query("select zookeeperSessionUptime()").strip())
        assert session_uptime >= 5

        pm.heal_all()
        assert_eq_with_retry(node4, az_query, "az2\n")
    finally:
        pm.heal_all()