More reliable test keeper tests

This commit is contained in:
alesapin 2021-02-17 14:53:47 +03:00
parent acf843a01a
commit 9396bae2e2
7 changed files with 444 additions and 260 deletions

View File

@ -31,7 +31,7 @@ struct ChangelogDirTest
{
std::string path;
bool drop;
ChangelogDirTest(std::string path_, bool drop_ = true)
explicit ChangelogDirTest(std::string path_, bool drop_ = true)
: path(path_)
, drop(drop_)
{

View File

@ -8,27 +8,18 @@ from multiprocessing.dummy import Pool
cluster = ClickHouseCluster(__file__)
node = cluster.add_instance('node', main_configs=['configs/enable_test_keeper.xml', 'configs/logs_conf.xml'], with_zookeeper=True)
from kazoo.client import KazooClient, KazooState
_genuine_zk_instance = None
_fake_zk_instance = None
from kazoo.client import KazooClient, KazooState, KeeperState
def get_genuine_zk():
global _genuine_zk_instance
if not _genuine_zk_instance:
print("Zoo1", cluster.get_instance_ip("zoo1"))
_genuine_zk_instance = cluster.get_kazoo_client('zoo1')
return _genuine_zk_instance
return cluster.get_kazoo_client('zoo1')
def get_fake_zk():
global _fake_zk_instance
if not _fake_zk_instance:
print("node", cluster.get_instance_ip("node"))
_fake_zk_instance = KazooClient(hosts=cluster.get_instance_ip("node") + ":9181", timeout=30.0)
def reset_last_zxid_listener(state):
print("Fake zk callback called for state", state)
global _fake_zk_instance
nonlocal _fake_zk_instance
if state != KazooState.CONNECTED:
_fake_zk_instance._reset()
@ -44,6 +35,15 @@ def create_random_path(prefix="", depth=1):
return prefix
return create_random_path(os.path.join(prefix, random_string(3)), depth - 1)
def stop_zk(zk):
try:
if zk:
zk.stop()
zk.close()
except:
pass
@pytest.fixture(scope="module")
def started_cluster():
try:
@ -53,15 +53,10 @@ def started_cluster():
finally:
cluster.shutdown()
if _genuine_zk_instance:
_genuine_zk_instance.stop()
_genuine_zk_instance.close()
if _fake_zk_instance:
_fake_zk_instance.stop()
_fake_zk_instance.close()
def test_simple_commands(started_cluster):
try:
genuine_zk = get_genuine_zk()
fake_zk = get_fake_zk()
@ -75,9 +70,13 @@ def test_simple_commands(started_cluster):
assert zk.exists("/test_simple_commands/somenode1")
print(zk.get("/test_simple_commands/somenode1"))
assert zk.get("/test_simple_commands/somenode1")[0] == b"world"
finally:
for zk in [genuine_zk, fake_zk]:
stop_zk(zk)
def test_sequential_nodes(started_cluster):
try:
genuine_zk = get_genuine_zk()
fake_zk = get_fake_zk()
genuine_zk.create("/test_sequential_nodes")
@ -91,6 +90,9 @@ def test_sequential_nodes(started_cluster):
genuine_childs = list(sorted(genuine_zk.get_children("/test_sequential_nodes")))
fake_childs = list(sorted(fake_zk.get_children("/test_sequential_nodes")))
assert genuine_childs == fake_childs
finally:
for zk in [genuine_zk, fake_zk]:
stop_zk(zk)
def assert_eq_stats(stat1, stat2):
@ -102,6 +104,7 @@ def assert_eq_stats(stat1, stat2):
assert stat1.numChildren == stat2.numChildren
def test_stats(started_cluster):
try:
genuine_zk = get_genuine_zk()
fake_zk = get_fake_zk()
genuine_zk.create("/test_stats_nodes")
@ -139,8 +142,12 @@ def test_stats(started_cluster):
print(genuine_stats)
print(fake_stats)
assert_eq_stats(genuine_stats, fake_stats)
finally:
for zk in [genuine_zk, fake_zk]:
stop_zk(zk)
def test_watchers(started_cluster):
try:
genuine_zk = get_genuine_zk()
fake_zk = get_fake_zk()
genuine_zk.create("/test_data_watches")
@ -196,8 +203,12 @@ def test_watchers(started_cluster):
print("Genuine children", genuine_children)
print("Fake children", fake_children)
assert genuine_children == fake_children
finally:
for zk in [genuine_zk, fake_zk]:
stop_zk(zk)
def test_multitransactions(started_cluster):
try:
genuine_zk = get_genuine_zk()
fake_zk = get_fake_zk()
for zk in [genuine_zk, fake_zk]:
@ -225,7 +236,9 @@ def test_multitransactions(started_cluster):
assert zk.exists('/test_multitransactions/q') is None
assert zk.exists('/test_multitransactions/a') is None
assert zk.exists('/test_multitransactions/x') is None
finally:
for zk in [genuine_zk, fake_zk]:
stop_zk(zk)
def exists(zk, path):
result = zk.exists(path)
@ -278,13 +291,13 @@ class Request(object):
arg_str = ', '.join([str(k) + "=" + str(v) for k, v in self.arguments.items()])
return "ZKRequest name {} with arguments {}".format(self.name, arg_str)
def generate_requests(iters=1):
def generate_requests(prefix="/", iters=1):
requests = []
existing_paths = []
for i in range(iters):
for _ in range(100):
rand_length = random.randint(0, 10)
path = "/"
path = prefix
for j in range(1, rand_length):
path = create_random_path(path, 1)
existing_paths.append(path)
@ -322,9 +335,13 @@ def generate_requests(iters=1):
def test_random_requests(started_cluster):
requests = generate_requests(10)
try:
requests = generate_requests("/test_random_requests", 10)
print("Generated", len(requests), "requests")
genuine_zk = get_genuine_zk()
fake_zk = get_fake_zk()
genuine_zk.create("/test_random_requests")
fake_zk.create("/test_random_requests")
for i, request in enumerate(requests):
genuine_throw = False
fake_throw = False
@ -333,20 +350,28 @@ def test_random_requests(started_cluster):
try:
genuine_result = request.callback(genuine_zk)
except Exception as ex:
print("i", i, "request", request)
print("Genuine exception", str(ex))
genuine_throw = True
try:
fake_result = request.callback(fake_zk)
except Exception as ex:
print("i", i, "request", request)
print("Fake exception", str(ex))
fake_throw = True
assert fake_throw == genuine_throw, "Fake throw genuine not or vise versa"
assert fake_throw == genuine_throw, "Fake throw genuine not or vise versa request {}"
assert fake_result == genuine_result, "Zookeeper results differ"
root_children_genuine = [elem for elem in list(sorted(genuine_zk.get_children("/"))) if elem not in ('clickhouse', 'zookeeper')]
root_children_fake = [elem for elem in list(sorted(fake_zk.get_children("/"))) if elem not in ('clickhouse', 'zookeeper')]
root_children_genuine = [elem for elem in list(sorted(genuine_zk.get_children("/test_random_requests"))) if elem not in ('clickhouse', 'zookeeper')]
root_children_fake = [elem for elem in list(sorted(fake_zk.get_children("/test_random_requests"))) if elem not in ('clickhouse', 'zookeeper')]
assert root_children_fake == root_children_genuine
finally:
for zk in [genuine_zk, fake_zk]:
stop_zk(zk)
def test_end_of_session(started_cluster):
fake_zk1 = None
fake_zk2 = None
genuine_zk1 = None
@ -401,13 +426,8 @@ def test_end_of_session(started_cluster):
assert fake_ephemeral_event == genuine_ephemeral_event
finally:
try:
for zk in [fake_zk1, fake_zk2, genuine_zk1, genuine_zk2]:
if zk:
zk.stop()
zk.close()
except:
pass
stop_zk(zk)
def test_end_of_watches_session(started_cluster):
fake_zk1 = None
@ -442,15 +462,11 @@ def test_end_of_watches_session(started_cluster):
assert dummy_set == 2
finally:
try:
for zk in [fake_zk1, fake_zk2]:
if zk:
zk.stop()
zk.close()
except:
pass
stop_zk(zk)
def test_concurrent_watches(started_cluster):
try:
fake_zk = get_fake_zk()
fake_zk.restart()
global_path = "/test_concurrent_watches_0"
@ -530,3 +546,5 @@ def test_concurrent_watches(started_cluster):
print("Diff", list(set(all_paths_created) - set(all_paths_triggered)))
assert dumb_watch_triggered_counter == watches_must_be_triggered
finally:
stop_zk(fake_zk)

View File

@ -0,0 +1 @@
#!/usr/bin/env python3

View File

@ -0,0 +1,21 @@
<yandex>
<test_keeper_server>
<tcp_port>9181</tcp_port>
<server_id>1</server_id>
<log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
<coordination_settings>
<operation_timeout_ms>5000</operation_timeout_ms>
<session_timeout_ms>10000</session_timeout_ms>
<raft_logs_level>trace</raft_logs_level>
</coordination_settings>
<raft_configuration>
<server>
<id>1</id>
<hostname>localhost</hostname>
<port>44444</port>
</server>
</raft_configuration>
</test_keeper_server>
</yandex>

View File

@ -0,0 +1,12 @@
<yandex>
<shutdown_wait_unfinished>3</shutdown_wait_unfinished>
<logger>
<level>trace</level>
<log>/var/log/clickhouse-server/log.log</log>
<errorlog>/var/log/clickhouse-server/log.err.log</errorlog>
<size>1000M</size>
<count>10</count>
<stderr>/var/log/clickhouse-server/stderr.log</stderr>
<stdout>/var/log/clickhouse-server/stdout.log</stdout>
</logger>
</yandex>

View File

@ -0,0 +1,8 @@
<yandex>
<zookeeper>
<node index="1">
<host>node1</host>
<port>9181</port>
</node>
</zookeeper>
</yandex>

View File

@ -0,0 +1,124 @@
#!/usr/bin/env python3
import pytest
from helpers.cluster import ClickHouseCluster
import random
import string
import os
import time
from kazoo.client import KazooClient, KazooState
cluster = ClickHouseCluster(__file__)
node = cluster.add_instance('node', main_configs=['configs/enable_test_keeper.xml', 'configs/logs_conf.xml', 'configs/use_test_keeper.xml'], stay_alive=True)
def random_string(length):
return ''.join(random.choices(string.ascii_lowercase + string.digits, k=length))
def create_random_path(prefix="", depth=1):
if depth == 0:
return prefix
return create_random_path(os.path.join(prefix, random_string(3)), depth - 1)
@pytest.fixture(scope="module")
def started_cluster():
try:
cluster.start()
yield cluster
finally:
cluster.shutdown()
def get_connection_zk(nodename, timeout=30.0):
_fake_zk_instance = KazooClient(hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout)
def reset_listener(state):
nonlocal _fake_zk_instance
print("Fake zk callback called for state", state)
if state != KazooState.CONNECTED:
_fake_zk_instance._reset()
_fake_zk_instance.add_listener(reset_listener)
_fake_zk_instance.start()
return _fake_zk_instance
def test_state_after_restart(started_cluster):
try:
node_zk = None
node_zk2 = None
node_zk = get_connection_zk("node")
node_zk.create("/test_state_after_restart", b"somevalue")
strs = []
for i in range(100):
strs.append(random_string(123).encode())
node_zk.create("/test_state_after_restart/node" + str(i), strs[i])
for i in range(100):
if i % 7 == 0:
node_zk.delete("/test_state_after_restart/node" + str(i))
node.restart_clickhouse(kill=True)
node_zk2 = get_connection_zk("node")
assert node_zk2.get("/test_state_after_restart")[0] == b"somevalue"
for i in range(100):
if i % 7 == 0:
assert node_zk2.exists("/test_state_after_restart/node" + str(i)) is None
else:
assert len(node_zk2.get("/test_state_after_restart/node" + str(i))[0]) == 123
assert node_zk2.get("/test_state_after_restart/node" + str(i))[0] == strs[i]
finally:
try:
if node_zk is not None:
node_zk.stop()
node_zk.close()
if node_zk2 is not None:
node_zk2.stop()
node_zk2.close()
except:
pass
# http://zookeeper-user.578899.n2.nabble.com/Why-are-ephemeral-nodes-written-to-disk-tp7583403p7583418.html
def test_ephemeral_after_restart(started_cluster):
try:
node_zk = None
node_zk2 = None
node_zk = get_connection_zk("node")
node_zk.create("/test_ephemeral_after_restart", b"somevalue")
strs = []
for i in range(100):
strs.append(random_string(123).encode())
node_zk.create("/test_ephemeral_after_restart/node" + str(i), strs[i], ephemeral=True)
for i in range(100):
if i % 7 == 0:
node_zk.delete("/test_ephemeral_after_restart/node" + str(i))
node.restart_clickhouse(kill=True)
node_zk2 = get_connection_zk("node")
assert node_zk2.get("/test_ephemeral_after_restart")[0] == b"somevalue"
for i in range(100):
if i % 7 == 0:
assert node_zk2.exists("/test_ephemeral_after_restart/node" + str(i)) is None
else:
assert len(node_zk2.get("/test_ephemeral_after_restart/node" + str(i))[0]) == 123
assert node_zk2.get("/test_ephemeral_after_restart/node" + str(i))[0] == strs[i]
finally:
try:
if node_zk is not None:
node_zk.stop()
node_zk.close()
if node_zk2 is not None:
node_zk2.stop()
node_zk2.close()
except:
pass