ClickHouse/tests/integration/test_host_ip_change/test.py

203 lines
8.6 KiB
Python
Raw Normal View History

2019-06-04 20:59:31 +00:00
import pytest
2019-07-03 16:37:37 +00:00
from helpers.client import QueryRuntimeException
from helpers.cluster import ClickHouseCluster
2019-07-03 16:37:37 +00:00
from helpers.test_tools import TSV
from helpers.test_tools import assert_eq_with_retry
2019-06-04 20:59:31 +00:00
# Single cluster object shared by every instance and fixture declared in this module.
cluster = ClickHouseCluster(__file__)
2019-07-03 16:37:37 +00:00
def _fill_nodes(nodes, table_name):
    """Create the ``test`` database and a replicated table on every node.

    Each node in ``nodes`` receives a ``ReplicatedMergeTree`` table called
    ``table_name``. The ZooKeeper path is derived from the table name and the
    replica name is the node's own ``name``.
    """
    ddl_template = '''
CREATE DATABASE IF NOT EXISTS test;
CREATE TABLE IF NOT EXISTS {0}(date Date, id UInt32)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/{0}', '{1}')
ORDER BY id PARTITION BY toYYYYMM(date);
'''
    for replica in nodes:
        replica.query(ddl_template.format(table_name, replica.name))
# Instances for the "drop DNS cache" scenario: node1 only gets the listen_host
# config, node2 additionally loads dns_update_long.xml.
node1 = cluster.add_instance(
    'node1',
    main_configs=['configs/listen_host.xml'],
    with_zookeeper=True,
    ipv6_address='2001:3984:3989::1:1111'
)
node2 = cluster.add_instance(
    'node2',
    main_configs=['configs/listen_host.xml', 'configs/dns_update_long.xml'],
    with_zookeeper=True,
    ipv6_address='2001:3984:3989::1:1112'
)
2019-06-04 20:59:31 +00:00
@pytest.fixture(scope="module")
def cluster_without_dns_cache_update():
    """Module-scoped fixture for the "drop DNS cache" scenario.

    Starts the shared cluster, creates the replicated table
    ``test_table_drop`` on node1 and node2, yields the cluster to the tests
    and always shuts the cluster down afterwards.

    Raises:
        Exception: any error raised while starting the cluster or creating
            the tables is printed and then re-raised, so pytest reports the
            real cause instead of a fixture that never yielded.
    """
    try:
        cluster.start()
        _fill_nodes([node1, node2], 'test_table_drop')
        yield cluster
    except Exception as ex:
        # Function-call form works on both Python 2 and Python 3.
        print(ex)
        raise
    finally:
        cluster.shutdown()
2019-07-03 16:37:37 +00:00
# node1 is the replication source, node2 downloads the data.
# node2 has a long dns_cache_update_period, so the background DNS cache update
# will not help it here.
def test_ip_change_drop_dns_cache(cluster_without_dns_cache_update):
    """Replication to node2 resumes only after ``SYSTEM DROP DNS CACHE``."""
    row_count_query = "SELECT count(*) from test_table_drop"

    # Baseline: replication works before any address change.
    node1.query("INSERT INTO test_table_drop VALUES ('2018-10-01', 1), ('2018-10-02', 2), ('2018-10-03', 3)")
    assert node1.query(row_count_query) == "3\n"
    assert_eq_with_retry(node2, row_count_query, "3")

    # Move the source node to a new IP address.
    cluster.restart_instance_with_ip_change(node1, "2001:3984:3989::1:7777")

    # Write more rows to the source and confirm they landed there.
    node1.query("INSERT INTO test_table_drop VALUES ('2018-10-01', 5), ('2018-10-02', 6), ('2018-10-03', 7)")
    assert node1.query(row_count_query) == "6\n"

    # node2 still has the old address cached, so it cannot fetch the new rows.
    with pytest.raises(Exception):
        assert_eq_with_retry(node2, row_count_query, "6")

    # Dropping the DNS cache lets node2 download the data.
    node2.query("SYSTEM DROP DNS CACHE")
    assert_eq_with_retry(node2, row_count_query, "6")

    # One more round trip, just to be sure.
    node1.query("INSERT INTO test_table_drop VALUES ('2018-10-01', 8)")
    assert node1.query(row_count_query) == "7\n"
    assert_eq_with_retry(node2, row_count_query, "7")
# Instances for the "background DNS cache update" scenario: node4 additionally
# loads remote_servers.xml and dns_update_short.xml.
node3 = cluster.add_instance(
    'node3',
    main_configs=['configs/listen_host.xml'],
    with_zookeeper=True,
    ipv6_address='2001:3984:3989::1:1113'
)
node4 = cluster.add_instance(
    'node4',
    main_configs=[
        'configs/remote_servers.xml',
        'configs/listen_host.xml',
        'configs/dns_update_short.xml',
    ],
    with_zookeeper=True,
    ipv6_address='2001:3984:3989::1:1114'
)
2019-07-03 16:37:37 +00:00
@pytest.fixture(scope="module")
def cluster_with_dns_cache_update():
    """Module-scoped fixture for the "background DNS cache update" scenario.

    Starts the shared cluster, creates the replicated table
    ``test_table_update`` on node3 and node4, yields the cluster to the tests
    and always shuts the cluster down afterwards.

    Raises:
        Exception: any error raised while starting the cluster or creating
            the tables is printed and then re-raised, so pytest reports the
            real cause instead of a fixture that never yielded.
    """
    try:
        cluster.start()
        _fill_nodes([node3, node4], 'test_table_update')
        yield cluster
    except Exception as ex:
        # Function-call form works on both Python 2 and Python 3.
        print(ex)
        raise
    finally:
        cluster.shutdown()
2019-07-03 16:37:37 +00:00
# node3 is the replication source, node4 downloads the data.
# node4 has a short dns_cache_update_period, so this exercises the background
# update of the DNS cache.
def test_ip_change_update_dns_cache(cluster_with_dns_cache_update):
    """Replication to node4 recovers by itself after node3 changes its IP."""
    row_count_query = "SELECT count(*) from test_table_update"

    # Baseline: replication works before any address change.
    node3.query("INSERT INTO test_table_update VALUES ('2018-10-01', 1), ('2018-10-02', 2), ('2018-10-03', 3)")
    assert node3.query(row_count_query) == "3\n"
    assert_eq_with_retry(node4, row_count_query, "3")

    # Move the source node to a new IP address.
    cluster.restart_instance_with_ip_change(node3, "2001:3984:3989::1:8888")

    # Write more rows to the source and confirm they landed there.
    node3.query("INSERT INTO test_table_update VALUES ('2018-10-01', 5), ('2018-10-02', 6), ('2018-10-03', 7)")
    assert node3.query(row_count_query) == "6\n"

    # node4's container must be able to reach node3 by name with curl.
    assert node4.exec_in_container(["bash", "-c", "curl -s 'node3:8123'"]) == 'Ok.\n'

    # Diagnostic output: dump node4's resolver configuration.
    resolv_conf = node4.exec_in_container(["bash", "-c", "cat /etc/resolv.conf"])
    print("RESOLV {}".format(resolv_conf))

    # Retry until node4's server itself can query node3 by name.
    assert_eq_with_retry(node4, "SELECT * FROM remote('node3', 'system', 'one')", "0", sleep_time=0.5)

    # Thanks to the DNS cache update, node4 picks up node3's new address.
    assert_eq_with_retry(node4, row_count_query, "6", sleep_time=3)

    # One more round trip, just to be sure.
    node3.query("INSERT INTO test_table_update VALUES ('2018-10-01', 8)")
    assert node3.query(row_count_query) == "7\n"
    assert_eq_with_retry(node4, row_count_query, "7")
2020-04-30 22:29:47 +00:00
def set_hosts(node, hosts):
    """Overwrite ``/etc/hosts`` inside the container of ``node``.

    The file always starts with the two ``localhost`` entries, followed by
    the entries in ``hosts`` (a list of ``"<address> <name>"`` strings).
    The command is run in the container as root in privileged mode.
    """
    entries = ['127.0.0.1 localhost', '::1 localhost'] + hosts
    # The separator is a literal backslash-n; `echo -e` expands it to a newline.
    payload = '\\n'.join(entries)
    shell_command = 'echo -e "{}" > /etc/hosts'.format(payload)
    node.exec_in_container(['bash', '-c', shell_command], privileged=True, user='root')
2020-04-30 22:29:47 +00:00
2019-07-03 16:37:37 +00:00
def test_dns_cache_update(cluster_with_dns_cache_update):
    """Queries to ``lost_host`` fail at first and succeed after /etc/hosts is fixed."""
    remote_probe = "SELECT * FROM remote('lost_host', 'system', 'one')"
    distributed_probe = "SELECT * FROM distributed_lost_host"

    # Map lost_host to an address at which the queries below are expected to fail.
    set_hosts(node4, ['127.255.255.255 lost_host'])

    with pytest.raises(QueryRuntimeException):
        node4.query(remote_probe)

    node4.query(
        "CREATE TABLE distributed_lost_host (dummy UInt8) ENGINE = Distributed(lost_host_cluster, 'system', 'one')")
    with pytest.raises(QueryRuntimeException):
        node4.query(distributed_probe)

    # Re-map lost_host to the loopback address.
    set_hosts(node4, ['127.0.0.1 lost_host'])

    # Retry for a while until the DNS cache has been updated.
    assert_eq_with_retry(node4, remote_probe, "0")
    assert_eq_with_retry(node4, distributed_probe, "0")

    cluster_rows = node4.query(
        "SELECT DISTINCT host_name, host_address FROM system.clusters WHERE cluster='lost_host_cluster'")
    assert TSV(cluster_rows) == TSV("lost_host\t127.0.0.1\n")

    own_host_name = node4.query("SELECT hostName()")
    assert TSV(own_host_name) == TSV("node4")
2020-04-30 22:29:47 +00:00
# node5 is used to check SYSTEM DROP DNS CACHE, node6 to check the background
# cache update.
node5 = cluster.add_instance(
    'node5',
    main_configs=['configs/listen_host.xml', 'configs/dns_update_long.xml'],
    user_configs=['configs/users_with_hostname.xml'],
    ipv6_address='2001:3984:3989::1:1115'
)
node6 = cluster.add_instance(
    'node6',
    main_configs=['configs/listen_host.xml', 'configs/dns_update_short.xml'],
    user_configs=['configs/users_with_hostname.xml'],
    ipv6_address='2001:3984:3989::1:1116'
)
@pytest.mark.parametrize("node", [node5, node6])
def test_user_access_ip_change(cluster_with_dns_cache_update, node):
    """node3/node4 regain access to ``node`` after their IP addresses change.

    For node5 the DNS cache is dropped explicitly and a single attempt is
    allowed; for any other node the check is retried up to 60 times.
    """
    node_name = node.name
    node_num = node.name[-1]
    probe_query = "SELECT * FROM remote('{}', 'system', 'one')".format(node_name)
    clients = (node3, node4)

    # getaddrinfo(...) may hang for a long time without these options.
    node.exec_in_container(['bash', '-c', 'echo -e "options timeout:1\noptions attempts:2" >> /etc/resolv.conf'],
                           privileged=True, user='root')

    # Both clients can reach the node before anything changes.
    for client in clients:
        assert client.query(probe_query) == "0\n"

    set_hosts(node, ['127.255.255.255 node3', '2001:3984:3989::1:88{}4 unknown_host'.format(node_num)])

    cluster.restart_instance_with_ip_change(node3, "2001:3984:3989::1:88{}3".format(node_num))
    cluster.restart_instance_with_ip_change(node4, "2001:3984:3989::1:88{}4".format(node_num))

    # With the bogus /etc/hosts entries in place both clients are rejected.
    for client in clients:
        with pytest.raises(QueryRuntimeException):
            client.query(probe_query)

    # The wrong addresses are cached now; restore a clean /etc/hosts.
    set_hosts(node, [])

    if node_name == 'node5':
        # The client is not allowed to connect, so run it directly inside the
        # container to send the query from localhost.
        node.exec_in_container(['bash', '-c', 'clickhouse client -q "SYSTEM DROP DNS CACHE"'], privileged=True,
                               user='root')
        retry_count = 1
    else:
        retry_count = 60

    for client in clients:
        assert_eq_with_retry(client, probe_query, "0", retry_count=retry_count, sleep_time=1)