2020-06-13 18:33:58 +00:00
|
|
|
# pylint: disable=unused-argument
|
|
|
|
# pylint: disable=redefined-outer-name
|
|
|
|
# pylint: disable=line-too-long
|
|
|
|
|
|
|
|
import uuid
|
|
|
|
|
2020-09-16 04:26:10 +00:00
|
|
|
import pytest
|
2020-06-13 18:33:58 +00:00
|
|
|
from helpers.cluster import ClickHouseCluster
|
|
|
|
|
|
|
|
# Test cluster shared by every test in this module.
cluster = ClickHouseCluster(__file__)

# Three instances, all started with the same remote_servers.xml config.
n1, n2, n3 = (
    cluster.add_instance(name, main_configs=['configs/remote_servers.xml'])
    for name in ('n1', 'n2', 'n3')
)

# Number of instances registered on the cluster.
nodes = len(cluster.instances)

# Number of SELECTs issued by the tests that loop ``queries`` times.
queries = nodes * 5
|
|
|
|
|
2020-06-13 18:33:58 +00:00
|
|
|
|
2020-06-21 08:59:54 +00:00
|
|
|
def bootstrap():
    """Prepare every cluster instance for the tests in this module.

    On each instance this forces a configuration reload and then (re)creates
    the local ``data`` table plus the Distributed tables ``dist``,
    ``dist_priority`` and ``dist_priority_negative`` on top of it.

    Every table is dropped with ``IF EXISTS`` before being created, so the
    function can be run again against instances that already have the tables.
    """
    # (Distributed table name, cluster name passed to the Distributed engine)
    distributed_tables = (
        ('dist', 'replicas_cluster'),
        ('dist_priority', 'replicas_priority_cluster'),
        ('dist_priority_negative', 'replicas_priority_negative_cluster'),
    )

    for n in cluster.instances.values():
        # At startup, server loads configuration files.
        #
        # However ConfigReloader does not know about already loaded files
        # (files is empty()), hence it will always reload the configuration
        # just after server starts (+ 2 seconds, reload timeout).
        #
        # And on configuration reload the clusters will be re-created, so some
        # internal stuff will be reset:
        # - error_count
        # - last_used (round_robin)
        #
        # And if the reload will happen during round_robin test it will start
        # querying from the beginning, so let's issue config reload just after
        # start to avoid reload in the middle of the test execution.
        n.query('SYSTEM RELOAD CONFIG')

        n.query('DROP TABLE IF EXISTS data')
        for table, _ in distributed_tables:
            n.query('DROP TABLE IF EXISTS {}'.format(table))

        n.query('CREATE TABLE data (key Int) Engine=Memory()')
        for table, cluster_name in distributed_tables:
            n.query("""
            CREATE TABLE {table} AS data
            Engine=Distributed(
                {cluster_name},
                currentDatabase(),
                data)
            """.format(table=table, cluster_name=cluster_name))
|
2020-06-13 18:33:58 +00:00
|
|
|
|
2020-09-16 04:26:10 +00:00
|
|
|
|
2020-06-13 18:33:58 +00:00
|
|
|
def make_uuid():
    """Return a newly generated random UUID as a 32-character hex string."""
    fresh = uuid.uuid4()
    return fresh.hex
|
|
|
|
|
2020-09-16 04:26:10 +00:00
|
|
|
|
2020-06-13 18:33:58 +00:00
|
|
|
@pytest.fixture(scope='module', autouse=True)
def start_cluster():
    """Module-scoped autouse fixture that starts and bootstraps the cluster.

    Yields the module-level ``cluster`` object to the tests.
    """
    try:
        cluster.start()
        bootstrap()
        yield cluster
    finally:
        # Reached on normal teardown and also when start-up/bootstrap raises.
        cluster.shutdown()
|
|
|
|
|
2020-09-16 04:26:10 +00:00
|
|
|
|
2020-06-27 07:29:47 +00:00
|
|
|
def get_node(query_node, table='dist', *args, **kwargs):
    """Run ``SELECT * FROM <table>`` and return the host that executed it.

    The SELECT is issued on ``query_node`` under a freshly generated
    ``query_id``. After flushing logs on every instance, the most recent
    non-initial ``QueryFinish`` entry with that ``initial_query_id`` is looked
    up in ``system.query_log`` across ``shards_cluster`` and its shard number
    is mapped to a host name through ``system.clusters``.

    Args:
        query_node: instance on which the SELECT and the log lookup are run.
        table: name of the table to select from.
        *args: forwarded to ``query_node.query`` for the SELECT.
        **kwargs: forwarded to ``query_node.query`` for the SELECT. A
            ``settings`` dict, if given, is combined with the settings this
            helper needs (``query_id``, ``log_queries``,
            ``log_queries_min_type``, ``prefer_localhost_replica``); on
            conflict the helper's values win.

    Returns:
        The whitespace-stripped output of the lookup query (the host name, or
        an empty string when the lookup returns no rows).
    """
    query_id = make_uuid()

    settings = {
        'query_id': query_id,
        'log_queries': 1,
        'log_queries_min_type': 'QUERY_START',
        'prefer_localhost_replica': 0,
    }
    # Merge into a new dict: updating the caller's dict in place would leak
    # query_id and the logging settings into whatever the caller does with it
    # afterwards. ``or {}`` also tolerates an explicit settings=None.
    merged_settings = dict(kwargs.get('settings') or {})
    merged_settings.update(settings)
    kwargs['settings'] = merged_settings

    query_node.query('SELECT * FROM ' + table, *args, **kwargs)

    # Make sure the query_log entries are queryable on every instance.
    for n in cluster.instances.values():
        n.query('SYSTEM FLUSH LOGS')

    rows = query_node.query("""
    SELECT c.host_name
    FROM (
        SELECT _shard_num
        FROM cluster(shards_cluster, system.query_log)
        WHERE
            initial_query_id = '{query_id}' AND
            is_initial_query = 0 AND
            type = 'QueryFinish'
        ORDER BY event_date DESC, event_time DESC
        LIMIT 1
    ) a
    JOIN system.clusters c
    ON a._shard_num = c.shard_num WHERE cluster = 'shards_cluster'
    """.format(query_id=query_id))
    return rows.strip()
|
|
|
|
|
2020-09-16 04:26:10 +00:00
|
|
|
|
2020-06-13 18:33:58 +00:00
|
|
|
# TODO: right now random distribution looks bad, but works
def test_load_balancing_default():
    """With ``load_balancing=random``, ``queries`` SELECTs must reach every node."""
    seen_hosts = {
        get_node(n1, settings={'load_balancing': 'random'})
        for _ in range(queries)
    }
    assert len(seen_hosts) == nodes, seen_hosts
|
|
|
|
|
2020-09-16 04:26:10 +00:00
|
|
|
|
2020-06-13 18:33:58 +00:00
|
|
|
def test_load_balancing_nearest_hostname():
    """With ``load_balancing=nearest_hostname``, every SELECT from n1 must land on n1."""
    seen_hosts = {
        get_node(n1, settings={'load_balancing': 'nearest_hostname'})
        for _ in range(queries)
    }
    assert len(seen_hosts) == 1, seen_hosts
    assert seen_hosts == {'n1'}
|
|
|
|
|
2020-09-16 04:26:10 +00:00
|
|
|
|
2020-06-13 18:33:58 +00:00
|
|
|
def test_load_balancing_in_order():
    """With ``load_balancing=in_order``, every SELECT from n1 must land on n1."""
    seen_hosts = {
        get_node(n1, settings={'load_balancing': 'in_order'})
        for _ in range(queries)
    }
    assert len(seen_hosts) == 1, seen_hosts
    assert seen_hosts == {'n1'}
|
|
|
|
|
2020-09-16 04:26:10 +00:00
|
|
|
|
2020-06-13 18:33:58 +00:00
|
|
|
def test_load_balancing_first_or_random():
    """With ``load_balancing=first_or_random``, every SELECT from n1 must land on n1."""
    seen_hosts = {
        get_node(n1, settings={'load_balancing': 'first_or_random'})
        for _ in range(queries)
    }
    assert len(seen_hosts) == 1, seen_hosts
    assert seen_hosts == {'n1'}
|
|
|
|
|
2020-09-16 04:26:10 +00:00
|
|
|
|
2020-06-13 18:33:58 +00:00
|
|
|
def test_load_balancing_round_robin():
    """With ``load_balancing=round_robin``, ``nodes`` consecutive SELECTs must hit n1, n2 and n3."""
    seen_hosts = {
        get_node(n1, settings={'load_balancing': 'round_robin'})
        for _ in range(nodes)
    }
    assert len(seen_hosts) == nodes, seen_hosts
    assert seen_hosts == {'n1', 'n2', 'n3'}
|
2020-06-13 20:01:17 +00:00
|
|
|
|
2020-09-16 04:26:10 +00:00
|
|
|
|
2020-06-30 08:09:06 +00:00
|
|
|
@pytest.mark.parametrize('dist_table', [
    ('dist_priority'),
    ('dist_priority_negative'),
])
def test_load_balancing_priority_round_robin(dist_table):
    """Round-robin over a priority cluster table must only use n1 and n3.

    Runs once per Distributed table listed in the parametrization.
    """
    seen_hosts = {
        get_node(n1, dist_table, settings={'load_balancing': 'round_robin'})
        for _ in range(nodes)
    }
    assert len(seen_hosts) == 2, seen_hosts
    # n2 has bigger priority in config
    assert seen_hosts == {'n1', 'n3'}
|
|
|
|
|
2020-09-16 04:26:10 +00:00
|
|
|
|
2020-06-13 20:01:17 +00:00
|
|
|
def test_distributed_replica_max_ignored_errors():
    """Check how ``distributed_replica_max_ignored_errors`` affects replica choice.

    While n1 is paused, queries from n2 are expected to raise n1's
    ``errors_count`` in ``system.clusters`` to 1. After n1 is resumed, the
    test asserts that with ``distributed_replica_max_ignored_errors = 0`` the
    query is served by n2, and with the setting at 1 it is served by n1.
    """
    settings = {
        'use_hedged_requests': 0,
        'load_balancing': 'in_order',
        'prefer_localhost_replica': 0,
        'connect_timeout': 2,
        'receive_timeout': 2,
        'send_timeout': 2,
        'idle_connection_timeout': 2,
        'tcp_keep_alive_timeout': 2,

        'distributed_replica_max_ignored_errors': 0,
        'distributed_replica_error_half_life': 60,
    }

    # initiate connection (if started only this test)
    n2.query('SELECT * FROM dist', settings=settings)
    cluster.pause_container('n1')

    # Always resume n1, even if a query or the assertion below fails, so a
    # failure here cannot leave the container paused for later tests.
    try:
        # n1 paused -- skipping, and increment error_count for n1
        # but the query succeeds, no need in query_and_get_error()
        n2.query('SELECT * FROM dist', settings=settings)
        # XXX: due to config reloading we need second time (sigh)
        n2.query('SELECT * FROM dist', settings=settings)
        # check error_count for n1
        assert int(n2.query("""
        SELECT errors_count FROM system.clusters
        WHERE cluster = 'replicas_cluster' AND host_name = 'n1'
        """, settings=settings)) == 1
    finally:
        cluster.unpause_container('n1')

    # still n2
    assert get_node(n2, settings=settings) == 'n2'
    # now n1
    settings['distributed_replica_max_ignored_errors'] = 1
    assert get_node(n2, settings=settings) == 'n1'
|