ClickHouse/dbms/tests/integration/test_insert_into_distributed/test.py

import pytest
import time
from helpers.cluster import ClickHouseCluster
from helpers.network import PartitionManager
from helpers.test_tools import TSV
cluster = ClickHouseCluster(__file__)
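
# One instance per test scenario, plus a 'remote' node that receives the data sent through
# the Distributed tables. Background merges are disabled on 'remote'
# (configs/forbid_background_merges.xml) so that every inserted block stays a separate part.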
instance_test_reconnect = cluster.add_instance('instance_test_reconnect', main_configs=['configs/remote_servers.xml'])
instance_test_inserts_batching = cluster.add_instance(
    'instance_test_inserts_batching',
    main_configs=['configs/remote_servers.xml'], user_configs=['configs/enable_distributed_inserts_batching.xml'])
remote = cluster.add_instance('remote', user_configs=['configs/forbid_background_merges.xml'])

instance_test_inserts_local_cluster = cluster.add_instance(
    'instance_test_inserts_local_cluster',
    main_configs=['configs/remote_servers.xml'])

@pytest.fixture(scope="module")
def started_cluster():
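    # Create the target tables on 'remote' (and, for the local-cluster test, on the test
    # instance itself), plus a Distributed table on each test instance that points at them.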
    try:
        cluster.start()

        remote.query("CREATE TABLE local1 (x UInt32) ENGINE = Log")
        instance_test_reconnect.query('''
CREATE TABLE distributed (x UInt32) ENGINE = Distributed('test_cluster', 'default', 'local1')
''')

        remote.query("CREATE TABLE local2 (d Date, x UInt32, s String) ENGINE = MergeTree(d, x, 8192)")
        instance_test_inserts_batching.query('''
CREATE TABLE distributed (d Date, x UInt32) ENGINE = Distributed('test_cluster', 'default', 'local2')
''')

        instance_test_inserts_local_cluster.query("CREATE TABLE local (d Date, x UInt32) ENGINE = MergeTree(d, x, 8192)")
        instance_test_inserts_local_cluster.query('''
CREATE TABLE distributed_on_local (d Date, x UInt32) ENGINE = Distributed('test_local_cluster', 'default', 'local')
''')

        yield cluster

    finally:
        cluster.shutdown()

def test_reconnect(started_cluster):
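    # The Distributed table should keep queuing data while the connection to the remote
    # shard is broken and deliver everything once the connection is re-established.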
    instance = instance_test_reconnect

    with PartitionManager() as pm:
        # Open a connection for insertion.
        instance.query("INSERT INTO distributed VALUES (1)")
        time.sleep(0.5)
        assert remote.query("SELECT count(*) FROM local1").strip() == '1'

        # Now break the connection.
        pm.partition_instances(instance, remote, action='REJECT --reject-with tcp-reset')
        instance.query("INSERT INTO distributed VALUES (2)")
        time.sleep(0.5)

        # Heal the partition and insert more data.
        # The connection must be reestablished and after some time all data must be inserted.
        pm.heal_all()
        instance.query("INSERT INTO distributed VALUES (3)")
        time.sleep(0.5)

        assert remote.query("SELECT count(*) FROM local1").strip() == '3'

def test_inserts_batching(started_cluster):
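    # While the connection to 'remote' is blocked, the INSERTs below are queued by the
    # Distributed table; after the partition heals they are flushed to 'remote' in batches
    # (batching is enabled via configs/enable_distributed_inserts_batching.xml).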
    instance = instance_test_inserts_batching

    with PartitionManager() as pm:
        pm.partition_instances(instance, remote)

        instance.query("INSERT INTO distributed(d, x) VALUES ('2000-01-01', 1)")
        # Sleep a bit so that this INSERT forms a batch of its own.
        time.sleep(0.1)
        instance.query("INSERT INTO distributed(x, d) VALUES (2, '2000-01-01')")

        for i in range(3, 7):
            instance.query("INSERT INTO distributed(d, x) VALUES ('2000-01-01', {})".format(i))

        for i in range(7, 9):
            instance.query("INSERT INTO distributed(x, d) VALUES ({}, '2000-01-01')".format(i))

        instance.query("INSERT INTO distributed(d, x) VALUES ('2000-01-01', 9)")

        # After the ALTER, the structure of the saved blocks will be different.
        instance.query("ALTER TABLE distributed ADD COLUMN s String")

        for i in range(10, 13):
            instance.query("INSERT INTO distributed(d, x) VALUES ('2000-01-01', {})".format(i))

    time.sleep(1.0)

    result = remote.query("SELECT _part, groupArray(x) FROM local2 GROUP BY _part ORDER BY _part")

    # Explanation: as merges are turned off on the remote instance, active parts in the local2 table
    # correspond 1-to-1 to inserted blocks.
    # Batches of at most 3 rows are formed because min_insert_block_size_rows = 3.
    # Blocks:
    # 1. Failed batch that is retried with the same contents.
    # 2. Full batch of inserts with (d, x) order of columns.
    # 3. Full batch of inserts with (x, d) order of columns.
    # 4. Full batch of inserts after the ALTER (with a different block structure).
    # 5. What was left to insert with (d, x) order before the ALTER.
    expected = '''\
20000101_20000101_1_1_0	[1]
20000101_20000101_2_2_0	[3,4,5]
20000101_20000101_3_3_0	[2,7,8]
20000101_20000101_4_4_0	[10,11,12]
20000101_20000101_5_5_0	[6,9]
'''
    assert TSV(result) == TSV(expected)

def test_inserts_local(started_cluster):
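    # 'distributed_on_local' points at the 'test_local_cluster' cluster, whose target table
    # 'local' lives on this same instance, so the inserted row must show up there.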
    instance = instance_test_inserts_local_cluster

    instance.query("INSERT INTO distributed_on_local VALUES ('2000-01-01', 1)")
    time.sleep(0.5)

    assert instance.query("SELECT count(*) FROM local").strip() == '1'