More integration tests, all pass, at least usually

This commit is contained in:
Jakub Kuklis 2021-12-07 12:48:39 +01:00
parent afc64c14ce
commit 312ea92989
3 changed files with 244 additions and 2 deletions

View File

@ -15,3 +15,7 @@ log_file = pytest.log
log_file_level = DEBUG
log_file_format = %(asctime)s [ %(process)d ] %(levelname)s : %(message)s (%(filename)s:%(lineno)s, %(funcName)s)
log_file_date_format = %Y-%m-%d %H:%M:%S
markers =
long_run: marks tests which run for a long time
addopts =
-m 'not long_run'

View File

@ -10,7 +10,12 @@
<!-- default credentials for Azurite storage account -->
<account_name>devstoreaccount1</account_name>
<account_key>Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==</account_key>
<max_single_part_upload_size>33554432</max_single_part_upload_size>
</blob_storage_disk>
<hdd>
<type>local</type>
<path>/</path>
</hdd>
</disks>
<policies>
<blob_storage_policy>
@ -18,6 +23,9 @@
<main>
<disk>blob_storage_disk</disk>
</main>
<external>
<disk>hdd</disk>
</external>
</volumes>
</blob_storage_policy>
</policies>

View File

@ -9,14 +9,37 @@ import pytest
from helpers.cluster import ClickHouseCluster, get_instances_dir
# By default, exceptions thrown in threads are ignored
# (they do not mark the test as failed, they are only printed to stderr).
#
# Wrap threading.Thread and re-throw the exception on join()
class SafeThread(threading.Thread):
    """Thread that records an exception raised by its target and re-raises it on join().

    An exception raised inside a plain thread does not fail the test (it is
    only printed to stderr); this wrapper stores it and surfaces it in the
    thread that calls ``join()``.

    Args:
        target: Zero-argument callable executed in the thread.
    """

    def __init__(self, target):
        super().__init__()
        self.target = target
        # Exception raised by ``target``, or None if nothing was raised (so far).
        self.exception = None

    def run(self):
        """Call the target, storing any ``Exception`` instead of propagating it."""
        try:
            self.target()
        except Exception as e:  # pylint: disable=broad-except
            self.exception = e

    def join(self, timeout=None):
        """Wait for the thread like ``Thread.join`` and re-raise a stored exception.

        Args:
            timeout: Forwarded unchanged to ``threading.Thread.join``.

        Raises:
            Exception: The exception recorded by ``run()``, if there is one.
        """
        super().join(timeout)
        # Compare against None rather than relying on truthiness: an exception
        # type may define __bool__/__len__ and evaluate as false, which would
        # silently swallow the failure.
        if self.exception is not None:
            raise self.exception
# Directory that contains this test module.
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
# Storage configuration file of the node instance; replace_config() rewrites it in place.
CONFIG_PATH = os.path.join(SCRIPT_DIR, './{}/node/configs/config.d/storage_conf.xml'.format(get_instances_dir()))

# Key used to look the instance up in cluster.instances.
NODE_NAME = "node"
# Table created and queried by the tests in this module.
TABLE_NAME = "blob_storage_table"
# Disk names referenced in MOVE PARTITION / RESTART DISK statements.
BLOB_STORAGE_DISK = "blob_storage_disk"
LOCAL_DISK = "hdd"
# TODO: move these functions copied from s3 to a utility file?
# TODO: these tests resemble S3 tests a lot, utility functions and tests themselves could be abstracted
def random_string(length):
    """Return a string made of ``length`` randomly chosen ASCII letters."""
    alphabet = string.ascii_letters
    picked = [random.choice(alphabet) for _ in range(length)]
    return ''.join(picked)
@ -28,6 +51,16 @@ def generate_values(date_str, count, sign=1):
return ",".join(["('{}',{},'{}')".format(x, y, z) for x, y, z in data])
def replace_config(old, new, config_path=None):
    """Replace every occurrence of ``old`` with ``new`` in a config file, in place.

    The replacement is applied line by line, so ``old`` only matches text that
    lies within a single line. If ``old`` does not occur, the file is rewritten
    with unchanged content.

    Args:
        old: Substring to look for.
        new: Text that replaces each occurrence of ``old``.
        config_path: File to rewrite; defaults to ``CONFIG_PATH``.
    """
    path = CONFIG_PATH if config_path is None else config_path

    # Context managers guarantee the handles are closed even if reading or
    # writing raises.
    with open(path, 'r') as config:
        config_lines = [line.replace(old, new) for line in config]

    with open(path, 'w') as config:
        config.writelines(config_lines)
@pytest.fixture(scope="module")
def cluster():
try:
@ -134,3 +167,200 @@ def test_insert_same_partition_and_merge(cluster, merge_vertical):
assert node.query(f"SELECT sum(id) FROM {TABLE_NAME} FORMAT Values") == "(0)"
assert node.query(f"SELECT count(distinct(id)) FROM {TABLE_NAME} FORMAT Values") == "(8192)"
def test_alter_table_columns(cluster):
    """Add a defaulted column, then change its type, checking the data after each step.

    Two batches of 4096 generated values for '2020-01-03' are inserted (the
    second with ``sign=-1``). A ``col1 UInt64 DEFAULT 1`` column is added and
    the table is optimized, then ``col1`` is converted to ``String`` with
    ``mutations_sync=2``.
    """
    node = cluster.instances[NODE_NAME]
    create_table(node, TABLE_NAME)

    node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 4096)}")
    node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 4096, -1)}")

    node.query(f"ALTER TABLE {TABLE_NAME} ADD COLUMN col1 UInt64 DEFAULT 1")
    # To ensure parts have been merged
    node.query(f"OPTIMIZE TABLE {TABLE_NAME}")

    # (query, expected result) pairs checked in order after the column was added.
    default_checks = (
        (f"SELECT sum(col1) FROM {TABLE_NAME} FORMAT Values", "(8192)"),
        (f"SELECT sum(col1) FROM {TABLE_NAME} WHERE id > 0 FORMAT Values", "(4096)"),
    )
    for select, expected in default_checks:
        assert node.query(select) == expected

    sync_mutation = {"mutations_sync": 2}
    node.query(f"ALTER TABLE {TABLE_NAME} MODIFY COLUMN col1 String", settings=sync_mutation)

    distinct_values = node.query(f"SELECT distinct(col1) FROM {TABLE_NAME} FORMAT Values")
    assert distinct_values == "('1')"
def test_attach_detach_partition(cluster):
    """Detach, attach and drop partitions, checking the row count after each step.

    Partitions '2020-01-03' and '2020-01-04' are filled with 4096 rows each.
    The first is detached, re-attached and dropped; the second is detached and
    then dropped from the detached state with ``allow_drop_detached=1``.
    """
    node = cluster.instances[NODE_NAME]
    create_table(node, TABLE_NAME)
    row_count = f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values"

    node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 4096)}")
    node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-04', 4096)}")
    assert node.query(row_count) == "(8192)"

    # (statement, row count expected right after it) for partition 2020-01-03.
    steps = (
        (f"ALTER TABLE {TABLE_NAME} DETACH PARTITION '2020-01-03'", "(4096)"),
        (f"ALTER TABLE {TABLE_NAME} ATTACH PARTITION '2020-01-03'", "(8192)"),
        (f"ALTER TABLE {TABLE_NAME} DROP PARTITION '2020-01-03'", "(4096)"),
    )
    for statement, expected in steps:
        node.query(statement)
        assert node.query(row_count) == expected

    node.query(f"ALTER TABLE {TABLE_NAME} DETACH PARTITION '2020-01-04'")
    drop_detached = {"allow_drop_detached": 1}
    node.query(f"ALTER TABLE {TABLE_NAME} DROP DETACHED PARTITION '2020-01-04'", settings=drop_detached)
    assert node.query(row_count) == "(0)"
def test_move_partition_to_another_disk(cluster):
    """Move a partition to the local disk and back, checking that no rows are lost.

    After inserting 4096 rows into each of two partitions, partition
    '2020-01-04' is moved to ``LOCAL_DISK`` and then to ``BLOB_STORAGE_DISK``;
    the total row count must stay at 8192 after each move.
    """
    node = cluster.instances[NODE_NAME]
    create_table(node, TABLE_NAME)
    row_count = f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values"

    node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 4096)}")
    node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-04', 4096)}")
    assert node.query(row_count) == "(8192)"

    moves = (
        f"ALTER TABLE {TABLE_NAME} MOVE PARTITION '2020-01-04' TO DISK '{LOCAL_DISK}'",
        f"ALTER TABLE {TABLE_NAME} MOVE PARTITION '2020-01-04' TO DISK '{BLOB_STORAGE_DISK}'",
    )
    for move in moves:
        node.query(move)
        assert node.query(row_count) == "(8192)"
def test_table_manipulations(cluster):
    """Exercise RENAME, CHECK, DETACH/ATTACH and TRUNCATE on a filled table.

    The table holds 8192 rows (two inserts of 4096). It is renamed and renamed
    back, checked, detached and re-attached, and finally truncated; row counts
    are verified along the way.
    """
    node = cluster.instances[NODE_NAME]
    create_table(node, TABLE_NAME)

    renamed_table = TABLE_NAME + "_renamed"
    row_count = f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values"

    node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 4096)}")
    node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-04', 4096)}")

    # Rename round trip: the data must be readable under the new name.
    node.query(f"RENAME TABLE {TABLE_NAME} TO {renamed_table}")
    assert node.query(f"SELECT count(*) FROM {renamed_table} FORMAT Values") == "(8192)"
    node.query(f"RENAME TABLE {renamed_table} TO {TABLE_NAME}")
    assert node.query(f"CHECK TABLE {TABLE_NAME} FORMAT Values") == "(1)"

    # Detach/attach round trip keeps all rows.
    node.query(f"DETACH TABLE {TABLE_NAME}")
    node.query(f"ATTACH TABLE {TABLE_NAME}")
    assert node.query(row_count) == "(8192)"

    node.query(f"TRUNCATE TABLE {TABLE_NAME}")
    assert node.query(row_count) == "(0)"
def test_move_replace_partition_to_another_table(cluster):
    """Move partitions to a clone table and replace them back from it.

    Four partitions of 4096 rows are inserted, two of them with ``sign=-1``.
    Partitions '2020-01-03' and '2020-01-05' are moved to a ``_clone`` table,
    re-inserted into the source with the opposite sign, and then replaced from
    the clone. ``sum(id)`` is expected to be 0 at every checkpoint. The test
    ends by dropping the clone, freezing the source table and dropping it.
    """
    node = cluster.instances[NODE_NAME]
    create_table(node, TABLE_NAME)

    table_clone_name = TABLE_NAME + "_clone"

    source_sum = f"SELECT sum(id) FROM {TABLE_NAME} FORMAT Values"
    source_count = f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values"
    clone_sum = f"SELECT sum(id) FROM {table_clone_name} FORMAT Values"
    clone_count = f"SELECT count(*) FROM {table_clone_name} FORMAT Values"

    def expect(sum_query, count_query, rows):
        # Every checkpoint requires a zero id sum and a specific row count.
        assert node.query(sum_query) == "(0)"
        assert node.query(count_query) == rows

    node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 4096)}")
    node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-04', 4096)}")
    node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-05', 4096, -1)}")
    node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-06', 4096, -1)}")
    expect(source_sum, source_count, "(16384)")

    create_table(node, table_clone_name)

    node.query(f"ALTER TABLE {TABLE_NAME} MOVE PARTITION '2020-01-03' TO TABLE {table_clone_name}")
    node.query(f"ALTER TABLE {TABLE_NAME} MOVE PARTITION '2020-01-05' TO TABLE {table_clone_name}")
    expect(source_sum, source_count, "(8192)")
    expect(clone_sum, clone_count, "(8192)")

    # Add new partitions to source table, but with different values and replace them from copied table.
    node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 4096, -1)}")
    node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-05', 4096)}")
    expect(source_sum, source_count, "(16384)")

    node.query(f"ALTER TABLE {TABLE_NAME} REPLACE PARTITION '2020-01-03' FROM {table_clone_name}")
    node.query(f"ALTER TABLE {TABLE_NAME} REPLACE PARTITION '2020-01-05' FROM {table_clone_name}")
    expect(source_sum, source_count, "(16384)")
    expect(clone_sum, clone_count, "(8192)")

    # The source table must be unaffected by dropping the clone.
    node.query(f"DROP TABLE {table_clone_name} NO DELAY")
    expect(source_sum, source_count, "(16384)")

    node.query(f"ALTER TABLE {TABLE_NAME} FREEZE")

    node.query(f"DROP TABLE {TABLE_NAME} NO DELAY")
def test_freeze_unfreeze(cluster):
    """Freeze the table under two backup names, truncate it, then unfreeze both.

    The test makes no assertions; it passes when none of the statements raises.
    """
    node = cluster.instances[NODE_NAME]
    create_table(node, TABLE_NAME)

    backup1, backup2 = 'backup1', 'backup2'

    # First backup is taken with one partition present, the second with two.
    node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 4096)}")
    node.query(f"ALTER TABLE {TABLE_NAME} FREEZE WITH NAME '{backup1}'")

    node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-04', 4096)}")
    node.query(f"ALTER TABLE {TABLE_NAME} FREEZE WITH NAME '{backup2}'")

    node.query(f"TRUNCATE TABLE {TABLE_NAME}")

    # Unfreeze single partition from backup1.
    node.query(f"ALTER TABLE {TABLE_NAME} UNFREEZE PARTITION '2020-01-03' WITH NAME '{backup1}'")
    # Unfreeze all partitions from backup2.
    node.query(f"ALTER TABLE {TABLE_NAME} UNFREEZE WITH NAME '{backup2}'")
def test_apply_new_settings(cluster):
    """Insert, lower ``max_single_part_upload_size`` in the config, reload, insert again.

    The test makes no assertions; it passes when the config reload and both
    inserts complete without raising.
    """
    node = cluster.instances[NODE_NAME]
    create_table(node, TABLE_NAME)

    node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 4096)}")

    # Force multi-part upload mode.
    configured_limit = "<max_single_part_upload_size>33554432</max_single_part_upload_size>"
    lowered_limit = "<max_single_part_upload_size>4096</max_single_part_upload_size>"
    replace_config(configured_limit, lowered_limit)
    node.query("SYSTEM RELOAD CONFIG")

    node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-04', 4096, -1)}")
# NOTE: this test takes a couple of minutes when run together with other tests
@pytest.mark.long_run
def test_restart_during_load(cluster):
    """Run SELECTs concurrently with ``SYSTEM RESTART DISK`` and require correct results.

    Two batches of 1024 * 1024 generated values are inserted (the second with
    ``sign=-1``). Four reader threads then each run five ``sum(id)`` queries
    that must return ``(0)``, while one more thread restarts the blob storage
    disk twice. A failure in any thread fails the test.
    """
    node = cluster.instances[NODE_NAME]
    create_table(node, TABLE_NAME)

    # Remove the <container_already_exists>false</container_already_exists>
    # element from the storage config.
    # NOTE(review): no SYSTEM RELOAD CONFIG follows; presumably the disk
    # restarts below pick the change up - confirm.
    replace_config("<container_already_exists>false</container_already_exists>", "")

    node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-04', 1024 * 1024)}")
    node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-05', 1024 * 1024, -1)}")

    def read():
        for ii in range(0, 5):
            logging.info(f"Executing {ii} query")
            assert node.query(f"SELECT sum(id) FROM {TABLE_NAME} FORMAT Values") == "(0)"
            logging.info(f"Query {ii} executed")
            time.sleep(0.2)

    def restart_disk():
        for iii in range(0, 2):
            logging.info(f"Restarting disk, attempt {iii}")
            node.query(f"SYSTEM RESTART DISK {BLOB_STORAGE_DISK}")
            logging.info(f"Disk restarted, attempt {iii}")
            time.sleep(0.5)

    # Four readers plus a single disk-restarting thread.
    threads = [SafeThread(target=read) for _ in range(0, 4)]
    threads.append(SafeThread(target=restart_disk))

    for thread in threads:
        thread.start()

    # Join every thread before failing: bailing out on the first error would
    # leave the remaining workers running against the node after the test
    # has already been reported as failed.
    first_error = None
    for thread in threads:
        try:
            thread.join()
        except Exception as error:  # pylint: disable=broad-except
            if first_error is None:
                first_error = error

    if first_error is not None:
        raise first_error