ClickHouse/dbms/tests/integration/test_merge_tree_s3/test.py

92 lines
2.9 KiB
Python
Raw Normal View History

import logging
import random
import string
import pytest
from helpers.cluster import ClickHouseCluster
logging.getLogger().setLevel(logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler())
# Creates S3 bucket for tests and allows anonymous read-write access to it.
def prepare_s3_bucket(cluster):
minio_client = cluster.minio_client
if minio_client.bucket_exists(cluster.minio_bucket):
minio_client.remove_bucket(cluster.minio_bucket)
minio_client.make_bucket(cluster.minio_bucket)
@pytest.fixture(scope="module")
def cluster():
try:
cluster = ClickHouseCluster(__file__)
cluster.add_instance("node", config_dir="configs", with_minio=True)
logging.info("Starting cluster...")
cluster.start()
logging.info("Cluster started")
prepare_s3_bucket(cluster)
logging.info("S3 bucket created")
yield cluster
finally:
cluster.shutdown()
def random_string(length):
letters = string.ascii_letters
return ''.join(random.choice(letters) for i in range(length))
def generate_values(date_str, count):
data = [[date_str, i, random_string(10)] for i in range(count)]
data.sort(key=lambda tup: tup[1])
return ",".join(["('{}',{},'{}')".format(x, y, z) for x, y, z in data])
@pytest.mark.parametrize(
"min_rows_for_wide_part,files_overhead,files_per_part",
[
(0, 1, 14),
(8192, 1, 10)
]
)
def test_log_family_s3(cluster, min_rows_for_wide_part, files_overhead, files_per_part):
node = cluster.instances["node"]
minio = cluster.minio_client
node.query(
"""
CREATE TABLE s3_test(
dt Date,
id UInt64,
data String,
INDEX min_max (id) TYPE minmax GRANULARITY 3
) ENGINE=MergeTree()
PARTITION BY dt
ORDER BY (dt, id)
SETTINGS disable_background_merges='true', index_granularity=512, min_rows_for_wide_part={}
"""
.format(min_rows_for_wide_part)
)
assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == 1
values1 = generate_values('2020-01-03', 4096)
node.query("INSERT INTO s3_test VALUES {}".format(values1))
assert node.query("SELECT * FROM s3_test order by dt, id FORMAT Values") == values1
assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == files_overhead + files_per_part
values2 = generate_values('2020-01-04', 4096)
node.query("INSERT INTO s3_test VALUES {}".format(values2))
assert node.query("SELECT * FROM s3_test ORDER BY dt, id FORMAT Values") == values1 + "," + values2
assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == files_overhead + 2 * files_per_part
assert node.query("SELECT count(*) FROM s3_test where id = 0 FORMAT Values") == "(2)"
node.query("DROP TABLE s3_test")
assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == 0