Generate the Azure storage config at runtime with the Azurite port and use azure_query in test_azure_blob_storage_zero_copy_replication

This commit is contained in:
Smita Kulkarni 2023-10-26 11:40:44 +02:00
parent 72f6a3b029
commit ec21560a3b
3 changed files with 82 additions and 59 deletions

View File

@ -0,0 +1,27 @@
<clickhouse>
<!-- Static server config for the test nodes: cluster topology, macros and MergeTree settings. -->
<remote_servers>
<!-- test_cluster: two shards with a single replica each (node1 and node2). -->
<test_cluster>
<shard>
<replica>
<host>node1</host>
<port>9000</port>
</replica>
</shard>
<shard>
<replica>
<host>node2</host>
<port>9000</port>
</replica>
</shard>
</test_cluster>
</remote_servers>
<macros>
<!-- Makes the {cluster} macro expand to the cluster defined above. -->
<cluster>test_cluster</cluster>
</macros>
<merge_tree>
<!-- Turns on zero-copy replication for remote-filesystem disks; this is the feature under test. -->
<allow_remote_fs_zero_copy_replication>true</allow_remote_fs_zero_copy_replication>
<!-- NOTE(review): a ratio of 1.0 presumably keeps columns out of sparse serialization; confirm against the MergeTree settings reference. -->
<ratio_of_defaults_for_sparse_serialization>1.0</ratio_of_defaults_for_sparse_serialization>
</merge_tree>
</clickhouse>

View File

@ -1,50 +0,0 @@
<clickhouse>
<storage_configuration>
<disks>
<blob_storage_disk>
<type>azure_blob_storage</type>
<storage_account_url>http://azurite1:10000/devstoreaccount1</storage_account_url>
<container_name>cont</container_name>
<skip_access_check>false</skip_access_check>
<!-- default credentials for Azurite storage account -->
<account_name>devstoreaccount1</account_name>
<account_key>Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==</account_key>
</blob_storage_disk>
</disks>
<policies>
<blob_storage_policy>
<volumes>
<main>
<disk>blob_storage_disk</disk>
</main>
</volumes>
</blob_storage_policy>
</policies>
</storage_configuration>
<remote_servers>
<test_cluster>
<shard>
<replica>
<host>node1</host>
<port>9000</port>
</replica>
</shard>
<shard>
<replica>
<host>node2</host>
<port>9000</port>
</replica>
</shard>
</test_cluster>
</remote_servers>
<macros>
<cluster>test_cluster</cluster>
</macros>
<merge_tree>
<allow_remote_fs_zero_copy_replication>true</allow_remote_fs_zero_copy_replication>
<ratio_of_defaults_for_sparse_serialization>1.0</ratio_of_defaults_for_sparse_serialization>
</merge_tree>
</clickhouse>

View File

@ -1,6 +1,8 @@
import logging
import pytest
from helpers.cluster import ClickHouseCluster
from test_storage_azure_blob_storage.test import azure_query
import os
logging.getLogger().setLevel(logging.INFO)
@ -14,21 +16,65 @@ CLUSTER_NAME = "test_cluster"
drop_table_statement = f"DROP TABLE {TABLE_NAME} ON CLUSTER {CLUSTER_NAME} SYNC"
def generate_cluster_def(port):
    """Write the Azure Blob Storage disk config for this test and return its path.

    The config is generated instead of being a static file so that the
    storage account URL can carry the port supplied by the caller.

    Args:
        port: Port substituted into ``storage_account_url`` of the
            ``blob_storage_disk`` definition.

    Returns:
        Path of the written ``_gen/storage_conf.xml`` file, located next to
        this module. The ``_gen`` directory is created if it is missing and
        an existing file is overwritten.
    """
    path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "./_gen/storage_conf.xml",
    )
    os.makedirs(os.path.dirname(path), exist_ok=True)
    # Explicit encoding: the generated XML must not depend on the locale default.
    with open(path, "w", encoding="utf-8") as f:
        f.write(
            f"""<clickhouse>
<storage_configuration>
<disks>
<blob_storage_disk>
<type>azure_blob_storage</type>
<storage_account_url>http://azurite1:{port}/devstoreaccount1</storage_account_url>
<container_name>cont</container_name>
<skip_access_check>false</skip_access_check>
<!-- default credentials for Azurite storage account -->
<account_name>devstoreaccount1</account_name>
<account_key>Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==</account_key>
</blob_storage_disk>
</disks>
<policies>
<blob_storage_policy>
<volumes>
<main>
<disk>blob_storage_disk</disk>
</main>
</volumes>
</blob_storage_policy>
</policies>
</storage_configuration>
</clickhouse>
"""
        )
    return path
@pytest.fixture(scope="module")
def cluster():
try:
cluster = ClickHouseCluster(__file__)
port = cluster.azurite_port
path = generate_cluster_def(port)
cluster.add_instance(
NODE1,
main_configs=["configs/config.d/storage_conf.xml"],
main_configs=[
"configs/config.d/config.xml",
path,
],
macros={"replica": "1"},
with_azurite=True,
with_zookeeper=True,
)
cluster.add_instance(
NODE2,
main_configs=["configs/config.d/storage_conf.xml"],
main_configs=[
"configs/config.d/config.xml",
path,
],
macros={"replica": "2"},
with_azurite=True,
with_zookeeper=True,
@ -57,7 +103,7 @@ def create_table(node, table_name, replica, **additional_settings):
ORDER BY id
SETTINGS {",".join((k+"="+repr(v) for k, v in settings.items()))}"""
node.query(create_table_statement)
azure_query(node, create_table_statement)
assert node.query(f"SELECT COUNT(*) FROM {table_name} FORMAT Values") == "(0)"
@ -80,27 +126,27 @@ def test_zero_copy_replication(cluster):
values1 = "(0,'data'),(1,'data')"
values2 = "(2,'data'),(3,'data')"
node1.query(f"INSERT INTO {TABLE_NAME} VALUES {values1}")
azure_query(node1,f"INSERT INTO {TABLE_NAME} VALUES {values1}")
node2.query(f"SYSTEM SYNC REPLICA {TABLE_NAME}")
assert (
node1.query(f"SELECT * FROM {TABLE_NAME} order by id FORMAT Values") == values1
azure_query(node1,f"SELECT * FROM {TABLE_NAME} order by id FORMAT Values") == values1
)
assert (
node2.query(f"SELECT * FROM {TABLE_NAME} order by id FORMAT Values") == values1
azure_query(node2,f"SELECT * FROM {TABLE_NAME} order by id FORMAT Values") == values1
)
# Based on version 21.x - should be only one file with size 100+ (checksums.txt), used by both nodes
assert get_large_objects_count(blob_container_client) == 1
node2.query(f"INSERT INTO {TABLE_NAME} VALUES {values2}")
azure_query(node2,f"INSERT INTO {TABLE_NAME} VALUES {values2}")
node1.query(f"SYSTEM SYNC REPLICA {TABLE_NAME}")
assert (
node2.query(f"SELECT * FROM {TABLE_NAME} order by id FORMAT Values")
azure_query(node2,f"SELECT * FROM {TABLE_NAME} order by id FORMAT Values")
== values1 + "," + values2
)
assert (
node1.query(f"SELECT * FROM {TABLE_NAME} order by id FORMAT Values")
azure_query(node1,f"SELECT * FROM {TABLE_NAME} order by id FORMAT Values")
== values1 + "," + values2
)