import os
import sys
import time
import logging
import pytest

from helpers.cluster import ClickHouseCluster
from helpers.test_tools import TSV

import docker

CURRENT_TEST_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.dirname(CURRENT_TEST_DIR))

cluster = ClickHouseCluster(__file__, name="copier_test_two_nodes")
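
# End-to-end tests for clickhouse-copier between two nodes, "first_of_two" and
# "second_of_two". Each Task* class below prepares a source table on the first node
# and a destination table on the second; execute_task() then runs `clickhouse copier`
# inside every instance's container and checks that row counts and per-row hashes of
# source and destination match.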


@pytest.fixture(scope="module")
def started_cluster():
    global cluster
    try:
        for name in ["first_of_two", "second_of_two"]:
            instance = cluster.add_instance(
                name,
                main_configs=[
                    "configs_two_nodes/conf.d/clusters.xml",
                    "configs_two_nodes/conf.d/ddl.xml",
                    "configs_two_nodes/conf.d/storage_configuration.xml",
                ],
                user_configs=["configs_two_nodes/users.xml"],
                with_zookeeper=True,
            )

        cluster.start()

        for name in ["first_of_two", "second_of_two"]:
            instance = cluster.instances[name]
            # Create mount points for the JBOD and external disks used in these tests.
            instance.exec_in_container(["bash", "-c", "mkdir /jbod1"])
            instance.exec_in_container(["bash", "-c", "mkdir /jbod2"])
            instance.exec_in_container(["bash", "-c", "mkdir /external"])

        yield cluster

    finally:
        cluster.shutdown()


# Will copy table from `first` node to `second`
class TaskWithDifferentSchema:
    def __init__(self, cluster):
        self.cluster = cluster
        self.zk_task_path = "/clickhouse-copier/task_with_different_schema"
        self.container_task_file = "/task_with_different_schema.xml"

        for instance_name, _ in cluster.instances.items():
            instance = cluster.instances[instance_name]
            instance.copy_file_to_container(
                os.path.join(CURRENT_TEST_DIR, "./task_with_different_schema.xml"),
                self.container_task_file,
            )
            print(
                "Copied task file to container of '{}' instance. Path {}".format(
                    instance_name, self.container_task_file
                )
            )

    def start(self):
        first = cluster.instances["first_of_two"]
        second = cluster.instances["second_of_two"]

        first.query("DROP DATABASE IF EXISTS db_different_schema SYNC")
        second.query("DROP DATABASE IF EXISTS db_different_schema SYNC")

        first.query("CREATE DATABASE IF NOT EXISTS db_different_schema;")
        first.query(
            """CREATE TABLE db_different_schema.source
            (
                Column1 String,
                Column2 UInt32,
                Column3 Date,
                Column4 DateTime,
                Column5 UInt16,
                Column6 String,
                Column7 String,
                Column8 String,
                Column9 String,
                Column10 String,
                Column11 String,
                Column12 Decimal(3, 1),
                Column13 DateTime,
                Column14 UInt16
            )
            ENGINE = MergeTree()
            PARTITION BY (toYYYYMMDD(Column3), Column3)
            PRIMARY KEY (Column1, Column2, Column3, Column4, Column6, Column7, Column8, Column9)
            ORDER BY (Column1, Column2, Column3, Column4, Column6, Column7, Column8, Column9)
            SETTINGS index_granularity = 8192"""
        )
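
        # generateRandom(structure, seed, max_string_length, max_array_length);
        # the fixed seed of 1 is assumed to make the 50 sample rows deterministic.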
        first.query(
            """INSERT INTO db_different_schema.source SELECT * FROM generateRandom(
            'Column1 String, Column2 UInt32, Column3 Date, Column4 DateTime, Column5 UInt16,
            Column6 String, Column7 String, Column8 String, Column9 String, Column10 String,
            Column11 String, Column12 Decimal(3, 1), Column13 DateTime, Column14 UInt16', 1, 10, 2) LIMIT 50;"""
        )

        second.query("CREATE DATABASE IF NOT EXISTS db_different_schema;")
        second.query(
            """CREATE TABLE db_different_schema.destination
            (
                Column1 LowCardinality(String) CODEC(LZ4),
                Column2 UInt32 CODEC(LZ4),
                Column3 Date CODEC(DoubleDelta, LZ4),
                Column4 DateTime CODEC(DoubleDelta, LZ4),
                Column5 UInt16 CODEC(LZ4),
                Column6 LowCardinality(String) CODEC(ZSTD),
                Column7 LowCardinality(String) CODEC(ZSTD),
                Column8 LowCardinality(String) CODEC(ZSTD),
                Column9 LowCardinality(String) CODEC(ZSTD),
                Column10 String CODEC(ZSTD(6)),
                Column11 LowCardinality(String) CODEC(LZ4),
                Column12 Decimal(3, 1) CODEC(LZ4),
                Column13 DateTime CODEC(DoubleDelta, LZ4),
                Column14 UInt16 CODEC(LZ4)
            ) ENGINE = MergeTree()
            PARTITION BY toYYYYMMDD(Column3)
            ORDER BY (Column9, Column1, Column2, Column3, Column4);"""
        )

        print("Preparation completed")

    def check(self):
        first = cluster.instances["first_of_two"]
        second = cluster.instances["second_of_two"]

        a = first.query("SELECT count() FROM db_different_schema.source")
        b = second.query("SELECT count() FROM db_different_schema.destination")
        assert a == b, "Count"
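
        # Compare per-row sipHash64 fingerprints of source and destination,
        # ordered by every column, so the copied data itself is verified.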
        a = TSV(
            first.query(
                """SELECT sipHash64(*) FROM db_different_schema.source
                ORDER BY (Column1, Column2, Column3, Column4, Column5, Column6, Column7, Column8, Column9, Column10, Column11, Column12, Column13, Column14)"""
            )
        )
        b = TSV(
            second.query(
                """SELECT sipHash64(*) FROM db_different_schema.destination
                ORDER BY (Column1, Column2, Column3, Column4, Column5, Column6, Column7, Column8, Column9, Column10, Column11, Column12, Column13, Column14)"""
            )
        )
        assert a == b, "Data"

        first.query("DROP DATABASE IF EXISTS db_different_schema SYNC")
        second.query("DROP DATABASE IF EXISTS db_different_schema SYNC")


# Just simple copying, but the table schema has TTL on some columns.
# The destination table also has a slightly different schema.
class TaskTTL:
    def __init__(self, cluster):
        self.cluster = cluster
        self.zk_task_path = "/clickhouse-copier/task_ttl_columns"
        self.container_task_file = "/task_ttl_columns.xml"

        for instance_name, _ in cluster.instances.items():
            instance = cluster.instances[instance_name]
            instance.copy_file_to_container(
                os.path.join(CURRENT_TEST_DIR, "./task_ttl_columns.xml"),
                self.container_task_file,
            )
            print(
                "Copied task file to container of '{}' instance. Path {}".format(
                    instance_name, self.container_task_file
                )
            )

    def start(self):
        first = cluster.instances["first_of_two"]
        second = cluster.instances["second_of_two"]

        first.query("DROP DATABASE IF EXISTS db_ttl_columns SYNC")
        second.query("DROP DATABASE IF EXISTS db_ttl_columns SYNC")

        first.query("CREATE DATABASE IF NOT EXISTS db_ttl_columns;")
        first.query(
            """CREATE TABLE db_ttl_columns.source
            (
                Column1 String,
                Column2 UInt32,
                Column3 Date,
                Column4 DateTime,
                Column5 UInt16,
                Column6 String TTL now() + INTERVAL 1 MONTH,
                Column7 Decimal(3, 1) TTL now() + INTERVAL 1 MONTH,
                Column8 Tuple(Float64, Float64) TTL now() + INTERVAL 1 MONTH
            )
            ENGINE = MergeTree()
            PARTITION BY (toYYYYMMDD(Column3), Column3)
            PRIMARY KEY (Column1, Column2, Column3)
            ORDER BY (Column1, Column2, Column3)
            SETTINGS index_granularity = 8192"""
        )

        first.query(
            """INSERT INTO db_ttl_columns.source SELECT * FROM generateRandom(
            'Column1 String, Column2 UInt32, Column3 Date, Column4 DateTime, Column5 UInt16,
            Column6 String, Column7 Decimal(3, 1), Column8 Tuple(Float64, Float64)', 1, 10, 2) LIMIT 50;"""
        )

        second.query("CREATE DATABASE IF NOT EXISTS db_ttl_columns;")
        second.query(
            """CREATE TABLE db_ttl_columns.destination
            (
                Column1 String,
                Column2 UInt32,
                Column3 Date,
                Column4 DateTime TTL now() + INTERVAL 1 MONTH,
                Column5 UInt16 TTL now() + INTERVAL 1 MONTH,
                Column6 String TTL now() + INTERVAL 1 MONTH,
                Column7 Decimal(3, 1) TTL now() + INTERVAL 1 MONTH,
                Column8 Tuple(Float64, Float64)
            ) ENGINE = MergeTree()
            PARTITION BY toYYYYMMDD(Column3)
            ORDER BY (Column3, Column2, Column1);"""
        )

        print("Preparation completed")

    def check(self):
        first = cluster.instances["first_of_two"]
        second = cluster.instances["second_of_two"]

        a = first.query("SELECT count() FROM db_ttl_columns.source")
        b = second.query("SELECT count() FROM db_ttl_columns.destination")
        assert a == b, "Count"

        a = TSV(
            first.query(
                """SELECT sipHash64(*) FROM db_ttl_columns.source
                ORDER BY (Column1, Column2, Column3, Column4, Column5, Column6, Column7, Column8)"""
            )
        )
        b = TSV(
            second.query(
                """SELECT sipHash64(*) FROM db_ttl_columns.destination
                ORDER BY (Column1, Column2, Column3, Column4, Column5, Column6, Column7, Column8)"""
            )
        )
        assert a == b, "Data"

        first.query("DROP DATABASE IF EXISTS db_ttl_columns SYNC")
        second.query("DROP DATABASE IF EXISTS db_ttl_columns SYNC")


class TaskSkipIndex:
    def __init__(self, cluster):
        self.cluster = cluster
        self.zk_task_path = "/clickhouse-copier/task_skip_index"
        self.container_task_file = "/task_skip_index.xml"

        for instance_name, _ in cluster.instances.items():
            instance = cluster.instances[instance_name]
            instance.copy_file_to_container(
                os.path.join(CURRENT_TEST_DIR, "./task_skip_index.xml"),
                self.container_task_file,
            )
            print(
                "Copied task file to container of '{}' instance. Path {}".format(
                    instance_name, self.container_task_file
                )
            )

    def start(self):
        first = cluster.instances["first_of_two"]
        second = cluster.instances["second_of_two"]

        first.query("DROP DATABASE IF EXISTS db_skip_index SYNC")
        second.query("DROP DATABASE IF EXISTS db_skip_index SYNC")

        first.query("CREATE DATABASE IF NOT EXISTS db_skip_index;")
        first.query(
            """CREATE TABLE db_skip_index.source
            (
                Column1 UInt64,
                Column2 Int32,
                Column3 Date,
                Column4 DateTime,
                Column5 String,
                INDEX a (Column1 * Column2, Column5) TYPE minmax GRANULARITY 3,
                INDEX b (Column1 * length(Column5)) TYPE set(1000) GRANULARITY 4
            )
            ENGINE = MergeTree()
            PARTITION BY (toYYYYMMDD(Column3), Column3)
            PRIMARY KEY (Column1, Column2, Column3)
            ORDER BY (Column1, Column2, Column3)
            SETTINGS index_granularity = 8192"""
        )

        first.query(
            """INSERT INTO db_skip_index.source SELECT * FROM generateRandom(
            'Column1 UInt64, Column2 Int32, Column3 Date, Column4 DateTime, Column5 String', 1, 10, 2) LIMIT 100;"""
        )

        second.query("CREATE DATABASE IF NOT EXISTS db_skip_index;")
        second.query(
            """CREATE TABLE db_skip_index.destination
            (
                Column1 UInt64,
                Column2 Int32,
                Column3 Date,
                Column4 DateTime,
                Column5 String,
                INDEX a (Column1 * Column2, Column5) TYPE minmax GRANULARITY 3,
                INDEX b (Column1 * length(Column5)) TYPE set(1000) GRANULARITY 4
            ) ENGINE = MergeTree()
            PARTITION BY toYYYYMMDD(Column3)
            ORDER BY (Column3, Column2, Column1);"""
        )

        print("Preparation completed")

    def check(self):
        first = cluster.instances["first_of_two"]
        second = cluster.instances["second_of_two"]

        a = first.query("SELECT count() FROM db_skip_index.source")
        b = second.query("SELECT count() FROM db_skip_index.destination")
        assert a == b, "Count"

        a = TSV(
            first.query(
                """SELECT sipHash64(*) FROM db_skip_index.source
                ORDER BY (Column1, Column2, Column3, Column4, Column5)"""
            )
        )
        b = TSV(
            second.query(
                """SELECT sipHash64(*) FROM db_skip_index.destination
                ORDER BY (Column1, Column2, Column3, Column4, Column5)"""
            )
        )
        assert a == b, "Data"

        first.query("DROP DATABASE IF EXISTS db_skip_index SYNC")
        second.query("DROP DATABASE IF EXISTS db_skip_index SYNC")
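

# The tables below use the 'external_with_jbods' storage policy and a TTL MOVE to the
# 'external' volume; both are assumed to be defined in
# configs_two_nodes/conf.d/storage_configuration.xml, with the /jbod1, /jbod2 and
# /external directories created by the started_cluster fixture.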
class TaskTTLMoveToVolume:
    def __init__(self, cluster):
        self.cluster = cluster
        self.zk_task_path = "/clickhouse-copier/task_ttl_move_to_volume"
        self.container_task_file = "/task_ttl_move_to_volume.xml"

        for instance_name, _ in cluster.instances.items():
            instance = cluster.instances[instance_name]
            instance.copy_file_to_container(
                os.path.join(CURRENT_TEST_DIR, "./task_ttl_move_to_volume.xml"),
                self.container_task_file,
            )
            print(
                "Copied task file to container of '{}' instance. Path {}".format(
                    instance_name, self.container_task_file
                )
            )

    def start(self):
        first = cluster.instances["first_of_two"]
        second = cluster.instances["second_of_two"]

        first.query("DROP DATABASE IF EXISTS db_move_to_volume SYNC")
        second.query("DROP DATABASE IF EXISTS db_move_to_volume SYNC")

        first.query("CREATE DATABASE IF NOT EXISTS db_move_to_volume;")
        first.query(
            """CREATE TABLE db_move_to_volume.source
            (
                Column1 UInt64,
                Column2 Int32,
                Column3 Date,
                Column4 DateTime,
                Column5 String
            )
            ENGINE = MergeTree()
            PARTITION BY (toYYYYMMDD(Column3), Column3)
            PRIMARY KEY (Column1, Column2, Column3)
            ORDER BY (Column1, Column2, Column3)
            TTL Column3 + INTERVAL 1 MONTH TO VOLUME 'external'
            SETTINGS storage_policy = 'external_with_jbods';"""
        )

        first.query(
            """INSERT INTO db_move_to_volume.source SELECT * FROM generateRandom(
            'Column1 UInt64, Column2 Int32, Column3 Date, Column4 DateTime, Column5 String', 1, 10, 2) LIMIT 100;"""
        )

        second.query("CREATE DATABASE IF NOT EXISTS db_move_to_volume;")
        second.query(
            """CREATE TABLE db_move_to_volume.destination
            (
                Column1 UInt64,
                Column2 Int32,
                Column3 Date,
                Column4 DateTime,
                Column5 String
            ) ENGINE = MergeTree()
            PARTITION BY toYYYYMMDD(Column3)
            ORDER BY (Column3, Column2, Column1)
            TTL Column3 + INTERVAL 1 MONTH TO VOLUME 'external'
            SETTINGS storage_policy = 'external_with_jbods';"""
        )

        print("Preparation completed")

    def check(self):
        first = cluster.instances["first_of_two"]
        second = cluster.instances["second_of_two"]

        a = first.query("SELECT count() FROM db_move_to_volume.source")
        b = second.query("SELECT count() FROM db_move_to_volume.destination")
        assert a == b, "Count"

        a = TSV(
            first.query(
                """SELECT sipHash64(*) FROM db_move_to_volume.source
                ORDER BY (Column1, Column2, Column3, Column4, Column5)"""
            )
        )
        b = TSV(
            second.query(
                """SELECT sipHash64(*) FROM db_move_to_volume.destination
                ORDER BY (Column1, Column2, Column3, Column4, Column5)"""
            )
        )
        assert a == b, "Data"

        first.query("DROP DATABASE IF EXISTS db_move_to_volume SYNC")
        second.query("DROP DATABASE IF EXISTS db_move_to_volume SYNC")


class TaskDropTargetPartition:
    def __init__(self, cluster):
        self.cluster = cluster
        self.zk_task_path = "/clickhouse-copier/task_drop_target_partition"
        self.container_task_file = "/task_drop_target_partition.xml"

        for instance_name, _ in cluster.instances.items():
            instance = cluster.instances[instance_name]
            instance.copy_file_to_container(
                os.path.join(CURRENT_TEST_DIR, "./task_drop_target_partition.xml"),
                self.container_task_file,
            )
            print(
                "Copied task file to container of '{}' instance. Path {}".format(
                    instance_name, self.container_task_file
                )
            )

    def start(self):
        first = cluster.instances["first_of_two"]
        second = cluster.instances["second_of_two"]

        first.query("DROP DATABASE IF EXISTS db_drop_target_partition SYNC")
        second.query("DROP DATABASE IF EXISTS db_drop_target_partition SYNC")

        first.query("CREATE DATABASE IF NOT EXISTS db_drop_target_partition;")
        first.query(
            """CREATE TABLE db_drop_target_partition.source
            (
                Column1 UInt64,
                Column2 Int32,
                Column3 Date,
                Column4 DateTime,
                Column5 String
            )
            ENGINE = MergeTree()
            PARTITION BY (toYYYYMMDD(Column3), Column3)
            PRIMARY KEY (Column1, Column2, Column3)
            ORDER BY (Column1, Column2, Column3);"""
        )

        first.query(
            """INSERT INTO db_drop_target_partition.source SELECT * FROM generateRandom(
            'Column1 UInt64, Column2 Int32, Column3 Date, Column4 DateTime, Column5 String', 1, 10, 2) LIMIT 100;"""
        )

        second.query("CREATE DATABASE IF NOT EXISTS db_drop_target_partition;")
        second.query(
            """CREATE TABLE db_drop_target_partition.destination
            (
                Column1 UInt64,
                Column2 Int32,
                Column3 Date,
                Column4 DateTime,
                Column5 String
            ) ENGINE = MergeTree()
            PARTITION BY toYYYYMMDD(Column3)
            ORDER BY (Column3, Column2, Column1);"""
        )

        # Insert data in target too. It has to be dropped.
        first.query(
            """INSERT INTO db_drop_target_partition.destination SELECT * FROM db_drop_target_partition.source;"""
        )

        print("Preparation completed")

    def check(self):
        first = cluster.instances["first_of_two"]
        second = cluster.instances["second_of_two"]

        a = first.query("SELECT count() from db_drop_target_partition.source")
        b = second.query("SELECT count() from db_drop_target_partition.destination")
        assert a == b, "Count"

        a = TSV(
            first.query(
                """SELECT sipHash64(*) from db_drop_target_partition.source
                ORDER BY (Column1, Column2, Column3, Column4, Column5)"""
            )
        )
        b = TSV(
            second.query(
                """SELECT sipHash64(*) from db_drop_target_partition.destination
                ORDER BY (Column1, Column2, Column3, Column4, Column5)"""
            )
        )
        assert a == b, "Data"

        first.query("DROP DATABASE IF EXISTS db_drop_target_partition SYNC")
        second.query("DROP DATABASE IF EXISTS db_drop_target_partition SYNC")


def execute_task(started_cluster, task, cmd_options):
    task.start()

    zk = started_cluster.get_kazoo_client("zoo1")
    print("Use ZooKeeper server: {}:{}".format(zk.hosts[0][0], zk.hosts[0][1]))

    # Run cluster-copier processes on each node
    docker_api = started_cluster.docker_client.api
    copiers_exec_ids = []
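
    # Assemble the clickhouse-copier command line: it reads the copier config copied
    # into each container below, takes the task description from --task-file and
    # uploads it to the ZooKeeper node given by --task-path (--task-upload-force is
    # assumed to overwrite a task description that is already there).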
    cmd = [
        "/usr/bin/clickhouse",
        "copier",
        "--config",
        "/etc/clickhouse-server/config-copier.xml",
        "--task-path",
        task.zk_task_path,
        "--task-file",
        task.container_task_file,
        "--task-upload-force",
        "true",
        "--base-dir",
        "/var/log/clickhouse-server/copier",
    ]
    cmd += cmd_options

    print(cmd)

    for instance_name in started_cluster.instances.keys():
        instance = started_cluster.instances[instance_name]
        container = instance.get_docker_handle()
        instance.copy_file_to_container(
            os.path.join(CURRENT_TEST_DIR, "configs_two_nodes/config-copier.xml"),
            "/etc/clickhouse-server/config-copier.xml",
        )
        logging.info("Copied copier config to {}".format(instance.name))
        exec_id = docker_api.exec_create(container.id, cmd, stderr=True)
        output = docker_api.exec_start(exec_id).decode("utf8")
        logging.info(output)
        copiers_exec_ids.append(exec_id)
        logging.info(
            "Copier for {} ({}) has started".format(instance.name, instance.ip_address)
        )

    # time.sleep(1000)

    # Wait for the copiers to stop and check their return codes
    for exec_id, instance in zip(
        copiers_exec_ids, iter(started_cluster.instances.values())
    ):
        while True:
            res = docker_api.exec_inspect(exec_id)
            if not res["Running"]:
                break
            time.sleep(1)

        assert res["ExitCode"] == 0, "Instance: {} ({}). Info: {}".format(
            instance.name, instance.ip_address, repr(res)
        )

    try:
        task.check()
    finally:
        zk.delete(task.zk_task_path, recursive=True)


# Tests
@pytest.mark.skip(reason="Too flaky :(")
def test_different_schema(started_cluster):
    execute_task(started_cluster, TaskWithDifferentSchema(started_cluster), [])


@pytest.mark.skip(reason="Too flaky :(")
def test_ttl_columns(started_cluster):
    execute_task(started_cluster, TaskTTL(started_cluster), [])


@pytest.mark.skip(reason="Too flaky :(")
def test_skip_index(started_cluster):
    execute_task(started_cluster, TaskSkipIndex(started_cluster), [])


@pytest.mark.skip(reason="Too flaky :(")
def test_ttl_move_to_volume(started_cluster):
    execute_task(started_cluster, TaskTTLMoveToVolume(started_cluster), [])
|