mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-15 10:52:30 +00:00
288 lines
9.6 KiB
Python
288 lines
9.6 KiB
Python
import os
|
|
import sys
|
|
import time
|
|
import logging
|
|
import pytest
|
|
|
|
from helpers.cluster import ClickHouseCluster
|
|
from helpers.test_tools import TSV
|
|
|
|
import docker
|
|
|
|
CURRENT_TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
|
sys.path.insert(0, os.path.dirname(CURRENT_TEST_DIR))
|
|
|
|
cluster = ClickHouseCluster(__file__)
|
|
|
|
|
|
@pytest.fixture(scope="module")
|
|
def started_cluster():
|
|
global cluster
|
|
try:
|
|
|
|
for name in ["first", "second", "third"]:
|
|
cluster.add_instance(
|
|
name,
|
|
main_configs=[
|
|
"configs_three_nodes/conf.d/clusters.xml",
|
|
"configs_three_nodes/conf.d/ddl.xml",
|
|
],
|
|
user_configs=["configs_three_nodes/users.xml"],
|
|
with_zookeeper=True,
|
|
)
|
|
|
|
cluster.start()
|
|
yield cluster
|
|
|
|
finally:
|
|
cluster.shutdown()
|
|
|
|
|
|
class Task:
|
|
def __init__(self, cluster):
|
|
self.cluster = cluster
|
|
self.zk_task_path = "/clickhouse-copier/task"
|
|
self.container_task_file = "/task_taxi_data.xml"
|
|
|
|
for instance_name, _ in cluster.instances.items():
|
|
instance = cluster.instances[instance_name]
|
|
instance.copy_file_to_container(
|
|
os.path.join(CURRENT_TEST_DIR, "./task_taxi_data.xml"),
|
|
self.container_task_file,
|
|
)
|
|
logging.debug(
|
|
f"Copied task file to container of '{instance_name}' instance. Path {self.container_task_file}"
|
|
)
|
|
|
|
def start(self):
|
|
for name in ["first", "second", "third"]:
|
|
node = cluster.instances[name]
|
|
node.query("DROP DATABASE IF EXISTS dailyhistory SYNC;")
|
|
node.query("DROP DATABASE IF EXISTS monthlyhistory SYNC;")
|
|
|
|
first = cluster.instances["first"]
|
|
|
|
# daily partition database
|
|
first.query("CREATE DATABASE IF NOT EXISTS dailyhistory on cluster events;")
|
|
first.query(
|
|
"""CREATE TABLE dailyhistory.yellow_tripdata_staging ON CLUSTER events
|
|
(
|
|
id UUID DEFAULT generateUUIDv4(),
|
|
vendor_id String,
|
|
tpep_pickup_datetime DateTime('UTC'),
|
|
tpep_dropoff_datetime DateTime('UTC'),
|
|
passenger_count Nullable(Float64),
|
|
trip_distance String,
|
|
pickup_longitude Float64,
|
|
pickup_latitude Float64,
|
|
rate_code_id String,
|
|
store_and_fwd_flag String,
|
|
dropoff_longitude Float64,
|
|
dropoff_latitude Float64,
|
|
payment_type String,
|
|
fare_amount String,
|
|
extra String,
|
|
mta_tax String,
|
|
tip_amount String,
|
|
tolls_amount String,
|
|
improvement_surcharge String,
|
|
total_amount String,
|
|
pickup_location_id String,
|
|
dropoff_location_id String,
|
|
congestion_surcharge String,
|
|
junk1 String, junk2 String
|
|
)
|
|
Engine = ReplacingMergeTree()
|
|
PRIMARY KEY (tpep_pickup_datetime, id)
|
|
ORDER BY (tpep_pickup_datetime, id)
|
|
PARTITION BY (toYYYYMMDD(tpep_pickup_datetime))"""
|
|
)
|
|
|
|
first.query(
|
|
"""CREATE TABLE dailyhistory.yellow_tripdata
|
|
ON CLUSTER events
|
|
AS dailyhistory.yellow_tripdata_staging
|
|
ENGINE = Distributed('events', 'dailyhistory', yellow_tripdata_staging, sipHash64(id) % 3);"""
|
|
)
|
|
|
|
first.query(
|
|
"""INSERT INTO dailyhistory.yellow_tripdata
|
|
SELECT * FROM generateRandom(
|
|
'id UUID DEFAULT generateUUIDv4(),
|
|
vendor_id String,
|
|
tpep_pickup_datetime DateTime(\\'UTC\\'),
|
|
tpep_dropoff_datetime DateTime(\\'UTC\\'),
|
|
passenger_count Nullable(Float64),
|
|
trip_distance String,
|
|
pickup_longitude Float64,
|
|
pickup_latitude Float64,
|
|
rate_code_id String,
|
|
store_and_fwd_flag String,
|
|
dropoff_longitude Float64,
|
|
dropoff_latitude Float64,
|
|
payment_type String,
|
|
fare_amount String,
|
|
extra String,
|
|
mta_tax String,
|
|
tip_amount String,
|
|
tolls_amount String,
|
|
improvement_surcharge String,
|
|
total_amount String,
|
|
pickup_location_id String,
|
|
dropoff_location_id String,
|
|
congestion_surcharge String,
|
|
junk1 String,
|
|
junk2 String',
|
|
1, 10, 2) LIMIT 50;"""
|
|
)
|
|
|
|
# monthly partition database
|
|
first.query("create database IF NOT EXISTS monthlyhistory on cluster events;")
|
|
first.query(
|
|
"""CREATE TABLE monthlyhistory.yellow_tripdata_staging ON CLUSTER events
|
|
(
|
|
id UUID DEFAULT generateUUIDv4(),
|
|
vendor_id String,
|
|
tpep_pickup_datetime DateTime('UTC'),
|
|
tpep_dropoff_datetime DateTime('UTC'),
|
|
passenger_count Nullable(Float64),
|
|
trip_distance String,
|
|
pickup_longitude Float64,
|
|
pickup_latitude Float64,
|
|
rate_code_id String,
|
|
store_and_fwd_flag String,
|
|
dropoff_longitude Float64,
|
|
dropoff_latitude Float64,
|
|
payment_type String,
|
|
fare_amount String,
|
|
extra String,
|
|
mta_tax String,
|
|
tip_amount String,
|
|
tolls_amount String,
|
|
improvement_surcharge String,
|
|
total_amount String,
|
|
pickup_location_id String,
|
|
dropoff_location_id String,
|
|
congestion_surcharge String,
|
|
junk1 String,
|
|
junk2 String
|
|
)
|
|
Engine = ReplacingMergeTree()
|
|
PRIMARY KEY (tpep_pickup_datetime, id)
|
|
ORDER BY (tpep_pickup_datetime, id)
|
|
PARTITION BY (pickup_location_id, toYYYYMM(tpep_pickup_datetime))"""
|
|
)
|
|
|
|
first.query(
|
|
"""CREATE TABLE monthlyhistory.yellow_tripdata
|
|
ON CLUSTER events
|
|
AS monthlyhistory.yellow_tripdata_staging
|
|
ENGINE = Distributed('events', 'monthlyhistory', yellow_tripdata_staging, sipHash64(id) % 3);"""
|
|
)
|
|
|
|
def check(self):
|
|
first = cluster.instances["first"]
|
|
a = TSV(first.query("SELECT count() from dailyhistory.yellow_tripdata"))
|
|
b = TSV(first.query("SELECT count() from monthlyhistory.yellow_tripdata"))
|
|
assert a == b, "Distributed tables"
|
|
|
|
for instance_name, instance in cluster.instances.items():
|
|
instance = cluster.instances[instance_name]
|
|
a = instance.query(
|
|
"SELECT count() from dailyhistory.yellow_tripdata_staging"
|
|
)
|
|
b = instance.query(
|
|
"SELECT count() from monthlyhistory.yellow_tripdata_staging"
|
|
)
|
|
assert a == b, "MergeTree tables on each shard"
|
|
|
|
a = TSV(
|
|
instance.query(
|
|
"SELECT sipHash64(*) from dailyhistory.yellow_tripdata_staging ORDER BY id"
|
|
)
|
|
)
|
|
b = TSV(
|
|
instance.query(
|
|
"SELECT sipHash64(*) from monthlyhistory.yellow_tripdata_staging ORDER BY id"
|
|
)
|
|
)
|
|
|
|
assert a == b, "Data on each shard"
|
|
|
|
for name in ["first", "second", "third"]:
|
|
node = cluster.instances[name]
|
|
node.query("DROP DATABASE IF EXISTS dailyhistory SYNC;")
|
|
node.query("DROP DATABASE IF EXISTS monthlyhistory SYNC;")
|
|
|
|
|
|
def execute_task(started_cluster, task, cmd_options):
|
|
task.start()
|
|
|
|
zk = started_cluster.get_kazoo_client("zoo1")
|
|
logging.debug("Use ZooKeeper server: {}:{}".format(zk.hosts[0][0], zk.hosts[0][1]))
|
|
|
|
# Run cluster-copier processes on each node
|
|
docker_api = started_cluster.docker_client.api
|
|
copiers_exec_ids = []
|
|
|
|
cmd = [
|
|
"/usr/bin/clickhouse",
|
|
"copier",
|
|
"--config",
|
|
"/etc/clickhouse-server/config-copier.xml",
|
|
"--task-path",
|
|
task.zk_task_path,
|
|
"--task-file",
|
|
task.container_task_file,
|
|
"--task-upload-force",
|
|
"true",
|
|
"--base-dir",
|
|
"/var/log/clickhouse-server/copier",
|
|
]
|
|
cmd += cmd_options
|
|
|
|
logging.debug(f"execute_task cmd: {cmd}")
|
|
|
|
for instance_name in started_cluster.instances.keys():
|
|
instance = started_cluster.instances[instance_name]
|
|
container = instance.get_docker_handle()
|
|
instance.copy_file_to_container(
|
|
os.path.join(CURRENT_TEST_DIR, "configs_three_nodes/config-copier.xml"),
|
|
"/etc/clickhouse-server/config-copier.xml",
|
|
)
|
|
logging.info("Copied copier config to {}".format(instance.name))
|
|
exec_id = docker_api.exec_create(container.id, cmd, stderr=True)
|
|
output = docker_api.exec_start(exec_id).decode("utf8")
|
|
logging.info(output)
|
|
copiers_exec_ids.append(exec_id)
|
|
logging.info(
|
|
"Copier for {} ({}) has started".format(instance.name, instance.ip_address)
|
|
)
|
|
|
|
# time.sleep(1000)
|
|
|
|
# Wait for copiers stopping and check their return codes
|
|
for exec_id, instance in zip(
|
|
copiers_exec_ids, iter(started_cluster.instances.values())
|
|
):
|
|
while True:
|
|
res = docker_api.exec_inspect(exec_id)
|
|
if not res["Running"]:
|
|
break
|
|
time.sleep(1)
|
|
|
|
assert res["ExitCode"] == 0, "Instance: {} ({}). Info: {}".format(
|
|
instance.name, instance.ip_address, repr(res)
|
|
)
|
|
|
|
try:
|
|
task.check()
|
|
finally:
|
|
zk.delete(task.zk_task_path, recursive=True)
|
|
|
|
|
|
# Tests
|
|
@pytest.mark.timeout(600)
|
|
def test(started_cluster):
|
|
execute_task(started_cluster, Task(started_cluster), [])
|