ClickHouse/tests/integration/test_postgresql_replica_database_engine_1/test.py

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

890 lines
30 KiB
Python
Raw Normal View History

2021-02-08 23:23:51 +00:00
import pytest
2021-02-08 23:23:51 +00:00
import time
import os.path as p
import random
2021-02-08 23:23:51 +00:00
from helpers.cluster import ClickHouseCluster
from helpers.test_tools import assert_eq_with_retry
from helpers.test_tools import TSV
2021-05-10 11:31:06 +00:00
from random import randrange
import threading
2022-01-07 21:27:46 +00:00
from helpers.postgres_utility import get_postgres_conn
from helpers.postgres_utility import PostgresManager
from helpers.postgres_utility import create_replication_slot, drop_replication_slot
from helpers.postgres_utility import create_postgres_schema, drop_postgres_schema
from helpers.postgres_utility import create_postgres_table, drop_postgres_table
from helpers.postgres_utility import check_tables_are_synchronized
from helpers.postgres_utility import check_several_tables_are_synchronized
from helpers.postgres_utility import assert_nested_table_is_created
from helpers.postgres_utility import assert_number_of_columns
from helpers.postgres_utility import (
postgres_table_template,
postgres_table_template_2,
postgres_table_template_3,
postgres_table_template_4,
)
2022-01-07 21:27:46 +00:00
from helpers.postgres_utility import queries
2021-02-08 23:23:51 +00:00
# Cluster definition shared by every test in this module.
cluster = ClickHouseCluster(__file__)

# The instance is created with PostgreSQL support and stay_alive=True;
# several tests below call instance.restart_clickhouse().
instance = cluster.add_instance(
    "instance",
    main_configs=["configs/log_conf.xml"],
    user_configs=["configs/users.xml"],
    with_postgres=True,
    stay_alive=True,
)

# Initialised by the started_cluster fixture once the cluster is running.
pg_manager = PostgresManager()
2021-02-08 23:23:51 +00:00
@pytest.fixture(scope="module")
def started_cluster():
    """Start the cluster once per module and yield it to the tests.

    After the cluster is up, ``pg_manager`` is initialised against the
    cluster's PostgreSQL address with ``postgres_database`` as the default
    database. The cluster is shut down when the module finishes, including
    when start-up or initialisation raises.
    """
    try:
        cluster.start()
        postgres_ip = cluster.postgres_ip
        postgres_port = cluster.postgres_port
        pg_manager.init(
            instance,
            postgres_ip,
            postgres_port,
            default_database="postgres_database",
        )
        yield cluster
    finally:
        cluster.shutdown()
2022-01-07 21:27:46 +00:00
@pytest.fixture(autouse=True)
def setup_teardown():
    """Wrap every test: log before it runs, restart ``pg_manager`` after."""
    print("PostgreSQL is available - running test")
    # Control returns here once the test body has finished.
    yield
    pg_manager.restart()
2021-02-08 23:23:51 +00:00
2022-01-07 21:27:46 +00:00
def test_load_and_sync_all_database_tables(started_cluster):
    """Replicate every table of the PostgreSQL database and count them."""
    NUM_TABLES = 5
    pg_manager.create_and_fill_postgres_tables(NUM_TABLES)

    pg_ip = started_cluster.postgres_ip
    pg_port = started_cluster.postgres_port
    pg_manager.create_materialized_db(ip=pg_ip, port=pg_port)
    check_several_tables_are_synchronized(instance, NUM_TABLES)

    # The materialized database must expose exactly the tables created above.
    count_query = "SELECT count() FROM system.tables WHERE database = 'test_database';"
    replicated_tables = int(instance.query(count_query))
    assert replicated_tables == NUM_TABLES
2021-07-05 03:53:48 +00:00
2021-02-08 23:23:51 +00:00
def test_replicating_dml(started_cluster):
    """Check that INSERT, UPDATE and DELETE statements are replicated.

    Each phase applies one kind of change to every table and then waits for
    all tables to be synchronized before moving on.
    """
    NUM_TABLES = 5
    table_ids = range(NUM_TABLES)

    # Initial data, written before the materialized database exists.
    for idx in table_ids:
        pg_manager.create_postgres_table(f"postgresql_replica_{idx}")
        instance.query(
            f"INSERT INTO postgres_database.postgresql_replica_{idx} SELECT number, {idx} from numbers(50)"
        )

    pg_manager.create_materialized_db(
        ip=started_cluster.postgres_ip, port=started_cluster.postgres_port
    )

    # Inserts performed after replication has been set up.
    for idx in table_ids:
        instance.query(
            f"INSERT INTO postgres_database.postgresql_replica_{idx} SELECT 50 + number, {idx} from numbers(1000)"
        )
    check_several_tables_are_synchronized(instance, NUM_TABLES)

    # Updates of non-key columns.
    for idx in table_ids:
        update_statements = (
            f"UPDATE postgresql_replica_{idx} SET value = {idx} * {idx} WHERE key < 50;",
            f"UPDATE postgresql_replica_{idx} SET value = {idx} * {idx} * {idx} WHERE key >= 50;",
        )
        for statement in update_statements:
            pg_manager.execute(statement)
    check_several_tables_are_synchronized(instance, NUM_TABLES)

    # Deletes mixed with a further update.
    for idx in table_ids:
        mixed_statements = (
            f"DELETE FROM postgresql_replica_{idx} WHERE (value*value + {idx}) % 2 = 0;",
            f"UPDATE postgresql_replica_{idx} SET value = value - (value % 7) WHERE key > 128 AND key < 512;",
            f"DELETE FROM postgresql_replica_{idx} WHERE key % 7 = 1;",
        )
        for statement in mixed_statements:
            pg_manager.execute(statement)
    check_several_tables_are_synchronized(instance, NUM_TABLES)
2021-02-08 23:23:51 +00:00
2021-02-12 10:05:13 +00:00
def test_different_data_types(started_cluster):
    """Replicate tables that use scalar and (nested) array column types.

    The scalar table is filled, compared against a literal expected row,
    updated and re-checked. The array table gets a single row whose
    replicated textual form is compared against ``expected``. Both tables
    and the materialized database are dropped at the end.
    """
    pg_conn = get_postgres_conn(
        ip=started_cluster.postgres_ip,
        port=started_cluster.postgres_port,
        database=True,
    )
    cursor = pg_conn.cursor()

    scratch_tables = ("test_data_types", "test_array_data_type")
    for scratch_table in scratch_tables:
        cursor.execute(f"drop table if exists {scratch_table};")

    cursor.execute(
        """CREATE TABLE test_data_types (
        id integer PRIMARY KEY, a smallint, b integer, c bigint, d real, e double precision, f serial, g bigserial,
        h timestamp, i date, j decimal(5, 5), k numeric(5, 5))"""
    )
    cursor.execute(
        """CREATE TABLE test_array_data_type
        (
        key Integer NOT NULL PRIMARY KEY,
        a Date[] NOT NULL, -- Date
        b Timestamp[] NOT NULL, -- DateTime64(6)
        c real[][] NOT NULL, -- Float32
        d double precision[][] NOT NULL, -- Float64
        e decimal(5, 5)[][][] NOT NULL, -- Decimal32
        f integer[][][] NOT NULL, -- Int32
        g Text[][][][][] NOT NULL, -- String
        h Integer[][][], -- Nullable(Int32)
        i Char(2)[][][][], -- Nullable(String)
        k Char(2)[] -- Nullable(String)
        )"""
    )

    pg_manager.create_materialized_db(
        ip=started_cluster.postgres_ip, port=started_cluster.postgres_port
    )

    # Ten rows that differ only in their id.
    scalar_insert = """
        INSERT INTO postgres_database.test_data_types VALUES
        ({}, -32768, -2147483648, -9223372036854775808, 1.12345, 1.1234567890, 2147483647, 9223372036854775807, '2000-05-12 12:12:12.012345', '2000-05-12', 0.2, 0.2)"""
    for row_id in range(10):
        instance.query(scalar_insert.format(row_id))

    check_tables_are_synchronized(instance, "test_data_types", "id")
    first_row = instance.query(
        "SELECT * FROM test_database.test_data_types ORDER BY id LIMIT 1;"
    )
    assert (
        first_row
        == "0\t-32768\t-2147483648\t-9223372036854775808\t1.12345\t1.123456789\t2147483647\t9223372036854775807\t2000-05-12 12:12:12.012345\t2000-05-12\t0.2\t0.2\n"
    )

    # Update a randomly chosen integer column plus the date column.
    for new_value in range(10):
        column = random.choice(["a", "b", "c"])
        cursor.execute(f"UPDATE test_data_types SET {column} = {new_value};")
        cursor.execute("UPDATE test_data_types SET i = '2020-12-12';")
    check_tables_are_synchronized(instance, "test_data_types", "id")

    array_insert = (
        "INSERT INTO postgres_database.test_array_data_type "
        "VALUES ("
        "0, "
        "['2000-05-12', '2000-05-12'], "
        "['2000-05-12 12:12:12.012345', '2000-05-12 12:12:12.012345'], "
        "[[1.12345], [1.12345], [1.12345]], "
        "[[1.1234567891], [1.1234567891], [1.1234567891]], "
        "[[[0.11111, 0.11111]], [[0.22222, 0.22222]], [[0.33333, 0.33333]]], "
        "[[[1, 1], [1, 1]], [[3, 3], [3, 3]], [[4, 4], [5, 5]]], "
        "[[[[['winx', 'winx', 'winx']]]]], "
        "[[[1, NULL], [NULL, 1]], [[NULL, NULL], [NULL, NULL]], [[4, 4], [5, 5]]], "
        "[[[[NULL]]]], "
        "[]"
        ")"
    )
    instance.query(array_insert)

    # One tab-separated field per column of test_array_data_type.
    expected_fields = [
        "0",
        "['2000-05-12','2000-05-12']",
        "['2000-05-12 12:12:12.012345','2000-05-12 12:12:12.012345']",
        "[[1.12345],[1.12345],[1.12345]]",
        "[[1.1234567891],[1.1234567891],[1.1234567891]]",
        "[[[0.11111,0.11111]],[[0.22222,0.22222]],[[0.33333,0.33333]]]",
        "[[[1,1],[1,1]],[[3,3],[3,3]],[[4,4],[5,5]]]",
        "[[[[['winx','winx','winx']]]]]",
        "[[[1,NULL],[NULL,1]],[[NULL,NULL],[NULL,NULL]],[[4,4],[5,5]]]",
        "[[[[NULL]]]]",
        "[]",
    ]
    expected = "\t".join(expected_fields) + "\n"

    check_tables_are_synchronized(instance, "test_array_data_type")
    result = instance.query(
        "SELECT * FROM test_database.test_array_data_type ORDER BY key;"
    )
    assert result == expected

    pg_manager.drop_materialized_db()
    for scratch_table in scratch_tables:
        cursor.execute(f"drop table if exists {scratch_table};")
2021-02-12 10:05:13 +00:00
def test_load_and_sync_subset_of_database_tables(started_cluster):
    """Replicate only the first half of the tables via the tables-list setting.

    The first half must appear in ``test_database`` and stay synchronized;
    the second half must be absent from it.
    """
    NUM_TABLES = 10
    replicated_count = int(NUM_TABLES / 2)
    pg_manager.create_and_fill_postgres_tables(NUM_TABLES)

    # Comma-separated list of the tables that should be replicated.
    publication_tables = ", ".join(
        f"postgresql_replica_{idx}" for idx in range(replicated_count)
    )

    pg_manager.create_materialized_db(
        ip=started_cluster.postgres_ip,
        port=started_cluster.postgres_port,
        settings=[f"materialized_postgresql_tables_list = '{publication_tables}'"],
    )

    time.sleep(1)

    for idx in range(replicated_count):
        assert_nested_table_is_created(instance, f"postgresql_replica_{idx}")

    table_count = instance.query(
        """SELECT count() FROM system.tables WHERE database = 'test_database';"""
    )
    assert int(table_count) == replicated_count

    database_tables = instance.query("SHOW TABLES FROM test_database")
    for idx in range(NUM_TABLES):
        table_name = f"postgresql_replica_{idx}"
        if idx >= replicated_count:
            assert table_name not in database_tables
        else:
            assert table_name in database_tables
        # Every PostgreSQL table receives new rows, replicated or not.
        instance.query(
            f"INSERT INTO postgres_database.{table_name} SELECT 50 + number, {idx} from numbers(100)"
        )

    # Only the listed tables are expected to be synchronized.
    for idx in range(replicated_count):
        check_tables_are_synchronized(instance, f"postgresql_replica_{idx}")
2021-02-22 12:35:53 +00:00
def test_changing_replica_identity_value(started_cluster):
    """Check that updating the ``key`` column itself is replicated."""
    source_table = "postgresql_replica"

    pg_manager.create_postgres_table(source_table)
    instance.query(
        "INSERT INTO postgres_database.postgresql_replica SELECT 50 + number, number from numbers(50)"
    )

    pg_manager.create_materialized_db(
        ip=started_cluster.postgres_ip, port=started_cluster.postgres_port
    )

    instance.query(
        "INSERT INTO postgres_database.postgresql_replica SELECT 100 + number, number from numbers(50)"
    )
    check_tables_are_synchronized(instance, source_table)

    # Shift the key of every row whose key is below 100.
    pg_manager.execute("UPDATE postgresql_replica SET key=key-25 WHERE key<100 ")
    check_tables_are_synchronized(instance, source_table)
2021-02-22 12:35:53 +00:00
2021-04-08 20:39:56 +00:00
def test_clickhouse_restart(started_cluster):
    """Check that tables are synchronized again after ClickHouse restarts."""
    NUM_TABLES = 5
    pg_manager.create_and_fill_postgres_tables(NUM_TABLES)
    pg_manager.create_materialized_db(
        ip=started_cluster.postgres_ip, port=started_cluster.postgres_port
    )
    check_several_tables_are_synchronized(instance, NUM_TABLES)

    # Insert a batch into every table right before the restart.
    for idx in range(NUM_TABLES):
        instance.query(
            f"INSERT INTO postgres_database.postgresql_replica_{idx} SELECT 50 + number, {idx} from numbers(50000)"
        )

    instance.restart_clickhouse()
    check_several_tables_are_synchronized(instance, NUM_TABLES)
2021-04-08 20:39:56 +00:00
2021-04-10 14:42:45 +00:00
def test_replica_identity_index(started_cluster):
    """Replicate a table whose replica identity is a unique index.

    The index covers ``(key1, key2)``. Inserts, updates of key and value
    columns, and a delete are each followed by a synchronization check
    ordered by ``key1``.
    """

    def assert_synchronized():
        check_tables_are_synchronized(instance, "postgresql_replica", order_by="key1")

    pg_manager.create_postgres_table(
        "postgresql_replica", template=postgres_table_template_3
    )
    pg_manager.execute("CREATE unique INDEX idx on postgresql_replica(key1, key2);")
    pg_manager.execute(
        "ALTER TABLE postgresql_replica REPLICA IDENTITY USING INDEX idx"
    )
    instance.query(
        "INSERT INTO postgres_database.postgresql_replica SELECT number, number, number, number from numbers(50, 10)"
    )

    pg_manager.create_materialized_db(
        ip=started_cluster.postgres_ip, port=started_cluster.postgres_port
    )
    instance.query(
        "INSERT INTO postgres_database.postgresql_replica SELECT number, number, number, number from numbers(100, 10)"
    )
    assert_synchronized()

    # Updates touch both index columns and both value columns.
    update_statements = [
        "UPDATE postgresql_replica SET key1=key1-25 WHERE key1<100 ",
        "UPDATE postgresql_replica SET key2=key2-25 WHERE key2>100 ",
        "UPDATE postgresql_replica SET value1=value1+100 WHERE key1<100 ",
        "UPDATE postgresql_replica SET value2=value2+200 WHERE key2>100 ",
    ]
    for statement in update_statements:
        pg_manager.execute(statement)
    assert_synchronized()

    pg_manager.execute("DELETE FROM postgresql_replica WHERE key2<75;")
    assert_synchronized()
2021-04-10 14:42:45 +00:00
2021-05-03 17:28:54 +00:00
def test_table_schema_changes(started_cluster):
    """Check that a table whose schema changed is skipped from replication.

    One randomly chosen table loses its ``value2`` column. After a new row is
    inserted into every table, the server log must report that the altered
    table is skipped from the replication stream, and its row count in
    ``test_database`` must be unchanged.
    """
    NUM_TABLES = 5

    for i in range(NUM_TABLES):
        pg_manager.create_postgres_table(
            f"postgresql_replica_{i}", template=postgres_table_template_2
        )
        instance.query(
            f"INSERT INTO postgres_database.postgresql_replica_{i} SELECT number, {i}, {i}, {i} from numbers(25)"
        )

    pg_manager.create_materialized_db(
        ip=started_cluster.postgres_ip,
        port=started_cluster.postgres_port,
    )

    for i in range(NUM_TABLES):
        instance.query(
            f"INSERT INTO postgres_database.postgresql_replica_{i} SELECT 25 + number, {i}, {i}, {i} from numbers(25)"
        )

    check_several_tables_are_synchronized(instance, NUM_TABLES)

    # Pick the table to alter from the tables that actually exist, instead
    # of a hard-coded index range that must be kept in step with NUM_TABLES.
    altered_idx = random.randrange(NUM_TABLES)
    altered_table = f"postgresql_replica_{altered_idx}"

    prev_count = int(
        instance.query(f"SELECT count() FROM test_database.{altered_table}")
    )

    pg_manager.execute(f"ALTER TABLE {altered_table} DROP COLUMN value2")
    for i in range(NUM_TABLES):
        pg_manager.execute(f"INSERT INTO postgresql_replica_{i} VALUES (50, {i}, {i})")

    assert instance.wait_for_log_line(
        f"Table {altered_table} is skipped from replication stream"
    )
    # The row inserted after the schema change must not have been replicated.
    assert prev_count == int(
        instance.query(f"SELECT count() FROM test_database.{altered_table}")
    )
2021-05-03 17:28:54 +00:00
2021-05-02 11:50:29 +00:00
2021-05-23 12:09:20 +00:00
def test_many_concurrent_queries(started_cluster):
    """Run random UPDATE/DELETE/INSERT statements from many threads.

    Sixteen ``attack`` threads each execute 20 random statements from
    ``query_pool`` against random tables. Threads whose id is below
    ``NUM_TABLES`` also insert into, and once re-key, the table matching
    their id. The main thread inserts into every table meanwhile. After all
    threads have joined, every table must be synchronized and have the same
    row count on both sides.
    """
    table = "test_many_conc"
    query_pool = [
        "DELETE FROM {} WHERE (value*value) % 3 = 0;",
        "UPDATE {} SET value = value - 125 WHERE key % 2 = 0;",
        "DELETE FROM {} WHERE key % 10 = 0;",
        "UPDATE {} SET value = value*5 WHERE key % 2 = 1;",
        "DELETE FROM {} WHERE value % 2 = 0;",
        "UPDATE {} SET value = value + 2000 WHERE key % 5 = 0;",
        "DELETE FROM {} WHERE value % 3 = 0;",
        "UPDATE {} SET value = value * 2 WHERE key % 3 = 0;",
        "DELETE FROM {} WHERE value % 9 = 2;",
        "UPDATE {} SET value = value + 2 WHERE key % 3 = 1;",
        "DELETE FROM {} WHERE value%5 = 0;",
    ]

    NUM_TABLES = 5

    conn = get_postgres_conn(
        ip=started_cluster.postgres_ip,
        port=started_cluster.postgres_port,
        database=True,
    )
    # NOTE(review): this single cursor is used by every attack thread;
    # confirm the driver's cursor objects are safe to share across threads.
    cursor = conn.cursor()
    pg_manager.create_and_fill_postgres_tables(
        NUM_TABLES, numbers=10000, table_name_base=table
    )

    def attack(thread_id):
        print("thread {}".format(thread_id))
        k = 10000
        for i in range(20):
            # randrange() excludes its stop value, so the stop must be
            # len(query_pool) for the last query to be selectable.
            query_id = random.randrange(len(query_pool))
            table_id = random.randrange(NUM_TABLES)
            random_table_name = f"{table}_{table_id}"
            table_name = f"{table}_{thread_id}"

            # random update / delete query
            cursor.execute(query_pool[query_id].format(random_table_name))
            print(
                "Executing for table {} query: {}".format(
                    random_table_name, query_pool[query_id]
                )
            )

            # allow some thread to do inserts (not to violate key constraints);
            # only thread ids below NUM_TABLES have a table named after them
            if thread_id < NUM_TABLES:
                print("try insert table {}".format(thread_id))
                # NOTE(review): the key expression is
                # thread_id*10000*(k + number), which is 0 for every row when
                # thread_id == 0 - confirm this is intended.
                instance.query(
                    "INSERT INTO postgres_database.{} SELECT {}*10000*({} + number), number from numbers(1000)".format(
                        table_name, thread_id, k
                    )
                )
                k += 1
                print("insert table {} ok".format(thread_id))

                if i == 5:
                    # also change primary key value
                    print("try update primary key {}".format(thread_id))
                    cursor.execute(
                        "UPDATE {} SET key=key%100000+100000*{} WHERE key%{}=0".format(
                            table_name, i + 1, i + 1
                        )
                    )
                    print("update primary key {} ok".format(thread_id))

    threads_num = 16
    threads = [
        threading.Thread(target=attack, args=(i,)) for i in range(threads_num)
    ]

    pg_manager.create_materialized_db(
        ip=started_cluster.postgres_ip, port=started_cluster.postgres_port
    )

    # Stagger the thread start times.
    for thread in threads:
        time.sleep(random.uniform(0, 1))
        thread.start()

    # Main-thread inserts: table 0 starts at key 50001, table 1 at 50002, ...
    for table_id in range(NUM_TABLES):
        key_offset = 50001 + table_id
        table_name = f"{table}_{table_id}"
        instance.query(
            "INSERT INTO postgres_database.{} SELECT {} + number, number from numbers(5000)".format(
                table_name, key_offset
            )
        )

    for thread in threads:
        thread.join()

    for i in range(NUM_TABLES):
        table_name = f"{table}_{i}"
        check_tables_are_synchronized(instance, table_name)
        count1 = instance.query(
            "SELECT count() FROM postgres_database.{}".format(table_name)
        )
        count2 = instance.query(
            "SELECT count() FROM (SELECT * FROM test_database.{})".format(table_name)
        )
        assert int(count1) == int(count2)
        print(count1, count2)
2021-07-05 03:53:48 +00:00
2021-05-10 11:31:06 +00:00
2021-05-10 14:51:17 +00:00
def test_single_transaction(started_cluster):
    """Check that uncommitted changes are not replicated until commit.

    All ``queries`` run in one open transaction. The replicated table must
    still be empty five seconds later, and must be synchronized once the
    transaction is committed.
    """
    pg_conn = get_postgres_conn(
        ip=started_cluster.postgres_ip,
        port=started_cluster.postgres_port,
        database=True,
        auto_commit=False,
    )
    cursor = pg_conn.cursor()

    table_name = "postgresql_replica_0"
    create_postgres_table(cursor, table_name)
    pg_conn.commit()

    pg_manager.create_materialized_db(
        ip=started_cluster.postgres_ip, port=started_cluster.postgres_port
    )
    assert_nested_table_is_created(instance, table_name)

    for statement in queries:
        print(f"query {statement}")
        cursor.execute(statement.format(0))

    time.sleep(5)
    # no commit yet
    replicated_rows = instance.query(
        f"select count() from test_database.{table_name}"
    )
    assert int(replicated_rows) == 0

    pg_conn.commit()
    check_tables_are_synchronized(instance, table_name)
2021-05-13 07:36:40 +00:00
def test_virtual_columns(started_cluster):
    """Check that ``_sign`` and ``_version`` can be selected from a replica."""
    pg_conn = get_postgres_conn(
        ip=started_cluster.postgres_ip,
        port=started_cluster.postgres_port,
        database=True,
    )
    table_name = "postgresql_replica_0"
    create_postgres_table(pg_conn.cursor(), table_name)

    pg_manager.create_materialized_db(
        ip=started_cluster.postgres_ip,
        port=started_cluster.postgres_port,
    )
    assert_nested_table_is_created(instance, table_name)

    instance.query(
        f"INSERT INTO postgres_database.{table_name} SELECT number, number from numbers(10)"
    )
    check_tables_are_synchronized(instance, table_name)

    # just check that it works, no check with `expected` because _version is taken as LSN, which will be different each time.
    virtual_columns_query = (
        f"SELECT key, value, _sign, _version FROM test_database.{table_name};"
    )
    print(instance.query(virtual_columns_query))
2021-05-23 12:09:20 +00:00
2021-06-26 22:05:20 +00:00
def test_multiple_databases(started_cluster):
    """Replicate two PostgreSQL databases into two materialized databases.

    ``postgres_database_N`` is replicated into ``test_database_N`` for
    N in (1, 2); every table of both pairs must end up synchronized.
    """
    NUM_TABLES = 5
    pg_ip = started_cluster.postgres_ip
    pg_port = started_cluster.postgres_port

    # Connection opened without selecting a database; kept bound for the
    # duration of the test, as in the original flow.
    conn_without_database = get_postgres_conn(
        ip=pg_ip,
        port=pg_port,
        database=False,
    )
    pg_manager.create_postgres_db("postgres_database_1")
    pg_manager.create_postgres_db("postgres_database_2")

    conn1 = get_postgres_conn(
        ip=pg_ip,
        port=pg_port,
        database=True,
        database_name="postgres_database_1",
    )
    conn2 = get_postgres_conn(
        ip=pg_ip,
        port=pg_port,
        database=True,
        database_name="postgres_database_2",
    )
    cursors = [conn1.cursor(), conn2.cursor()]

    for database_name in ("postgres_database_1", "postgres_database_2"):
        pg_manager.create_clickhouse_postgres_db(database_name, "", database_name)

    table_names = [f"postgresql_replica_{idx}" for idx in range(NUM_TABLES)]

    # Create and fill the source tables before replication starts.
    for database_id, cursor in enumerate(cursors, start=1):
        for table_name in table_names:
            create_postgres_table(cursor, table_name)
            instance.query(
                f"INSERT INTO postgres_database_{database_id}.{table_name} SELECT number, number from numbers(50)"
            )

    print(
        "database 1 tables: ",
        instance.query(
            """SELECT name FROM system.tables WHERE database = 'postgres_database_1';"""
        ),
    )
    print(
        "database 2 tables: ",
        instance.query(
            """SELECT name FROM system.tables WHERE database = 'postgres_database_2';"""
        ),
    )

    pg_manager.create_materialized_db(
        pg_ip,
        pg_port,
        "test_database_1",
        "postgres_database_1",
    )
    pg_manager.create_materialized_db(
        pg_ip,
        pg_port,
        "test_database_2",
        "postgres_database_2",
    )

    # Rows inserted after replication has been set up.
    for database_id in range(1, len(cursors) + 1):
        for table_name in table_names:
            instance.query(
                f"INSERT INTO postgres_database_{database_id}.{table_name} SELECT 50 + number, number from numbers(50)"
            )

    for database_id in range(1, len(cursors) + 1):
        for table_name in table_names:
            check_tables_are_synchronized(
                instance,
                table_name,
                "key",
                f"postgres_database_{database_id}",
                f"test_database_{database_id}",
            )
2021-06-26 22:05:20 +00:00
def test_concurrent_transactions(started_cluster):
    """Run one explicit transaction per table concurrently and compare counts.

    Each thread executes every statement of ``queries`` formatted with its
    thread id on a non-autocommit connection and commits once at the end.
    """

    def transaction(thread_id):
        pg_conn = get_postgres_conn(
            ip=started_cluster.postgres_ip,
            port=started_cluster.postgres_port,
            database=True,
            auto_commit=False,
        )
        cursor = pg_conn.cursor()
        for statement in queries:
            cursor.execute(statement.format(thread_id))
            print(f"thread {thread_id}, query {statement}")
        pg_conn.commit()

    NUM_TABLES = 6
    pg_manager.create_and_fill_postgres_tables(NUM_TABLES, numbers=0)

    threads_num = 6
    threads = [
        threading.Thread(target=transaction, args=(thread_id,))
        for thread_id in range(threads_num)
    ]

    pg_manager.create_materialized_db(
        ip=started_cluster.postgres_ip, port=started_cluster.postgres_port
    )

    # Stagger the thread start times.
    for worker in threads:
        time.sleep(random.uniform(0, 0.5))
        worker.start()

    for worker in threads:
        worker.join()

    for idx in range(NUM_TABLES):
        check_tables_are_synchronized(instance, f"postgresql_replica_{idx}")
        source_count = int(
            instance.query(
                f"SELECT count() FROM postgres_database.postgresql_replica_{idx}"
            )
        )
        replica_count = int(
            instance.query(
                f"SELECT count() FROM (SELECT * FROM test_database.postgresql_replica_{idx})"
            )
        )
        print(source_count, replica_count, sep=" ")
        assert source_count == replica_count
2021-07-05 03:53:48 +00:00
2021-05-10 14:51:17 +00:00
2021-06-29 23:11:46 +00:00
def test_abrupt_connection_loss_while_heavy_replication(started_cluster):
    """Pause and unpause the PostgreSQL container after concurrent writes.

    Odd-numbered threads use an autocommit connection; even-numbered threads
    commit once after executing all ``queries``. All tables must be
    synchronized after the container is unpaused.
    """

    def transaction(thread_id):
        # Odd ids use autocommit, even ids commit explicitly at the end.
        use_auto_commit = thread_id % 2 == 1
        pg_conn = get_postgres_conn(
            ip=started_cluster.postgres_ip,
            port=started_cluster.postgres_port,
            database=True,
            auto_commit=use_auto_commit,
        )
        cursor = pg_conn.cursor()
        for statement in queries:
            cursor.execute(statement.format(thread_id))
            print(f"thread {thread_id}, query {statement}")
        if not use_auto_commit:
            pg_conn.commit()

    NUM_TABLES = 6
    pg_manager.create_and_fill_postgres_tables(NUM_TABLES, numbers=0)

    threads_num = 6
    threads = [
        threading.Thread(target=transaction, args=(thread_id,))
        for thread_id in range(threads_num)
    ]

    pg_manager.create_materialized_db(
        ip=started_cluster.postgres_ip, port=started_cluster.postgres_port
    )

    for worker in threads:
        time.sleep(random.uniform(0, 0.5))
        worker.start()

    # Join here because it takes time for data to reach wal
    for worker in threads:
        worker.join()

    time.sleep(2)
    started_cluster.pause_container("postgres1")

    # for i in range(NUM_TABLES):
    #     result = instance.query(f"SELECT count() FROM test_database.postgresql_replica_{i}")
    #     print(result) # Just debug

    started_cluster.unpause_container("postgres1")
    check_several_tables_are_synchronized(instance, NUM_TABLES)
2021-06-29 23:11:46 +00:00
def test_drop_database_while_replication_startup_not_finished(started_cluster):
    """Create and drop the materialized database after increasing delays."""
    NUM_TABLES = 5
    pg_manager.create_and_fill_postgres_tables(NUM_TABLES, 100000)

    # Delays of 0, 0.5, ..., 2.5 seconds between creating and dropping.
    for attempt in range(6):
        delay_seconds = 0.5 * attempt
        pg_manager.create_materialized_db(
            ip=started_cluster.postgres_ip, port=started_cluster.postgres_port
        )
        time.sleep(delay_seconds)
        pg_manager.drop_materialized_db()
2021-07-05 03:53:48 +00:00
2021-06-29 23:11:46 +00:00
def test_restart_server_while_replication_startup_not_finished(started_cluster):
    """Restart ClickHouse one second after creating the materialized database."""
    NUM_TABLES = 5
    rows_per_table = 100000
    pg_manager.create_and_fill_postgres_tables(NUM_TABLES, rows_per_table)

    pg_manager.create_materialized_db(
        ip=started_cluster.postgres_ip, port=started_cluster.postgres_port
    )
    time.sleep(1)

    instance.restart_clickhouse()
    check_several_tables_are_synchronized(instance, NUM_TABLES)
2021-06-29 23:11:46 +00:00
def test_abrupt_server_restart_while_heavy_replication(started_cluster):
    """Restart ClickHouse right after concurrent writes to PostgreSQL.

    Odd-numbered threads use an autocommit connection; even-numbered threads
    commit once after executing all ``queries``. All tables must be
    synchronized after the restart.
    """

    def transaction(thread_id):
        # Odd ids use autocommit, even ids commit explicitly at the end.
        use_auto_commit = thread_id % 2 == 1
        pg_conn = get_postgres_conn(
            ip=started_cluster.postgres_ip,
            port=started_cluster.postgres_port,
            database=True,
            auto_commit=use_auto_commit,
        )
        cursor = pg_conn.cursor()
        for statement in queries:
            cursor.execute(statement.format(thread_id))
            print(f"thread {thread_id}, query {statement}")
        if not use_auto_commit:
            pg_conn.commit()

    NUM_TABLES = 6
    pg_manager.create_and_fill_postgres_tables(tables_num=NUM_TABLES, numbers=0)

    threads_num = 6
    threads = [
        threading.Thread(target=transaction, args=(thread_id,))
        for thread_id in range(threads_num)
    ]

    pg_manager.create_materialized_db(
        ip=started_cluster.postgres_ip, port=started_cluster.postgres_port
    )

    for worker in threads:
        time.sleep(random.uniform(0, 0.5))
        worker.start()

    # Join here because it takes time for data to reach wal
    for worker in threads:
        worker.join()

    instance.restart_clickhouse()
    check_several_tables_are_synchronized(instance, NUM_TABLES)
2021-06-29 23:11:46 +00:00
2021-12-11 21:47:21 +00:00
def test_quoting_1(started_cluster):
    """Replicate a table named ``user`` without a tables-list setting."""
    table_name = "user"
    pg_manager.create_and_fill_postgres_table(table_name)

    pg_ip = started_cluster.postgres_ip
    pg_port = started_cluster.postgres_port
    pg_manager.create_materialized_db(ip=pg_ip, port=pg_port)

    check_tables_are_synchronized(instance, table_name)
2021-12-11 21:47:21 +00:00
def test_quoting_2(started_cluster):
    """Replicate a table named ``user`` listed in the tables-list setting."""
    table_name = "user"
    pg_manager.create_and_fill_postgres_table(table_name)

    tables_list_setting = f"materialized_postgresql_tables_list = '{table_name}'"
    pg_manager.create_materialized_db(
        ip=started_cluster.postgres_ip,
        port=started_cluster.postgres_port,
        settings=[tables_list_setting],
    )

    check_tables_are_synchronized(instance, table_name)
2021-08-31 20:58:00 +00:00
2021-09-04 10:07:59 +00:00
def test_user_managed_slots(started_cluster):
    """Replicate through a replication slot created by the test itself.

    The slot name and the snapshot returned by ``create_replication_slot``
    are passed to the materialized database as settings. Synchronization is
    checked after the initial load, after a further insert, and after a
    ClickHouse restart followed by another insert. The slot is dropped and
    the replication connection closed at the end.
    """
    slot_name = "user_slot"
    table_name = "test_table"

    def insert_batch(first_number):
        # Inserts 10000 rows starting at the given number.
        instance.query(
            f"INSERT INTO postgres_database.{table_name} SELECT number, number from numbers({first_number}, 10000)"
        )

    pg_manager.create_and_fill_postgres_table(table_name)

    replication_connection = get_postgres_conn(
        ip=started_cluster.postgres_ip,
        port=started_cluster.postgres_port,
        database=True,
        replication=True,
        auto_commit=True,
    )
    snapshot = create_replication_slot(replication_connection, slot_name=slot_name)

    slot_settings = [
        f"materialized_postgresql_replication_slot = '{slot_name}'",
        f"materialized_postgresql_snapshot = '{snapshot}'",
    ]
    pg_manager.create_materialized_db(
        ip=started_cluster.postgres_ip,
        port=started_cluster.postgres_port,
        settings=slot_settings,
    )
    check_tables_are_synchronized(instance, table_name)

    insert_batch(10000)
    check_tables_are_synchronized(instance, table_name)

    instance.restart_clickhouse()

    insert_batch(20000)
    check_tables_are_synchronized(instance, table_name)

    # The test created the slot, so the test removes it.
    pg_manager.drop_materialized_db()
    drop_replication_slot(replication_connection, slot_name)
    replication_connection.close()
2021-09-04 10:07:59 +00:00
2021-02-08 23:23:51 +00:00
if __name__ == "__main__":
    # Manual mode: bring the cluster up, wait for the operator, tear it down.
    cluster.start()
    input("Cluster created, press any key to destroy...")
    cluster.shutdown()