Remove cluster type check during alter

alesapin 2020-10-16 14:59:58 +03:00
parent e89a3b5d09
commit dc1a5abea9
5 changed files with 122 additions and 38 deletions

src/Interpreters/DDLWorker.cpp

@@ -787,12 +787,6 @@ void DDLWorker::processTask(DDLTask & task)
         storage = DatabaseCatalog::instance().tryGetTable(table_id, context);
     }
 
-    /// For some reason we check consistency of cluster definition only
-    /// in case of ALTER query, but not in case of CREATE/DROP etc.
-    /// It's strange, but this behaviour exits for a long and we cannot change it.
-    if (storage && query_with_table->as<ASTAlterQuery>())
-        checkShardConfig(query_with_table->table, task, storage);
-
     if (storage && taskShouldBeExecutedOnLeader(rewritten_ast, storage) && !is_circular_replicated)
         tryExecuteQueryOnLeaderReplica(task, storage, rewritten_query, task.entry_path, zookeeper);
     else
@@ -837,35 +831,6 @@ bool DDLWorker::taskShouldBeExecutedOnLeader(const ASTPtr ast_ddl, const Storage
     return storage->supportsReplication();
 }
 
-void DDLWorker::checkShardConfig(const String & table, const DDLTask & task, StoragePtr storage) const
-{
-    const auto & shard_info = task.cluster->getShardsInfo().at(task.host_shard_num);
-    bool config_is_replicated_shard = shard_info.hasInternalReplication();
-
-    if (dynamic_cast<const StorageDistributed *>(storage.get()))
-    {
-        LOG_TRACE(log, "Table {} is distributed, skip checking config.", backQuote(table));
-        return;
-    }
-
-    if (storage->supportsReplication() && !config_is_replicated_shard)
-    {
-        throw Exception(ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION,
-                        "Table {} is replicated, but shard #{} isn't replicated according to its cluster definition. "
-                        "Possibly <internal_replication>true</internal_replication> is forgotten in the cluster config.",
-                        backQuote(table), task.host_shard_num + 1);
-    }
-
-    if (!storage->supportsReplication() && config_is_replicated_shard)
-    {
-        throw Exception(ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION,
-                        "Table {} isn't replicated, but shard #{} is replicated according to its cluster definition",
-                        backQuote(table), task.host_shard_num + 1);
-    }
-}
-
 bool DDLWorker::tryExecuteQueryOnLeaderReplica(
     DDLTask & task,
     StoragePtr storage,
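
With checkShardConfig() deleted, DDLWorker no longer compares a table's engine against its shard's <internal_replication> flag before executing a distributed ALTER. A minimal sketch of a case the old check rejected and this commit unblocks, written in the style of the new test.py below (the test name is hypothetical; it reuses that file's started_cluster fixture and node handles):

# Hypothetical sketch, not part of the commit.
def test_alter_plain_table_on_replicated_shard(started_cluster):
    # test_table is a plain MergeTree table, but it sits on shard #1 of
    # 'test_cluster_mixed', which declares <internal_replication>true</internal_replication>.
    # Before this commit, DDLWorker::checkShardConfig() rejected the query with
    # INCONSISTENT_CLUSTER_DEFINITION ("Table `test_table` isn't replicated,
    # but shard #1 is replicated according to its cluster definition").
    node1.query("ALTER TABLE test_table ON CLUSTER 'test_cluster_mixed' MODIFY COLUMN date DateTime")
    # The ALTER is fanned out through the distributed DDL queue to every host.
    assert node3.query("SELECT type FROM system.columns WHERE table = 'test_table' AND name = 'date'") == "DateTime\n"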

src/Interpreters/DDLWorker.h

@@ -75,9 +75,6 @@ private:
     /// Check that query should be executed on leader replica only
     static bool taskShouldBeExecutedOnLeader(const ASTPtr ast_ddl, StoragePtr storage);
 
-    /// Check that shard has consistent config with table
-    void checkShardConfig(const String & table, const DDLTask & task, StoragePtr storage) const;
-
     /// Executes query only on leader replica in case of replicated table.
     /// Queries like TRUNCATE/ALTER .../OPTIMIZE have to be executed only on one node of shard.
     /// Most of these queries can be executed on non-leader replica, but actually they still send

tests/integration/test_alter_on_mixed_type_cluster/__init__.py

@@ -0,0 +1 @@
+#!/usr/bin/env python3

tests/integration/test_alter_on_mixed_type_cluster/configs/remote_servers.xml

@@ -0,0 +1,28 @@
+<yandex>
+    <remote_servers>
+        <test_cluster_mixed>
+            <shard>
+                <internal_replication>true</internal_replication>
+                <replica>
+                    <host>node1</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>node2</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+            <shard>
+                <internal_replication>false</internal_replication>
+                <replica>
+                    <host>node3</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>node4</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+        </test_cluster_mixed>
+    </remote_servers>
+</yandex>
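
This config deliberately mixes shard types: shard 1 (node1, node2) uses internal replication, shard 2 (node3, node4) does not, which is exactly the topology the removed check rejected. The layout can be inspected from any node via system.clusters; a small sketch, assuming the cluster defined by this config is up (see test.py below):

# Sketch only: each row of system.clusters is one replica; shard_num groups
# node1/node2 into shard 1 and node3/node4 into shard 2.
print(node1.query(
    "SELECT cluster, shard_num, replica_num, host_name "
    "FROM system.clusters WHERE cluster = 'test_cluster_mixed'"
))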

tests/integration/test_alter_on_mixed_type_cluster/test.py

@@ -0,0 +1,93 @@
+import pytest
+
+from helpers.cluster import ClickHouseCluster
+from helpers.test_tools import assert_eq_with_retry
+
+cluster = ClickHouseCluster(__file__)
+
+node1 = cluster.add_instance('node1', main_configs=['configs/remote_servers.xml'], with_zookeeper=True)
+node2 = cluster.add_instance('node2', main_configs=['configs/remote_servers.xml'], with_zookeeper=True)
+node3 = cluster.add_instance('node3', main_configs=['configs/remote_servers.xml'], with_zookeeper=True)
+node4 = cluster.add_instance('node4', main_configs=['configs/remote_servers.xml'], with_zookeeper=True)
+
+
+@pytest.fixture(scope="module")
+def started_cluster():
+    try:
+        cluster.start()
+
+        for node in [node1, node2]:
+            node.query('''
+            CREATE TABLE test_table_replicated(date Date, id UInt32, value Int32)
+            ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/sometable', '{replica}') ORDER BY id;
+            '''.format(replica=node.name))
+            node.query('''CREATE TABLE test_table(date Date, id UInt32, value Int32) ENGINE=MergeTree ORDER BY id''')
+
+        for node in [node3, node4]:
+            node.query('''
+            CREATE TABLE test_table_replicated(date Date, id UInt32, value Int32)
+            ENGINE = ReplicatedMergeTree('/clickhouse/tables/1/someotable', '{replica}') ORDER BY id;
+            '''.format(replica=node.name))
+            node.query('''CREATE TABLE test_table(date Date, id UInt32, value Int32) ENGINE=MergeTree ORDER BY id''')
+
+        yield cluster
+
+    finally:
+        cluster.shutdown()
+
+
+def test_alter_on_cluster_non_replicated(started_cluster):
+    for node in [node1, node2, node3, node4]:
+        node.query("INSERT INTO test_table VALUES(toDate('2019-10-01'), 1, 1)")
+
+    assert node1.query("SELECT COUNT() FROM test_table") == "1\n"
+    assert node2.query("SELECT COUNT() FROM test_table") == "1\n"
+    assert node3.query("SELECT COUNT() FROM test_table") == "1\n"
+    assert node4.query("SELECT COUNT() FROM test_table") == "1\n"
+
+    node1.query("ALTER TABLE test_table ON CLUSTER 'test_cluster_mixed' MODIFY COLUMN date DateTime")
+
+    assert node1.query("SELECT date FROM test_table") == '2019-10-01 00:00:00\n'
+    assert node2.query("SELECT date FROM test_table") == '2019-10-01 00:00:00\n'
+    assert node3.query("SELECT date FROM test_table") == '2019-10-01 00:00:00\n'
+    assert node4.query("SELECT date FROM test_table") == '2019-10-01 00:00:00\n'
+
+    node3.query("ALTER TABLE test_table ON CLUSTER 'test_cluster_mixed' MODIFY COLUMN value String")
+
+    for node in [node1, node2, node3, node4]:
+        node.query("INSERT INTO test_table VALUES(toDateTime('2019-10-02 00:00:00'), 2, 'Hello')")
+
+    assert node1.query("SELECT COUNT() FROM test_table") == "2\n"
+    assert node2.query("SELECT COUNT() FROM test_table") == "2\n"
+    assert node3.query("SELECT COUNT() FROM test_table") == "2\n"
+    assert node4.query("SELECT COUNT() FROM test_table") == "2\n"
+
+
+def test_alter_replicated_on_cluster(started_cluster):
+    for node in [node1, node3]:
+        node.query("INSERT INTO test_table_replicated VALUES(toDate('2019-10-01'), 1, 1)")
+
+    for node in [node2, node4]:
+        node.query("SYSTEM SYNC REPLICA test_table_replicated", timeout=20)
+
+    node1.query("ALTER TABLE test_table_replicated ON CLUSTER 'test_cluster_mixed' MODIFY COLUMN date DateTime", settings={"replication_alter_partitions_sync": "2"})
+
+    assert node1.query("SELECT date FROM test_table_replicated") == '2019-10-01 00:00:00\n'
+    assert node2.query("SELECT date FROM test_table_replicated") == '2019-10-01 00:00:00\n'
+    assert node3.query("SELECT date FROM test_table_replicated") == '2019-10-01 00:00:00\n'
+    assert node4.query("SELECT date FROM test_table_replicated") == '2019-10-01 00:00:00\n'
+
+    node3.query("ALTER TABLE test_table_replicated ON CLUSTER 'test_cluster_mixed' MODIFY COLUMN value String", settings={"replication_alter_partitions_sync": "2"})
+
+    for node in [node2, node4]:
+        node.query("INSERT INTO test_table_replicated VALUES(toDateTime('2019-10-02 00:00:00'), 2, 'Hello')")
+
+    for node in [node1, node3]:
+        node.query("SYSTEM SYNC REPLICA test_table_replicated", timeout=20)
+
+    assert node1.query("SELECT COUNT() FROM test_table_replicated") == "2\n"
+    assert node2.query("SELECT COUNT() FROM test_table_replicated") == "2\n"
+    assert node3.query("SELECT COUNT() FROM test_table_replicated") == "2\n"
+    assert node4.query("SELECT COUNT() FROM test_table_replicated") == "2\n"