Remove cluster type check during alter

alesapin 2020-10-16 14:59:58 +03:00
parent e89a3b5d09
commit dc1a5abea9
5 changed files with 122 additions and 38 deletions

src/Interpreters/DDLWorker.cpp

@@ -787,12 +787,6 @@ void DDLWorker::processTask(DDLTask & task)
         storage = DatabaseCatalog::instance().tryGetTable(table_id, context);
     }
 
-    /// For some reason we check consistency of cluster definition only
-    /// in case of ALTER query, but not in case of CREATE/DROP etc.
-    /// It's strange, but this behaviour exits for a long and we cannot change it.
-    if (storage && query_with_table->as<ASTAlterQuery>())
-        checkShardConfig(query_with_table->table, task, storage);
-
     if (storage && taskShouldBeExecutedOnLeader(rewritten_ast, storage) && !is_circular_replicated)
         tryExecuteQueryOnLeaderReplica(task, storage, rewritten_query, task.entry_path, zookeeper);
     else
@@ -837,35 +831,6 @@ bool DDLWorker::taskShouldBeExecutedOnLeader(const ASTPtr ast_ddl, const Storage
     return storage->supportsReplication();
 }
 
-void DDLWorker::checkShardConfig(const String & table, const DDLTask & task, StoragePtr storage) const
-{
-    const auto & shard_info = task.cluster->getShardsInfo().at(task.host_shard_num);
-    bool config_is_replicated_shard = shard_info.hasInternalReplication();
-
-    if (dynamic_cast<const StorageDistributed *>(storage.get()))
-    {
-        LOG_TRACE(log, "Table {} is distributed, skip checking config.", backQuote(table));
-        return;
-    }
-
-    if (storage->supportsReplication() && !config_is_replicated_shard)
-    {
-        throw Exception(ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION,
-                        "Table {} is replicated, but shard #{} isn't replicated according to its cluster definition. "
-                        "Possibly <internal_replication>true</internal_replication> is forgotten in the cluster config.",
-                        backQuote(table), task.host_shard_num + 1);
-    }
-
-    if (!storage->supportsReplication() && config_is_replicated_shard)
-    {
-        throw Exception(ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION,
-                        "Table {} isn't replicated, but shard #{} is replicated according to its cluster definition",
-                        backQuote(table), task.host_shard_num + 1);
-    }
-}
-
 bool DDLWorker::tryExecuteQueryOnLeaderReplica(
     DDLTask & task,
     StoragePtr storage,
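
With checkShardConfig() deleted, DDLWorker no longer compares a table's engine against its shard's <internal_replication> flag before executing a distributed ALTER. A minimal sketch of a case the old check rejected and this commit unblocks, written in the style of the new test.py below (the test name is hypothetical; it reuses that file's started_cluster fixture and node handles):

# Hypothetical sketch, not part of the commit.
def test_alter_plain_table_on_replicated_shard(started_cluster):
    # test_table is a plain MergeTree table, but it sits on shard #1 of
    # 'test_cluster_mixed', which declares <internal_replication>true</internal_replication>.
    # Before this commit, DDLWorker::checkShardConfig() rejected the query with
    # INCONSISTENT_CLUSTER_DEFINITION ("Table `test_table` isn't replicated,
    # but shard #1 is replicated according to its cluster definition").
    node1.query("ALTER TABLE test_table ON CLUSTER 'test_cluster_mixed' MODIFY COLUMN date DateTime")
    # The ALTER is fanned out through the distributed DDL queue to every host.
    assert node3.query("SELECT type FROM system.columns WHERE table = 'test_table' AND name = 'date'") == "DateTime\n"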

src/Interpreters/DDLWorker.h

@@ -75,9 +75,6 @@ private:
     /// Check that query should be executed on leader replica only
     static bool taskShouldBeExecutedOnLeader(const ASTPtr ast_ddl, StoragePtr storage);
 
-    /// Check that shard has consistent config with table
-    void checkShardConfig(const String & table, const DDLTask & task, StoragePtr storage) const;
-
     /// Executes query only on leader replica in case of replicated table.
     /// Queries like TRUNCATE/ALTER .../OPTIMIZE have to be executed only on one node of shard.
     /// Most of these queries can be executed on non-leader replica, but actually they still send

tests/integration/test_alter_on_mixed_type_cluster/__init__.py

@@ -0,0 +1 @@
+#!/usr/bin/env python3

tests/integration/test_alter_on_mixed_type_cluster/configs/remote_servers.xml

@@ -0,0 +1,28 @@
+<yandex>
+    <remote_servers>
+        <test_cluster_mixed>
+            <shard>
+                <internal_replication>true</internal_replication>
+                <replica>
+                    <host>node1</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>node2</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+            <shard>
+                <internal_replication>false</internal_replication>
+                <replica>
+                    <host>node3</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>node4</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+        </test_cluster_mixed>
+    </remote_servers>
+</yandex>
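
This config deliberately mixes shard types: shard 1 (node1, node2) uses internal replication, shard 2 (node3, node4) does not, which is exactly the topology the removed check rejected. The layout can be inspected from any node via system.clusters; a small sketch, assuming the cluster defined by this config is up (see test.py below):

# Sketch only: each row of system.clusters is one replica; shard_num groups
# node1/node2 into shard 1 and node3/node4 into shard 2.
print(node1.query(
    "SELECT cluster, shard_num, replica_num, host_name "
    "FROM system.clusters WHERE cluster = 'test_cluster_mixed'"
))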

tests/integration/test_alter_on_mixed_type_cluster/test.py

@@ -0,0 +1,93 @@
+import pytest
+
+from helpers.cluster import ClickHouseCluster
+from helpers.test_tools import assert_eq_with_retry
+
+cluster = ClickHouseCluster(__file__)
+
+node1 = cluster.add_instance('node1', main_configs=['configs/remote_servers.xml'], with_zookeeper=True)
+node2 = cluster.add_instance('node2', main_configs=['configs/remote_servers.xml'], with_zookeeper=True)
+node3 = cluster.add_instance('node3', main_configs=['configs/remote_servers.xml'], with_zookeeper=True)
+node4 = cluster.add_instance('node4', main_configs=['configs/remote_servers.xml'], with_zookeeper=True)
+
+
+@pytest.fixture(scope="module")
+def started_cluster():
+    try:
+        cluster.start()
+
+        for node in [node1, node2]:
+            node.query('''
+            CREATE TABLE test_table_replicated(date Date, id UInt32, value Int32)
+            ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/sometable', '{replica}') ORDER BY id;
+            '''.format(replica=node.name))
+            node.query('''CREATE TABLE test_table(date Date, id UInt32, value Int32) ENGINE=MergeTree ORDER BY id''')
+
+        for node in [node3, node4]:
+            node.query('''
+            CREATE TABLE test_table_replicated(date Date, id UInt32, value Int32)
+            ENGINE = ReplicatedMergeTree('/clickhouse/tables/1/someotable', '{replica}') ORDER BY id;
+            '''.format(replica=node.name))
+            node.query('''CREATE TABLE test_table(date Date, id UInt32, value Int32) ENGINE=MergeTree ORDER BY id''')
+
+        yield cluster
+
+    finally:
+        cluster.shutdown()
+
+
+def test_alter_on_cluster_non_replicated(started_cluster):
+    for node in [node1, node2, node3, node4]:
+        node.query("INSERT INTO test_table VALUES(toDate('2019-10-01'), 1, 1)")
+
+    assert node1.query("SELECT COUNT() FROM test_table") == "1\n"
+    assert node2.query("SELECT COUNT() FROM test_table") == "1\n"
+    assert node3.query("SELECT COUNT() FROM test_table") == "1\n"
+    assert node4.query("SELECT COUNT() FROM test_table") == "1\n"
+
+    node1.query("ALTER TABLE test_table ON CLUSTER 'test_cluster_mixed' MODIFY COLUMN date DateTime")
+
+    assert node1.query("SELECT date FROM test_table") == '2019-10-01 00:00:00\n'
+    assert node2.query("SELECT date FROM test_table") == '2019-10-01 00:00:00\n'
+    assert node3.query("SELECT date FROM test_table") == '2019-10-01 00:00:00\n'
+    assert node4.query("SELECT date FROM test_table") == '2019-10-01 00:00:00\n'
+
+    node3.query("ALTER TABLE test_table ON CLUSTER 'test_cluster_mixed' MODIFY COLUMN value String")
+
+    for node in [node1, node2, node3, node4]:
+        node.query("INSERT INTO test_table VALUES(toDateTime('2019-10-02 00:00:00'), 2, 'Hello')")
+
+    assert node1.query("SELECT COUNT() FROM test_table") == "2\n"
+    assert node2.query("SELECT COUNT() FROM test_table") == "2\n"
+    assert node3.query("SELECT COUNT() FROM test_table") == "2\n"
+    assert node4.query("SELECT COUNT() FROM test_table") == "2\n"
+
+
+def test_alter_replicated_on_cluster(started_cluster):
+    for node in [node1, node3]:
+        node.query("INSERT INTO test_table_replicated VALUES(toDate('2019-10-01'), 1, 1)")
+
+    for node in [node2, node4]:
+        node.query("SYSTEM SYNC REPLICA test_table_replicated", timeout=20)
+
+    node1.query("ALTER TABLE test_table_replicated ON CLUSTER 'test_cluster_mixed' MODIFY COLUMN date DateTime", settings={"replication_alter_partitions_sync": "2"})
+
+    assert node1.query("SELECT date FROM test_table_replicated") == '2019-10-01 00:00:00\n'
+    assert node2.query("SELECT date FROM test_table_replicated") == '2019-10-01 00:00:00\n'
+    assert node3.query("SELECT date FROM test_table_replicated") == '2019-10-01 00:00:00\n'
+    assert node4.query("SELECT date FROM test_table_replicated") == '2019-10-01 00:00:00\n'
+
+    node3.query("ALTER TABLE test_table_replicated ON CLUSTER 'test_cluster_mixed' MODIFY COLUMN value String", settings={"replication_alter_partitions_sync": "2"})
+
+    for node in [node2, node4]:
+        node.query("INSERT INTO test_table_replicated VALUES(toDateTime('2019-10-02 00:00:00'), 2, 'Hello')")
+
+    for node in [node1, node3]:
+        node.query("SYSTEM SYNC REPLICA test_table_replicated", timeout=20)
+
+    assert node1.query("SELECT COUNT() FROM test_table_replicated") == "2\n"
+    assert node2.query("SELECT COUNT() FROM test_table_replicated") == "2\n"
+    assert node3.query("SELECT COUNT() FROM test_table_replicated") == "2\n"
+    assert node4.query("SELECT COUNT() FROM test_table_replicated") == "2\n"