Fix flakiness of test_version_update_after_mutation by enabling force_remove_data_recursively_on_drop

Since there can be some leftovers: 2023.07.24 07:08:25.238066 [ 140 ] {} <Error> Application: Code: 219. DB::Exception: Cannot drop: filesystem error: in remove: Directory not empty ["/var/lib/clickhouse/data/system/"]. Probably database contain some detached tables or metadata leftovers from Ordinary engine. If you want to remove all data anyway, try to attach database back and drop it again with enabled force_remove_data_recursively_on_drop setting: Exception while trying to convert database system from Ordinary to Atomic. It may be in some intermediate state. You can finish conversion manually by moving the rest tables from system to .tmp_convert.system.9396432095832455195 (using RENAME TABLE) and executing DROP DATABASE system and RENAME DATABASE .tmp_convert.system.9396432095832455195 TO system. (DATABASE_NOT_EMPTY), Stack trace (when copying this message, always include the lines below): 0. DB::Exception::Exception(DB::Exception::MessageMasked&&, int, bool) @ 0x000000000e68af57 in /usr/bin/clickhouse 1. ? @ 0x000000000cab443c in /usr/bin/clickhouse 2. DB::DatabaseOnDisk::drop(std::shared_ptr<DB::Context const>) @ 0x000000001328d617 in /usr/bin/clickhouse 3. DB::DatabaseCatalog::detachDatabase(std::shared_ptr<DB::Context const>, String const&, bool, bool) @ 0x0000000013524a6c in /usr/bin/clickhouse 4. DB::InterpreterDropQuery::executeToDatabaseImpl(DB::ASTDropQuery const&, std::shared_ptr<DB::IDatabase>&, std::vector<StrongTypedef<wide::integer<128ul, unsigned int>, DB::UUIDTag>, std::allocator<StrongTypedef<wide::integer<128ul, unsigned int>, DB::UUIDTag>>>&) @ 0x0000000013bc05e4 in /usr/bin/clickhouse 5. DB::InterpreterDropQuery::executeToDatabase(DB::ASTDropQuery const&) @ 0x0000000013bbc6b8 in /usr/bin/clickhouse 6. DB::InterpreterDropQuery::execute() @ 0x0000000013bbba22 in /usr/bin/clickhouse 7. ? @ 0x00000000140b13a5 in /usr/bin/clickhouse 8. 
DB::executeQuery(String const&, std::shared_ptr<DB::Context>, bool, DB::QueryProcessingStage::Enum) @ 0x00000000140ad20e in /usr/bin/clickhouse 9. ? @ 0x00000000140d2ef0 in /usr/bin/clickhouse 10. DB::maybeConvertSystemDatabase(std::shared_ptr<DB::Context>) @ 0x00000000140d0aaf in /usr/bin/clickhouse 11. DB::Server::main(std::vector<String, std::allocator<String>> const&) @ 0x000000000e724e55 in /usr/bin/clickhouse 12. Poco::Util::Application::run() @ 0x0000000017ead086 in /usr/bin/clickhouse 13. DB::Server::run() @ 0x000000000e714a5d in /usr/bin/clickhouse 14. Poco::Util::ServerApplication::run(int, char**) @ 0x0000000017ec07b9 in /usr/bin/clickhouse 15. mainEntryClickHouseServer(int, char**) @ 0x000000000e711a26 in /usr/bin/clickhouse 16. main @ 0x0000000008cf13cf in /usr/bin/clickhouse 17. __libc_start_main @ 0x0000000000021b97 in /lib/x86_64-linux-gnu/libc-2.27.so 18. _start @ 0x00000000080705ae in /usr/bin/clickhouse (version 23.7.1.2012) Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2024-11-10 09:32:06 +00:00 · 2023-07-24 10:14:23 +02:00 · 2023-07-24 10:14:23 +02:00 · 0401dc453e
commit 0401dc453e
parent f17844e9c2
3 changed files with 26 additions and 3 deletions
--- a/tests/integration/helpers/cluster.py
+++ b/tests/integration/helpers/cluster.py
@ -3199,6 +3199,7 @@ class ClickHouseInstance:
    ):
        self.name = name
        self.base_cmd = cluster.base_cmd
+        self.base_dir = base_path
        self.docker_id = cluster.get_instance_docker_id(self.name)
        self.cluster = cluster
        self.hostname = hostname if hostname is not None else self.name
@ -4193,6 +4194,14 @@ class ClickHouseInstance:
            ["bash", "-c", f"sed -i 's/{replace}/{replacement}/g' {path_to_config}"]
        )

+    def put_users_config(self, config_path):
+        """Copy the config at config_path (joined onto self.base_dir) into this instance's configs/users.d directory (useful if you cannot put it at the start)"""
+
+        instance_config_dir = p.abspath(p.join(self.path, "configs"))
+        users_d_dir = p.abspath(p.join(instance_config_dir, "users.d"))
+        config_path = p.join(self.base_dir, config_path)
+        shutil.copy(config_path, users_d_dir)  # if users.d already exists as a directory, the file keeps its name
+
    def create_dir(self):
        """Create the instance directory and all the needed files there."""

--- a/tests/integration/test_version_update_after_mutation/configs/force_remove_data_recursively_on_drop.xml
+++ b/tests/integration/test_version_update_after_mutation/configs/force_remove_data_recursively_on_drop.xml
@ -0,0 +1,7 @@
+<clickhouse>
+    <profiles>
+        <default>
+            <force_remove_data_recursively_on_drop>1</force_remove_data_recursively_on_drop> <!-- avoids the "Cannot drop ... Directory not empty" (DATABASE_NOT_EMPTY) failure seen while converting the system database -->
+        </default>
+    </profiles>
+</clickhouse>
--- a/tests/integration/test_version_update_after_mutation/test.py
+++ b/tests/integration/test_version_update_after_mutation/test.py
@ -51,6 +51,12 @@ def start_cluster():
        cluster.shutdown()


+def restart_node(node):
+    # Install the force_remove_data_recursively_on_drop users config only now, right before the restart with the latest version (cannot be done before, because the version is too old)
+    node.put_users_config("configs/force_remove_data_recursively_on_drop.xml")
+    node.restart_with_latest_version(signal=9, fix_metadata=True)
+
+
 def test_mutate_and_upgrade(start_cluster):
    for node in [node1, node2]:
        node.query("DROP TABLE IF EXISTS mt")
@ -67,8 +73,9 @@ def test_mutate_and_upgrade(start_cluster):

    node2.query("DETACH TABLE mt")  # stop being leader
    node1.query("DETACH TABLE mt")  # stop being leader
-    node1.restart_with_latest_version(signal=9, fix_metadata=True)
-    node2.restart_with_latest_version(signal=9, fix_metadata=True)
+
+    restart_node(node1)
+    restart_node(node2)

    # After hard restart table can be in readonly mode
    exec_query_with_retry(
@ -124,7 +131,7 @@ def test_upgrade_while_mutation(start_cluster):
    # (We could be in process of creating some system table, which will leave empty directory on restart,
    # so when we start moving system tables from ordinary to atomic db, it will complain about some undeleted files)
    node3.query("SYSTEM FLUSH LOGS")
-    node3.restart_with_latest_version(signal=9, fix_metadata=True)
+    restart_node(node3)

    # checks for readonly
    exec_query_with_retry(node3, "OPTIMIZE TABLE mt1", sleep_time=5, retry_count=60)