Fix flakiness of test_version_update_after_mutation by enabling force_remove_data_recursively_on_drop

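After the restart, converting the system database from Ordinary to Atomic can fail because its data directory may still contain some leftovers: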

    2023.07.24 07:08:25.238066 [ 140 ] {} <Error> Application: Code: 219. DB::Exception: Cannot drop: filesystem error: in remove: Directory not empty ["/var/lib/clickhouse/data/system/"]. Probably database contain some detached tables or metadata leftovers from Ordinary engine. If you want to remove all data anyway, try to attach database back and drop it again with enabled force_remove_data_recursively_on_drop setting: Exception while trying to convert database system from Ordinary to Atomic. It may be in some intermediate state. You can finish conversion manually by moving the rest tables from system to .tmp_convert.system.9396432095832455195 (using RENAME TABLE) and executing DROP DATABASE system and RENAME DATABASE .tmp_convert.system.9396432095832455195 TO system. (DATABASE_NOT_EMPTY), Stack trace (when copying this message, always include the lines below):

    0. DB::Exception::Exception(DB::Exception::MessageMasked&&, int, bool) @ 0x000000000e68af57 in /usr/bin/clickhouse
    1. ? @ 0x000000000cab443c in /usr/bin/clickhouse
    2. DB::DatabaseOnDisk::drop(std::shared_ptr<DB::Context const>) @ 0x000000001328d617 in /usr/bin/clickhouse
    3. DB::DatabaseCatalog::detachDatabase(std::shared_ptr<DB::Context const>, String const&, bool, bool) @ 0x0000000013524a6c in /usr/bin/clickhouse
    4. DB::InterpreterDropQuery::executeToDatabaseImpl(DB::ASTDropQuery const&, std::shared_ptr<DB::IDatabase>&, std::vector<StrongTypedef<wide::integer<128ul, unsigned int>, DB::UUIDTag>, std::allocator<StrongTypedef<wide::integer<128ul, unsigned int>, DB::UUIDTag>>>&) @ 0x0000000013bc05e4 in /usr/bin/clickhouse
    5. DB::InterpreterDropQuery::executeToDatabase(DB::ASTDropQuery const&) @ 0x0000000013bbc6b8 in /usr/bin/clickhouse
    6. DB::InterpreterDropQuery::execute() @ 0x0000000013bbba22 in /usr/bin/clickhouse
    7. ? @ 0x00000000140b13a5 in /usr/bin/clickhouse
    8. DB::executeQuery(String const&, std::shared_ptr<DB::Context>, bool, DB::QueryProcessingStage::Enum) @ 0x00000000140ad20e in /usr/bin/clickhouse
    9. ? @ 0x00000000140d2ef0 in /usr/bin/clickhouse
    10. DB::maybeConvertSystemDatabase(std::shared_ptr<DB::Context>) @ 0x00000000140d0aaf in /usr/bin/clickhouse
    11. DB::Server::main(std::vector<String, std::allocator<String>> const&) @ 0x000000000e724e55 in /usr/bin/clickhouse
    12. Poco::Util::Application::run() @ 0x0000000017ead086 in /usr/bin/clickhouse
    13. DB::Server::run() @ 0x000000000e714a5d in /usr/bin/clickhouse
    14. Poco::Util::ServerApplication::run(int, char**) @ 0x0000000017ec07b9 in /usr/bin/clickhouse
    15. mainEntryClickHouseServer(int, char**) @ 0x000000000e711a26 in /usr/bin/clickhouse
    16. main @ 0x0000000008cf13cf in /usr/bin/clickhouse
    17. __libc_start_main @ 0x0000000000021b97 in /lib/x86_64-linux-gnu/libc-2.27.so
    18. _start @ 0x00000000080705ae in /usr/bin/clickhouse
     (version 23.7.1.2012)

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
This commit is contained in:
Azat Khuzhin 2023-07-24 10:14:23 +02:00
parent f17844e9c2
commit 0401dc453e
3 changed files with 26 additions and 3 deletions

View File

@ -3199,6 +3199,7 @@ class ClickHouseInstance:
):
self.name = name
self.base_cmd = cluster.base_cmd
self.base_dir = base_path
self.docker_id = cluster.get_instance_docker_id(self.name)
self.cluster = cluster
self.hostname = hostname if hostname is not None else self.name
@ -4193,6 +4194,14 @@ class ClickHouseInstance:
["bash", "-c", f"sed -i 's/{replace}/{replacement}/g' {path_to_config}"]
)
def put_users_config(self, config_path):
    """Copy a users config into this instance's ``configs/users.d`` directory.

    Useful if the config cannot be put in place at the start.

    :param config_path: path of the config file; it is joined onto
        ``self.base_dir`` to locate the source file.
    """
    # Source: the config file, looked up under the base directory.
    source_file = p.join(self.base_dir, config_path)
    # Destination: <instance path>/configs/users.d, as an absolute path.
    destination_dir = p.abspath(p.join(self.path, "configs", "users.d"))
    shutil.copy(source_file, destination_dir)
def create_dir(self):
"""Create the instance directory and all the needed files there."""

View File

@ -0,0 +1,7 @@
<clickhouse>
<!-- Enables force_remove_data_recursively_on_drop in the default profile, so that
     dropping a database whose directory is not empty removes the remaining data
     instead of failing with DATABASE_NOT_EMPTY. -->
<profiles>
<default>
<force_remove_data_recursively_on_drop>1</force_remove_data_recursively_on_drop>
</default>
</profiles>
</clickhouse>

View File

@ -51,6 +51,12 @@ def start_cluster():
cluster.shutdown()
def restart_node(node):
    """Install the recursive-drop users config on ``node``, then restart it.

    The node is restarted on the latest version with ``signal=9`` and
    ``fix_metadata=True``.
    """
    # force_remove_data_recursively_on_drop cannot be set any earlier,
    # because the version the node runs before the restart is too old.
    users_config = "configs/force_remove_data_recursively_on_drop.xml"
    node.put_users_config(users_config)
    node.restart_with_latest_version(fix_metadata=True, signal=9)
def test_mutate_and_upgrade(start_cluster):
for node in [node1, node2]:
node.query("DROP TABLE IF EXISTS mt")
@ -67,8 +73,9 @@ def test_mutate_and_upgrade(start_cluster):
node2.query("DETACH TABLE mt") # stop being leader
node1.query("DETACH TABLE mt") # stop being leader
node1.restart_with_latest_version(signal=9, fix_metadata=True)
node2.restart_with_latest_version(signal=9, fix_metadata=True)
restart_node(node1)
restart_node(node2)
# After hard restart table can be in readonly mode
exec_query_with_retry(
@ -124,7 +131,7 @@ def test_upgrade_while_mutation(start_cluster):
# (We could be in process of creating some system table, which will leave empty directory on restart,
# so when we start moving system tables from ordinary to atomic db, it will complain about some undeleted files)
node3.query("SYSTEM FLUSH LOGS")
node3.restart_with_latest_version(signal=9, fix_metadata=True)
restart_node(node3)
# checks for readonly
exec_query_with_retry(node3, "OPTIMIZE TABLE mt1", sleep_time=5, retry_count=60)