From 52091f4ff81ea53ef3d73432ce02714a13f3a64d Mon Sep 17 00:00:00 2001
From: serxa
Date: Thu, 24 Oct 2024 15:57:14 +0000
Subject: [PATCH] add docs and test

---
 .../settings.md                                    | 18 ++++-
 .../configs/async_load_system_database.xml         |  3 +
 .../test_async_load_databases/test.py              | 73 ++++++++++++++-----
 3 files changed, 75 insertions(+), 19 deletions(-)
 create mode 100644 tests/integration/test_async_load_databases/configs/async_load_system_database.xml

diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md
index b6238487725..b1d0de21046 100644
--- a/docs/en/operations/server-configuration-parameters/settings.md
+++ b/docs/en/operations/server-configuration-parameters/settings.md
@@ -1975,6 +1975,22 @@ The default is `false`.
 <async_load_databases>true</async_load_databases>
 ```
 
+## async_load_system_database {#async_load_system_database}
+
+Asynchronous loading of system tables. Helpful if there is high amount of log tables and parts in system database. Independent of `async_load_databases` setting.
+
+If `true` all system databases with `Ordinary`, `Atomic` and `Replicated` engine will be loaded asynchronously after the ClickHouse server start up. See `system.asynchronous_loader` table, `tables_loader_background_pool_size` and `tables_loader_foreground_pool_size` server settings. Any query that tries to access a system table, that is not yet loaded, will wait for exactly this table to be started up. The table that is waited for by at least one query will be loaded with higher priority. Also consider setting a limit `max_waiting_queries` for the total number of waiting queries.
+
+If `false`, system database loads before server start.
+
+The default is `false`.
+
+**Example**
+
+``` xml
+<async_load_system_database>true</async_load_system_database>
+```
+
 ## tables_loader_foreground_pool_size {#tables_loader_foreground_pool_size}
 
 Sets the number of threads performing load jobs in foreground pool. The foreground pool is used for loading table synchronously before server start listening on a port and for loading tables that are waited for. Foreground pool has higher priority than background pool. It means that no job starts in background pool while there are jobs running in foreground pool.
@@ -3109,7 +3125,7 @@ By default, tunneling (i.e, `HTTP CONNECT`) is used to make `HTTPS` requests ove
 
 ### no_proxy
 By default, all requests will go through the proxy. In order to disable it for specific hosts, the `no_proxy` variable must be set.
-It can be set inside the `<proxy>` clause for list and remote resolvers and as an environment variable for environment resolver.
+It can be set inside the `<proxy>` clause for list and remote resolvers and as an environment variable for environment resolver. It supports IP addresses, domains, subdomains and `'*'` wildcard for full bypass. Leading dots are stripped just like curl does.
 
 Example:
 
diff --git a/tests/integration/test_async_load_databases/configs/async_load_system_database.xml b/tests/integration/test_async_load_databases/configs/async_load_system_database.xml
new file mode 100644
index 00000000000..79823f5fbee
--- /dev/null
+++ b/tests/integration/test_async_load_databases/configs/async_load_system_database.xml
@@ -0,0 +1,3 @@
+<clickhouse>
+    <async_load_system_database>true</async_load_system_database>
+</clickhouse>
diff --git a/tests/integration/test_async_load_databases/test.py b/tests/integration/test_async_load_databases/test.py
index 7fc6fd222d1..dd11067dfd4 100644
--- a/tests/integration/test_async_load_databases/test.py
+++ b/tests/integration/test_async_load_databases/test.py
@@ -1,4 +1,5 @@
 import random
+import time
 
 import pytest
 
@@ -13,25 +14,35 @@ DICTIONARY_FILES = [
 ]
 
 cluster = ClickHouseCluster(__file__)
-instance = cluster.add_instance(
-    "instance",
+node1 = cluster.add_instance(
+    "node1",
     main_configs=["configs/config.xml"],
     dictionaries=DICTIONARY_FILES,
     stay_alive=True,
 )
+node2 = cluster.add_instance(
+    "node2",
+    main_configs=[
+        "configs/async_load_system_database.xml",
+    ],
+    dictionaries=DICTIONARY_FILES,
+    stay_alive=True,
+)
+
 
 @pytest.fixture(scope="module")
 def started_cluster():
     try:
         cluster.start()
 
-        instance.query(
-            """
-            CREATE DATABASE IF NOT EXISTS dict ENGINE=Dictionary;
-            CREATE DATABASE IF NOT EXISTS test;
-            """
-        )
+        for node in [node1, node2]:
+            node.query(
+                """
+                CREATE DATABASE IF NOT EXISTS dict ENGINE=Dictionary;
+                CREATE DATABASE IF NOT EXISTS test;
+                """
+            )
 
         yield cluster
 
@@ -40,13 +51,13 @@
 
 
 def get_status(dictionary_name):
-    return instance.query(
+    return node1.query(
         "SELECT status FROM system.dictionaries WHERE name='" + dictionary_name + "'"
     ).rstrip("\n")
 
 
 def test_dict_get_data(started_cluster):
-    query = instance.query
+    query = node1.query
 
     query(
         "CREATE TABLE test.elements (id UInt64, a String, b Int32, c Float64) ENGINE=Log;"
@@ -80,7 +91,7 @@
 
     # Wait for dictionaries to be reloaded.
     assert_eq_with_retry(
-        instance,
+        node1,
         "SELECT dictHas('dep_x', toUInt64(3))",
         "1",
         sleep_time=2,
@@ -94,7 +105,7 @@
     # so dep_x and dep_z are not going to be updated after the following INSERT.
     query("INSERT INTO test.elements VALUES (4, 'ether', 404, 0.001)")
     assert_eq_with_retry(
-        instance,
+        node1,
         "SELECT dictHas('dep_y', toUInt64(4))",
         "1",
         sleep_time=2,
@@ -104,11 +115,11 @@
     assert query("SELECT dictGetString('dep_y', 'a', toUInt64(4))") == "ether\n"
     assert query("SELECT dictGetString('dep_z', 'a', toUInt64(4))") == "ZZ\n"
     query("DROP TABLE IF EXISTS test.elements;")
-    instance.restart_clickhouse()
+    node1.restart_clickhouse()
 
 
 def dependent_tables_assert():
-    res = instance.query("select database || '.' || name from system.tables")
+    res = node1.query("select database || '.' || name from system.tables")
     assert "system.join" in res
     assert "default.src" in res
     assert "dict.dep_y" in res
@@ -119,7 +130,7 @@ def dependent_tables_assert():
 
 
 def test_dependent_tables(started_cluster):
-    query = instance.query
+    query = node1.query
     query("create database lazy engine=Lazy(10)")
     query("create database a")
     query("create table lazy.src (n int, m int) engine=Log")
@@ -157,7 +168,7 @@ def test_dependent_tables(started_cluster):
     )
 
     dependent_tables_assert()
-    instance.restart_clickhouse()
+    node1.restart_clickhouse()
     dependent_tables_assert()
     query("drop table a.t")
     query("drop table lazy.log")
@@ -170,14 +181,14 @@
 
 
 def test_multiple_tables(started_cluster):
-    query = instance.query
+    query = node1.query
 
    tables_count = 20
     for i in range(tables_count):
         query(
             f"create table test.table_{i} (n UInt64, s String) engine=MergeTree order by n as select number, randomString(100) from numbers(100)"
         )
-    instance.restart_clickhouse()
+    node1.restart_clickhouse()
 
     order = [i for i in range(tables_count)]
     random.shuffle(order)
@@ -185,3 +196,29 @@
         assert query(f"select count() from test.table_{i}") == "100\n"
     for i in range(tables_count):
         query(f"drop table test.table_{i} sync")
+
+
+def test_async_load_system_database(started_cluster):
+    id = 1
+    for i in range(4):
+        # Access some system tables that might be still loading
+        if id > 1:
+            for j in range(3):
+                node2.query(f"select count() from system.text_log_{random.randint(1, id - 1)}")
+                node2.query(f"select count() from system.query_log_{random.randint(1, id - 1)}")
+
+        # Generate more system tables
+        for j in range(30):
+            while True:
+                count = int(
+                    node2.query("select count() from system.tables where database = 'system' and name in ['query_log', 'text_log']")
+                )
+                if count == 2:
+                    break
+                time.sleep(0.1)
+            node2.query(f"rename table system.text_log to system.text_log_{id}")
+            node2.query(f"rename table system.query_log to system.query_log_{id}")
+            id += 1
+
+        # Trigger async load of system database
+        node2.restart_clickhouse()