add async_load_databases server setting

2024-11-10 01:25:21 +00:00 · 2023-05-12 17:54:51 +00:00 · 2023-05-12 17:54:51 +00:00 · 4d31051bb9
commit 4d31051bb9
parent 3c1ae12c85
7 changed files with 59 additions and 29 deletions
--- a/docs/en/operations/server-configuration-parameters/settings.md
+++ b/docs/en/operations/server-configuration-parameters/settings.md
@ -1192,6 +1192,21 @@ Possible values:

 Default value: 128.

+## async_load_databases {#async_load_databases}
+
+Asynchronous loading of databases and tables.
+
+If `true`, all non-system databases with the `Ordinary`, `Atomic` and `Replicated` engines are loaded asynchronously after the ClickHouse server starts up. Loading is done by AsyncLoader (see the `system.async_loader` table and the `async_loader_pool_size` server setting). Any query that tries to access a table that is not yet loaded waits for exactly that table to be started up. If a load job fails, the query rethrows the error (instead of the whole server shutting down, as happens when `async_load_databases = false`). A table that at least one query is waiting for is loaded with higher priority. DDL queries on a database wait for exactly that database to be started up.
+
+If `false`, all databases are loaded when the server starts.
+
+The default is `false`.
+
+**Example**
+
+``` xml
+<async_load_databases>true</async_load_databases>
+```

 ## async_loader_pool_size {#async_loader_pool_size}

--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@ -1626,9 +1626,6 @@ try

    LOG_INFO(log, "Loading metadata from {}", path_str);

-    /// Tasks for loading and starting up all databases except system
-    LoadTaskPtrs load_metadata;
-
    try
    {
        auto & database_catalog = DatabaseCatalog::instance();
@ -1649,7 +1646,7 @@ try
        database_catalog.loadMarkedAsDroppedTables();
        database_catalog.createBackgroundTasks();
        /// Then, load remaining databases (some of them may be loaded asynchronously)
-        load_metadata = loadMetadata(global_context, default_database);
+        auto load_metadata = loadMetadata(global_context, default_database, server_settings.async_load_databases);
        /// If we need to convert database engines, disable async tables loading
        convertDatabasesEnginesIfNeed(load_metadata, global_context);
        startupSystemTables(global_context);
@ -1664,6 +1661,7 @@ try
        tryLogCurrentException(log, "Caught exception while loading metadata");
        throw;
    }
+
    LOG_DEBUG(log, "Loaded metadata.");

    /// Init trace collector only after trace_log system table was created
--- a/programs/server/config.xml
+++ b/programs/server/config.xml
@ -349,6 +349,11 @@
    <async_loader_pool_size>16</async_loader_pool_size>
    -->

+    <!-- Enables asynchronous loading of databases and tables to speed up server startup.
+         Queries to an entity that is not yet loaded are blocked until its loading finishes.
+      -->
+    <async_load_databases>false</async_load_databases>
+
    <!-- On memory constrained environments you may have to set this to value larger than 1.
      -->
    <max_server_memory_usage_to_ram_ratio>0.9</max_server_memory_usage_to_ram_ratio>
--- a/src/Core/ServerSettings.h
+++ b/src/Core/ServerSettings.h
@ -79,6 +79,7 @@ namespace DB
    M(UInt64, background_message_broker_schedule_pool_size, 16, "The maximum number of threads that will be used for executing background operations for message streaming.", 0) \
    M(UInt64, background_distributed_schedule_pool_size, 16, "The maximum number of threads that will be used for executing distributed sends.", 0) \
    M(UInt64, async_loader_pool_size, 16, "The maximum number of threads that will be used for async loading of tables after server start.", 0) \
+    M(Bool, async_load_databases, false, "Enable asynchronous loading of databases and tables to speedup server startup. Queries to not yet loaded entity will be blocked until load is finished.", 0)
    M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0)


--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@ -1945,6 +1945,7 @@ size_t Context::getAsyncLoaderPoolSize() const
    // After server is started incoming queries can compete for resources with loading of the rest of the tables.
    // Thus it can be advantageous to lower number of threads after start using server setting `async_loader_pool_size`.
    // TODO(serxa): set async_loader max threads during server_start_job
+    // TODO(serxa): we need to add `turboMode()` if there are waiting queries. But how to create more workers only for foreground work?
    return shared->server_start_job && shared->server_start_job->status() == LoadStatus::OK ?
        shared->server_settings.async_loader_pool_size : getNumberOfPhysicalCPUCores();
 }
--- a/src/Interpreters/loadMetadata.cpp
+++ b/src/Interpreters/loadMetadata.cpp
@ -152,19 +152,29 @@ static void checkIncompleteOrdinaryToAtomicConversion(ContextPtr context, const
    }
 }

-LoadTaskPtrs loadMetadata(ContextMutablePtr context, const String & default_database_name)
+LoadTaskPtrs loadMetadata(ContextMutablePtr context, const String & default_database_name, bool async_load_databases)
 {
    Poco::Logger * log = &Poco::Logger::get("loadMetadata");

    String path = context->getPath() + "metadata";

-    /** There may exist 'force_restore_data' file, that means,
-      *  skip safety threshold on difference of data parts while initializing tables.
-      * This file is deleted after successful loading of tables.
-      * (flag is "one-shot")
-      */
+    /// A 'force_restore_data' flag file may exist; it means the safety threshold on the
+    /// difference of data parts must be skipped while initializing tables.
+    /// The file is deleted immediately, before tables are loaded (the flag is "one-shot").
    auto force_restore_data_flag_file = fs::path(context->getFlagsPath()) / "force_restore_data";
    bool has_force_restore_data_flag = fs::exists(force_restore_data_flag_file);
+    if (has_force_restore_data_flag)
+    {
+        try
+        {
+            fs::remove(force_restore_data_flag_file);
+        }
+        catch (...)
+        {
+            tryLogCurrentException("Load metadata", "Can't remove force restore file to enable data sanity checks");
+        }
+    }
+

    /// Loop over databases.
    std::map<String, String> databases;
@ -227,28 +237,28 @@ LoadTaskPtrs loadMetadata(ContextMutablePtr context, const String & default_data
    auto load_tasks = loader.loadTablesAsync();
    auto startup_tasks = loader.startupTablesAsync();

+    if (!async_load_databases) {
        // First, load all tables
        scheduleLoad(load_tasks);
-    waitLoad(load_tasks); // TODO(serxa): only wait for tables that must be loaded before server start
+        waitLoad(load_tasks);

-    // Then, startup all tables
+        // Then, start up all tables. Doing it only after every table is loaded postpones merges and mutations.
+        // Note that with asynchronous loading this would be a total barrier, which is unacceptable when queries are waiting for tables.
        scheduleLoad(startup_tasks);
-    waitLoad(startup_tasks); // TODO(serxa): only wait for tables that must be started before server start
-
-    if (has_force_restore_data_flag)
-    {
-        try
-        {
-            fs::remove(force_restore_data_flag_file); // TODO(serxa): when we should remove it with async loading? should we disable async loading with restore?
-        }
-        catch (...)
-        {
-            tryLogCurrentException("Load metadata", "Can't remove force restore file to enable data sanity checks");
-        }
-    }
+        waitLoad(startup_tasks);
+        return {};
+    } else {
+        // Schedule all the jobs.
+        // Note that to achieve behaviour similar to the synchronous case (postponing of merges) we use priorities.
+        // All startup jobs have lower priority than load jobs,
+        // so _almost_ all tables will finish loading before the first table startup if there are no queries.
+        // A query waiting for a table boosts its priority to finish that table's startup faster than the load of other tables.
+        scheduleLoadAll(load_tasks, startup_tasks);

+        // Do NOT wait, just return tasks for continuation or later wait.
        return joinTasks(load_tasks, startup_tasks);
    }
+}

 static void loadSystemDatabaseImpl(ContextMutablePtr context, const String & database_name, const String & default_engine)
 {
--- a/src/Interpreters/loadMetadata.h
+++ b/src/Interpreters/loadMetadata.h
@ -12,7 +12,7 @@ void loadMetadataSystem(ContextMutablePtr context);

 /// Load tables from databases and add them to context. Databases 'system' and 'information_schema' are ignored.
 /// Use separate function to load system tables.
-[[nodiscard]] LoadTaskPtrs loadMetadata(ContextMutablePtr context, const String & default_database_name = {});
+[[nodiscard]] LoadTaskPtrs loadMetadata(ContextMutablePtr context, const String & default_database_name = {}, bool async_load_databases = false);

 /// Background operations in system tables may slow down loading of the remaining tables,
 /// so we startup system tables after all databases are loaded.