diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index dcfc1916450..53c94cff22d 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -1,12 +1,17 @@ #include +#include +#include +#include +#include +#include #include #include #include #include +#include #include #include -#include #include #include #include @@ -15,14 +20,11 @@ #include #include #include -#include -#include -#include #include #include -#include -#include -#include +#include +#include +#include namespace fs = std::filesystem; @@ -612,7 +614,7 @@ void DatabaseOnDisk::iterateMetadataFiles(ContextPtr local_context, const Iterat }; /// Metadata files to load: name and flag for .tmp_drop files - std::set> metadata_files; + std::vector> metadata_files; fs::directory_iterator dir_end; for (fs::directory_iterator dir_it(getMetadataPath()); dir_it != dir_end; ++dir_it) @@ -633,7 +635,7 @@ void DatabaseOnDisk::iterateMetadataFiles(ContextPtr local_context, const Iterat if (endsWith(file_name, ".sql.tmp_drop")) { /// There are files that we tried to delete previously - metadata_files.emplace(file_name, false); + metadata_files.emplace_back(file_name, false); } else if (endsWith(file_name, ".sql.tmp")) { @@ -644,23 +646,30 @@ void DatabaseOnDisk::iterateMetadataFiles(ContextPtr local_context, const Iterat else if (endsWith(file_name, ".sql")) { /// The required files have names like `table_name.sql` - metadata_files.emplace(file_name, true); + metadata_files.emplace_back(file_name, true); } else throw Exception(ErrorCodes::INCORRECT_FILE_NAME, "Incorrect file extension: {} in metadata directory {}", file_name, getMetadataPath()); } + std::sort(metadata_files.begin(), metadata_files.end()); + metadata_files.erase(std::unique(metadata_files.begin(), metadata_files.end()), metadata_files.end()); + /// Read and parse metadata in parallel ThreadPool pool(CurrentMetrics::DatabaseOnDiskThreads, CurrentMetrics::DatabaseOnDiskThreadsActive, CurrentMetrics::DatabaseOnDiskThreadsScheduled); - for (const auto & file : metadata_files) + const auto batch_size = metadata_files.size() / pool.getMaxThreads() + 1; + for (auto it = metadata_files.begin(); it < metadata_files.end(); std::advance(it, batch_size)) { - pool.scheduleOrThrowOnError([&]() - { - if (file.second) - process_metadata_file(file.first); - else - process_tmp_drop_metadata_file(file.first); - }); + std::span batch{it, std::min(std::next(it, batch_size), metadata_files.end())}; + pool.scheduleOrThrowOnError( + [batch, &process_metadata_file, &process_tmp_drop_metadata_file]() mutable + { + for (const auto & file : batch) + if (file.second) + process_metadata_file(file.first); + else + process_tmp_drop_metadata_file(file.first); + }); } pool.wait(); }