From 66489ef6d616cc92b8570a50bf46f9e7bef13ea9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9A=D0=B8=D1=80=D0=B8=D0=BB=D0=BB=20=D0=93=D0=B0=D1=80?= =?UTF-8?q?=D0=B1=D0=B0=D1=80?= Date: Tue, 30 Jul 2024 16:09:50 +0300 Subject: [PATCH] Rename and move setting to ObjectStorage classes --- src/Core/Defines.h | 2 + src/Core/Settings.h | 2 +- src/Core/SettingsChangesHistory.cpp | 6 +- src/Disks/IDisk.cpp | 1 - src/Disks/IDisk.h | 5 -- src/Disks/IDiskTransaction.h | 7 --- .../ObjectStorages/DiskObjectStorage.cpp | 10 +++- src/Disks/ObjectStorages/DiskObjectStorage.h | 2 + .../DiskObjectStorageTransaction.cpp | 59 +++++++------------ .../DiskObjectStorageTransaction.h | 18 ++---- src/IO/S3Defines.h | 1 - .../test_disks_app_func/config.xml | 2 +- 12 files changed, 45 insertions(+), 70 deletions(-) diff --git a/src/Core/Defines.h b/src/Core/Defines.h index 6df335a9c8f..bc76ca9bf2f 100644 --- a/src/Core/Defines.h +++ b/src/Core/Defines.h @@ -121,4 +121,6 @@ static constexpr auto QUERY_PROFILER_DEFAULT_SAMPLE_RATE_NS = 1000000000; static constexpr auto QUERY_PROFILER_DEFAULT_SAMPLE_RATE_NS = 0; #endif +static constexpr auto DEFAULT_REMOVE_SHARED_RECURSIVE_FILE_LIMIT = 1000uz; + } diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 3d89bf6c78d..71984e81807 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -891,7 +891,7 @@ class IColumn; M(Bool, optimize_distinct_in_order, true, "Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. For example, prefix of sorting key in merge tree or ORDER BY statement", 0) \ M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \ M(UInt64, extract_key_value_pairs_max_pairs_per_row, 1000, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory.", 0) ALIAS(extract_kvp_max_pairs_per_row) \ - M(UInt64, object_storage_remove_recursive_batch_size, S3::DEFAULT_REMOVE_SHARED_RECURSIVE_BATCH_SIZE, "Max number of files to collect for removal in one transaction. Used to reduce memory usage.", 0) \ + M(UInt64, object_storage_remove_recursive_file_limit, DEFAULT_REMOVE_SHARED_RECURSIVE_FILE_LIMIT, "Max number of files to store in memory during remove. Zero value means unlimited. Used to reduce memory usage.", 0) \ \ \ /* ###################################### */ \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index eb2aabb1fc3..936880c2e00 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -57,7 +57,8 @@ String ClickHouseVersion::toString() const /// Note: please check if the key already exists to prevent duplicate entries. static std::initializer_list> settings_changes_history_initializer = { - {"24.8", {{"object_storage_remove_recursive_batch_size", UINT64_MAX, 1000, "Added new setting to limit number of files stored in memory while removing from object storage."}}}, + {"24.8", {{"object_storage_remove_recursive_file_limit", 0, 1000, "Added new setting to limit number of files stored in memory while removing from object storage. Zero value means unlimited."} + }}, {"24.7", {{"output_format_parquet_write_page_index", false, true, "Add a possibility to write page index into parquet files."}, {"output_format_binary_encode_types_in_binary_format", false, false, "Added new setting to allow to write type names in binary format in RowBinaryWithNamesAndTypes output format"}, {"input_format_binary_decode_types_in_binary_format", false, false, "Added new setting to allow to read type names in binary format in RowBinaryWithNamesAndTypes input format"}, @@ -81,7 +82,8 @@ static std::initializer_list #include #include -#include #include #include #include @@ -116,7 +115,6 @@ public: /// Default constructor. IDisk(const String & name_, const Poco::Util::AbstractConfiguration & config, const String & config_prefix) : name(name_) - , remove_shared_recursive_batch_size(config.getUInt64(config_prefix + ".remove_shared_recursive_batch_size", S3::DEFAULT_REMOVE_SHARED_RECURSIVE_BATCH_SIZE)) , copying_thread_pool( CurrentMetrics::IDiskCopierThreads, CurrentMetrics::IDiskCopierThreadsActive, @@ -127,7 +125,6 @@ public: explicit IDisk(const String & name_) : name(name_) - , remove_shared_recursive_batch_size(S3::DEFAULT_REMOVE_SHARED_RECURSIVE_BATCH_SIZE) , copying_thread_pool( CurrentMetrics::IDiskCopierThreads, CurrentMetrics::IDiskCopierThreadsActive, CurrentMetrics::IDiskCopierThreadsScheduled, 16) { @@ -505,8 +502,6 @@ protected: virtual void checkAccessImpl(const String & path); - UInt64 remove_shared_recursive_batch_size; - private: ThreadPool copying_thread_pool; bool is_custom_disk = false; diff --git a/src/Disks/IDiskTransaction.h b/src/Disks/IDiskTransaction.h index 9eef5d2266f..fc84281baea 100644 --- a/src/Disks/IDiskTransaction.h +++ b/src/Disks/IDiskTransaction.h @@ -32,10 +32,6 @@ using RemoveBatchRequest = std::vector; struct IDiskTransaction : private boost::noncopyable { public: - IDiskTransaction(): remove_shared_recursive_batch_size(S3::DEFAULT_REMOVE_SHARED_RECURSIVE_BATCH_SIZE) {} - - explicit IDiskTransaction(UInt64 remove_shared_recursive_batch_size_): remove_shared_recursive_batch_size(remove_shared_recursive_batch_size_) {} - /// Tries to commit all accumulated operations simultaneously. /// If something fails rollback and throw exception. virtual void commit() = 0; @@ -135,9 +131,6 @@ public: /// Truncate file to the target size. virtual void truncateFile(const std::string & src_path, size_t target_size) = 0; - -protected: - UInt64 remove_shared_recursive_batch_size; }; using DiskTransactionPtr = std::shared_ptr; diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index e04318b1a5a..af2f9bf4258 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -42,7 +42,7 @@ DiskTransactionPtr DiskObjectStorage::createObjectStorageTransaction() *object_storage, *metadata_storage, send_metadata ? metadata_helper.get() : nullptr, - remove_shared_recursive_batch_size); + remove_shared_recursive_file_limit); } DiskTransactionPtr DiskObjectStorage::createObjectStorageTransactionToAnotherDisk(DiskObjectStorage& to_disk) @@ -52,7 +52,8 @@ DiskTransactionPtr DiskObjectStorage::createObjectStorageTransactionToAnotherDis *metadata_storage, *to_disk.getObjectStorage(), *to_disk.getMetadataStorage(), - send_metadata ? metadata_helper.get() : nullptr); + send_metadata ? metadata_helper.get() : nullptr, + remove_shared_recursive_file_limit); } @@ -72,6 +73,7 @@ DiskObjectStorage::DiskObjectStorage( , read_resource_name(config.getString(config_prefix + ".read_resource", "")) , write_resource_name(config.getString(config_prefix + ".write_resource", "")) , metadata_helper(std::make_unique(this, ReadSettings{}, WriteSettings{})) + , remove_shared_recursive_file_limit(config.getUInt64(config_prefix + ".remove_shared_recursive_file_limit", DEFAULT_REMOVE_SHARED_RECURSIVE_FILE_LIMIT)) { data_source_description = DataSourceDescription{ .type = DataSourceType::ObjectStorage, @@ -374,6 +376,8 @@ void DiskObjectStorage::removeSharedFileIfExists(const String & path, bool delet void DiskObjectStorage::removeSharedRecursive( const String & path, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) { + /// At most remove_shared_recursive_file_limit files are removed in one transaction + /// Retry until all of them are removed while (exists(path)) { auto transaction = createObjectStorageTransaction(); @@ -558,6 +562,8 @@ void DiskObjectStorage::applyNewSettings( write_resource_name = new_write_resource_name; } + remove_shared_recursive_file_limit = config.getUInt64(config_prefix + ".remove_shared_recursive_file_limit", DEFAULT_REMOVE_SHARED_RECURSIVE_FILE_LIMIT); + IDisk::applyNewSettings(config, context_, config_prefix, disk_map); } diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index 5c45a258806..2759eba2964 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -246,6 +246,8 @@ private: String write_resource_name; std::unique_ptr metadata_helper; + + UInt64 remove_shared_recursive_file_limit; }; using DiskObjectStoragePtr = std::shared_ptr; diff --git a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp index 0db2c4911df..4bf67805219 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp @@ -30,34 +30,13 @@ namespace ErrorCodes DiskObjectStorageTransaction::DiskObjectStorageTransaction( IObjectStorage & object_storage_, IMetadataStorage & metadata_storage_, - DiskObjectStorageRemoteMetadataRestoreHelper * metadata_helper_) + DiskObjectStorageRemoteMetadataRestoreHelper * metadata_helper_, + UInt64 remove_shared_recursive_file_limit_) : object_storage(object_storage_) , metadata_storage(metadata_storage_) , metadata_transaction(metadata_storage.createTransaction()) , metadata_helper(metadata_helper_) -{} - -DiskObjectStorageTransaction::DiskObjectStorageTransaction( - IObjectStorage & object_storage_, - IMetadataStorage & metadata_storage_, - DiskObjectStorageRemoteMetadataRestoreHelper * metadata_helper_, - UInt64 remove_shared_recursive_batch_size_) - : IDiskTransaction(remove_shared_recursive_batch_size_) - , object_storage(object_storage_) - , metadata_storage(metadata_storage_) - , metadata_transaction(metadata_storage.createTransaction()) - , metadata_helper(metadata_helper_) -{} - -DiskObjectStorageTransaction::DiskObjectStorageTransaction( - IObjectStorage & object_storage_, - IMetadataStorage & metadata_storage_, - DiskObjectStorageRemoteMetadataRestoreHelper * metadata_helper_, - MetadataTransactionPtr metadata_transaction_) - : object_storage(object_storage_) - , metadata_storage(metadata_storage_) - , metadata_transaction(metadata_transaction_) - , metadata_helper(metadata_helper_) + , remove_shared_recursive_file_limit(remove_shared_recursive_file_limit_) {} DiskObjectStorageTransaction::DiskObjectStorageTransaction( @@ -65,12 +44,12 @@ DiskObjectStorageTransaction::DiskObjectStorageTransaction( IMetadataStorage & metadata_storage_, DiskObjectStorageRemoteMetadataRestoreHelper * metadata_helper_, MetadataTransactionPtr metadata_transaction_, - UInt64 remove_shared_recursive_batch_size_) - : IDiskTransaction(remove_shared_recursive_batch_size_) - , object_storage(object_storage_) + UInt64 remove_shared_recursive_file_limit_) + : object_storage(object_storage_) , metadata_storage(metadata_storage_) , metadata_transaction(metadata_transaction_) , metadata_helper(metadata_helper_) + , remove_shared_recursive_file_limit(remove_shared_recursive_file_limit_) {} MultipleDisksObjectStorageTransaction::MultipleDisksObjectStorageTransaction( @@ -78,8 +57,9 @@ MultipleDisksObjectStorageTransaction::MultipleDisksObjectStorageTransaction( IMetadataStorage & metadata_storage_, IObjectStorage& destination_object_storage_, IMetadataStorage& destination_metadata_storage_, - DiskObjectStorageRemoteMetadataRestoreHelper * metadata_helper_) - : DiskObjectStorageTransaction(object_storage_, metadata_storage_, metadata_helper_, destination_metadata_storage_.createTransaction()) + DiskObjectStorageRemoteMetadataRestoreHelper * metadata_helper_, + UInt64 remove_shared_recursive_file_limit_) + : DiskObjectStorageTransaction(object_storage_, metadata_storage_, metadata_helper_, destination_metadata_storage_.createTransaction(), remove_shared_recursive_file_limit_) , destination_object_storage(destination_object_storage_) , destination_metadata_storage(destination_metadata_storage_) {} @@ -316,7 +296,7 @@ struct RemoveRecursiveObjectStorageOperation final : public IDiskObjectStorageOp const bool keep_all_batch_data; /// paths inside the 'this->path' const NameSet file_names_remove_metadata_only; - const UInt64 batch_size; + const UInt64 limit; /// map from local_path to its remote objects with hardlinks counter /// local_path is the path inside 'this->path' @@ -328,12 +308,12 @@ struct RemoveRecursiveObjectStorageOperation final : public IDiskObjectStorageOp const std::string & path_, bool keep_all_batch_data_, const NameSet & file_names_remove_metadata_only_, - UInt64 batch_size_) + UInt64 limit_) : IDiskObjectStorageOperation(object_storage_, metadata_storage_) , path(path_) , keep_all_batch_data(keep_all_batch_data_) , file_names_remove_metadata_only(file_names_remove_metadata_only_) - , batch_size(batch_size_) + , limit(limit_) {} std::string getInfoForLog() const override @@ -341,10 +321,15 @@ struct RemoveRecursiveObjectStorageOperation final : public IDiskObjectStorageOp return fmt::format("RemoveRecursiveObjectStorageOperation (path: {})", path); } + bool checkLimitReached() const + { + return limit > 0 && objects_to_remove_by_path.size() == limit; + } + void removeMetadataRecursive(MetadataTransactionPtr tx, const std::string & path_to_remove) { checkStackSize(); /// This is needed to prevent stack overflow in case of cyclic symlinks. - if (objects_to_remove_by_path.size() == batch_size) + if (checkLimitReached()) return; if (metadata_storage.isFile(path_to_remove)) @@ -388,12 +373,12 @@ struct RemoveRecursiveObjectStorageOperation final : public IDiskObjectStorageOp { for (auto it = metadata_storage.iterateDirectory(path_to_remove); it->isValid(); it->next()) { - if (objects_to_remove_by_path.size() == batch_size) + if (checkLimitReached()) return; removeMetadataRecursive(tx, it->path()); } - /// Do not delete in case directory contains >= batch_size files - if (objects_to_remove_by_path.size() < batch_size) + /// Do not delete in case directory contains >= limit files + if (objects_to_remove_by_path.size() < limit) tx->removeDirectory(path_to_remove); } } @@ -717,7 +702,7 @@ void DiskObjectStorageTransaction::removeSharedRecursive( const std::string & path, bool keep_all_shared_data, const NameSet & file_names_remove_metadata_only) { auto operation = std::make_unique( - object_storage, metadata_storage, path, keep_all_shared_data, file_names_remove_metadata_only, remove_shared_recursive_batch_size); + object_storage, metadata_storage, path, keep_all_shared_data, file_names_remove_metadata_only, remove_shared_recursive_file_limit); operations_to_execute.emplace_back(std::move(operation)); } diff --git a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.h b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.h index 8a782cbd2e9..06be42f5881 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.h +++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.h @@ -63,30 +63,21 @@ protected: DiskObjectStorageOperations operations_to_execute; - DiskObjectStorageTransaction( - IObjectStorage & object_storage_, - IMetadataStorage & metadata_storage_, - DiskObjectStorageRemoteMetadataRestoreHelper * metadata_helper_, - MetadataTransactionPtr metadata_transaction_); + UInt64 remove_shared_recursive_file_limit; DiskObjectStorageTransaction( IObjectStorage & object_storage_, IMetadataStorage & metadata_storage_, DiskObjectStorageRemoteMetadataRestoreHelper * metadata_helper_, MetadataTransactionPtr metadata_transaction_, - UInt64 remove_shared_recursive_batch_size); + UInt64 remove_shared_recursive_file_limit); public: - DiskObjectStorageTransaction( - IObjectStorage & object_storage_, - IMetadataStorage & metadata_storage_, - DiskObjectStorageRemoteMetadataRestoreHelper * metadata_helper_); - DiskObjectStorageTransaction( IObjectStorage & object_storage_, IMetadataStorage & metadata_storage_, DiskObjectStorageRemoteMetadataRestoreHelper * metadata_helper_, - UInt64 remove_shared_recursive_batch_size); + UInt64 remove_shared_recursive_file_limit); void commit() override; void undo() override; @@ -149,7 +140,8 @@ struct MultipleDisksObjectStorageTransaction final : public DiskObjectStorageTra IMetadataStorage & metadata_storage_, IObjectStorage& destination_object_storage, IMetadataStorage& destination_metadata_storage, - DiskObjectStorageRemoteMetadataRestoreHelper * metadata_helper_); + DiskObjectStorageRemoteMetadataRestoreHelper * metadata_helper_, + UInt64 remove_shared_recursive_file_limit_); void copyFile(const std::string & from_file_path, const std::string & to_file_path, const ReadSettings & read_settings, const WriteSettings &) override; }; diff --git a/src/IO/S3Defines.h b/src/IO/S3Defines.h index 2db93369a95..332ebcfea92 100644 --- a/src/IO/S3Defines.h +++ b/src/IO/S3Defines.h @@ -34,7 +34,6 @@ inline static constexpr uint64_t DEFAULT_MAX_SINGLE_READ_TRIES = 4; inline static constexpr uint64_t DEFAULT_MAX_UNEXPECTED_WRITE_ERROR_RETRIES = 4; inline static constexpr uint64_t DEFAULT_MAX_REDIRECTS = 10; inline static constexpr uint64_t DEFAULT_RETRY_ATTEMPTS = 100; -inline static constexpr uint64_t DEFAULT_REMOVE_SHARED_RECURSIVE_BATCH_SIZE = 1000; inline static constexpr bool DEFAULT_ALLOW_NATIVE_COPY = true; inline static constexpr bool DEFAULT_CHECK_OBJECTS_AFTER_UPLOAD = false; diff --git a/tests/integration/test_disks_app_func/config.xml b/tests/integration/test_disks_app_func/config.xml index fb9fb34a4d1..99dc3e17089 100644 --- a/tests/integration/test_disks_app_func/config.xml +++ b/tests/integration/test_disks_app_func/config.xml @@ -15,7 +15,7 @@ http://minio1:9001/root/data/ minio minio123 - 3 + 3