Merge pull request #62534 from ClickHouse/Azure_write_buffer_parallel_upload

Support parallel write buffer for AzureBlobStorage
This commit is contained in:
Alexey Milovidov 2024-04-27 13:59:55 +00:00 committed by GitHub
commit a75a9e37be
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 14 additions and 3 deletions

View File

@ -221,7 +221,8 @@ std::unique_ptr<WriteBuffer> BackupWriterAzureBlobStorage::writeFile(const Strin
key,
DBMS_DEFAULT_BUFFER_SIZE,
write_settings,
settings);
settings,
threadPoolCallbackRunnerUnsafe<void>(getBackupsIOThreadPool().get(), "BackupWRAzure"));
}
void BackupWriterAzureBlobStorage::removeFile(const String & file_name)

View File

@ -112,6 +112,7 @@ class IColumn;
M(Bool, azure_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in azure engine tables", 0) \
M(Bool, s3_check_objects_after_upload, false, "Check each uploaded object to s3 with head request to be sure that upload was successful", 0) \
M(Bool, s3_allow_parallel_part_upload, true, "Use multiple threads for s3 multipart upload. It may lead to slightly higher memory usage", 0) \
M(Bool, azure_allow_parallel_part_upload, true, "Use multiple threads for azure multipart upload.", 0) \
M(Bool, s3_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \
M(Bool, s3_disable_checksum, false, "Do not calculate a checksum when sending a file to S3. This speeds up writes by avoiding excessive processing passes on a file. It is mostly safe as the data of MergeTree tables is checksummed by ClickHouse anyway, and when S3 is accessed with HTTPS, the TLS layer already provides integrity while transferring through the network. While additional checksums on S3 give defense in depth.", 0) \
M(UInt64, s3_retry_attempts, 100, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \

View File

@ -94,7 +94,8 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
{"first_day_of_week", "Monday", "Monday", "Added a setting for the first day of the week for date/time functions"},
{"allow_experimental_database_replicated", false, true, "Database engine Replicated is now in Beta stage"},
{"temporary_data_in_cache_reserve_space_wait_lock_timeout_milliseconds", (10 * 60 * 1000), (10 * 60 * 1000), "Wait time to lock cache for sapce reservation in temporary data in filesystem cache"},
}},
{"azure_allow_parallel_part_upload", "true", "true", "Use multiple threads for azure multipart upload."},
}},
{"24.3", {{"s3_connect_timeout_ms", 1000, 1000, "Introduce new dedicated setting for s3 connection timeout"},
{"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"},
{"use_page_cache_for_disks_without_file_cache", false, false, "Added userspace page cache"},

View File

@ -154,6 +154,7 @@ void IDisk::copyThroughBuffers(
/// Disable parallel part upload for both S3 and Azure writes. We already copy in parallel.
/// This avoids high memory usage. See test_s3_zero_copy_ttl/test.py::test_move_and_s3_memory_usage
write_settings.s3_allow_parallel_part_upload = false;
write_settings.azure_allow_parallel_part_upload = false;
asyncCopy(*this, from_path, *to_disk, to_path, copying_thread_pool, results, copy_root_dir, read_settings, write_settings, cancellation_hook);

View File

@ -282,12 +282,17 @@ std::unique_ptr<WriteBufferFromFileBase> AzureObjectStorage::writeObject( /// NO
LOG_TEST(log, "Writing file: {}", object.remote_path);
ThreadPoolCallbackRunnerUnsafe<void> scheduler;
if (write_settings.azure_allow_parallel_part_upload)
scheduler = threadPoolCallbackRunnerUnsafe<void>(getThreadPoolWriter(), "VFSWrite");
return std::make_unique<WriteBufferFromAzureBlobStorage>(
client.get(),
object.remote_path,
buf_size,
patchSettings(write_settings),
settings.get());
settings.get(),
std::move(scheduler));
}
void AzureObjectStorage::removeObjectImpl(const StoredObject & object, const SharedAzureClientPtr & client_ptr, bool if_exists)

View File

@ -23,6 +23,7 @@ struct WriteSettings
size_t filesystem_cache_reserve_space_wait_lock_timeout_milliseconds = 1000;
bool s3_allow_parallel_part_upload = true;
bool azure_allow_parallel_part_upload = true;
/// Monitoring
bool for_object_storage = false; // to choose which profile events should be incremented

View File

@ -5256,6 +5256,7 @@ WriteSettings Context::getWriteSettings() const
res.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds = settings.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds;
res.s3_allow_parallel_part_upload = settings.s3_allow_parallel_part_upload;
res.azure_allow_parallel_part_upload = settings.azure_allow_parallel_part_upload;
res.remote_throttler = getRemoteWriteThrottler();
res.local_throttler = getLocalWriteThrottler();