2019-12-12 08:57:25 +00:00
|
|
|
#include "IDisk.h"
|
2020-08-07 11:40:19 +00:00
|
|
|
#include "Disks/Executor.h"
|
2020-03-19 16:37:55 +00:00
|
|
|
#include <IO/ReadBufferFromFileBase.h>
|
|
|
|
#include <IO/WriteBufferFromFileBase.h>
|
2020-08-07 11:40:19 +00:00
|
|
|
#include <IO/copyData.h>
|
2020-03-19 16:37:55 +00:00
|
|
|
#include <Poco/Logger.h>
|
2022-04-27 15:05:45 +00:00
|
|
|
#include <Common/logger_useful.h>
|
2020-08-07 11:40:19 +00:00
|
|
|
#include <Common/setThreadName.h>
|
2022-11-17 20:20:36 +00:00
|
|
|
#include <Core/ServerUUID.h>
|
2022-07-10 20:42:16 +00:00
|
|
|
#include <Disks/ObjectStorages/MetadataStorageFromDisk.h>
|
|
|
|
#include <Disks/ObjectStorages/FakeMetadataStorageFromDisk.h>
|
2022-07-05 13:52:39 +00:00
|
|
|
#include <Disks/ObjectStorages/LocalObjectStorage.h>
|
2022-06-15 11:51:21 +00:00
|
|
|
#include <Disks/FakeDiskTransaction.h>
|
2019-12-12 08:57:25 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
2020-01-18 22:17:48 +00:00
|
|
|
|
2020-07-12 02:31:58 +00:00
|
|
|
/// Error codes thrown from this file; only declared here (extern), defined elsewhere.
namespace ErrorCodes
{
    extern const int CANNOT_READ_ALL_DATA;
    extern const int LOGICAL_ERROR;
    extern const int NOT_IMPLEMENTED;
}
|
|
|
|
|
2022-06-13 19:07:10 +00:00
|
|
|
bool IDisk::isDirectoryEmpty(const String & path) const
|
2019-12-12 08:57:25 +00:00
|
|
|
{
|
|
|
|
return !iterateDirectory(path)->isValid();
|
|
|
|
}
|
2020-03-19 16:37:55 +00:00
|
|
|
|
2022-10-20 17:51:27 +00:00
|
|
|
void IDisk::copyFile(const String & from_file_path, IDisk & to_disk, const String & to_file_path, const WriteSettings & settings) /// NOLINT
|
2020-03-19 16:37:55 +00:00
|
|
|
{
|
2021-05-05 15:10:14 +00:00
|
|
|
LOG_DEBUG(&Poco::Logger::get("IDisk"), "Copying from {} (path: {}) {} to {} (path: {}) {}.",
|
2022-04-21 12:39:12 +00:00
|
|
|
getName(), getPath(), from_file_path, to_disk.getName(), to_disk.getPath(), to_file_path);
|
2020-03-19 16:37:55 +00:00
|
|
|
|
2022-04-21 12:39:12 +00:00
|
|
|
auto in = readFile(from_file_path);
|
2022-10-20 17:51:27 +00:00
|
|
|
auto out = to_disk.writeFile(to_file_path, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, settings);
|
2020-03-19 16:37:55 +00:00
|
|
|
copyData(*in, *out);
|
2020-10-06 09:38:00 +00:00
|
|
|
out->finalize();
|
2020-03-19 16:37:55 +00:00
|
|
|
}
|
|
|
|
|
2020-08-07 11:40:19 +00:00
|
|
|
|
2022-06-15 11:51:21 +00:00
|
|
|
/// Default transaction factory: wraps this disk in a FakeDiskTransaction.
DiskTransactionPtr IDisk::createTransaction()
{
    auto transaction = std::make_shared<FakeDiskTransaction>(*this);
    return transaction;
}
|
|
|
|
|
|
|
|
void IDisk::removeSharedFiles(const RemoveBatchRequest & files, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only)
|
|
|
|
{
|
|
|
|
for (const auto & file : files)
|
|
|
|
{
|
|
|
|
bool keep_file = keep_all_batch_data || file_names_remove_metadata_only.contains(fs::path(file.path).filename());
|
|
|
|
if (file.if_exists)
|
|
|
|
removeSharedFileIfExists(file.path, keep_file);
|
|
|
|
else
|
|
|
|
removeSharedFile(file.path, keep_file);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2020-08-07 11:40:19 +00:00
|
|
|
/// Futures of the copy tasks submitted to an executor: asyncCopy appends to it, IDisk::copyThroughBuffers waits on it.
using ResultsCollector = std::vector<std::future<void>>;
|
|
|
|
|
2022-10-20 17:51:27 +00:00
|
|
|
/// Schedules a copy of `from_path` (on `from_disk`) into `to_path` (on `to_disk`).
///
/// - If `from_path` is a file, one task is submitted to `exec`; it copies the file to
///   `to_path / fileName(from_path)` and its future is appended to `results`.
/// - Otherwise `from_path` is treated as a directory and its entries are processed recursively.
///   When `copy_root_dir` is set, the last component of the parent path of `from_path` is appended
///   to the destination and that directory is created first
///   (NOTE(review): presumably directory paths end with '/', so this is the directory's own name - confirm).
///
/// The submitted tasks capture both disks and `settings` by reference,
/// so the caller must keep them alive until every future in `results` is ready.
void asyncCopy(IDisk & from_disk, String from_path, IDisk & to_disk, String to_path, Executor & exec, ResultsCollector & results, bool copy_root_dir, const WriteSettings & settings)
{
    if (!from_disk.isFile(from_path))
    {
        fs::path target_dir(to_path);
        if (copy_root_dir)
        {
            target_dir /= fs::path(from_path).parent_path().filename();
            to_disk.createDirectories(target_dir);
        }

        /// Nested entries always recreate their own directory under the target.
        auto entry = from_disk.iterateDirectory(from_path);
        while (entry->isValid())
        {
            asyncCopy(from_disk, entry->path(), to_disk, target_dir, exec, results, true, settings);
            entry->next();
        }
        return;
    }

    auto copy_task = [&from_disk, from_path, &to_disk, to_path, &settings]()
    {
        setThreadName("DiskCopier");
        from_disk.copyFile(from_path, to_disk, fs::path(to_path) / fileName(from_path), settings);
    };

    auto pending = exec.execute(std::move(copy_task));
    results.push_back(std::move(pending));
}
|
|
|
|
|
2022-04-21 12:39:12 +00:00
|
|
|
void IDisk::copyThroughBuffers(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path, bool copy_root_dir)
|
2020-08-07 11:40:19 +00:00
|
|
|
{
|
2020-08-11 19:08:32 +00:00
|
|
|
auto & exec = to_disk->getExecutor();
|
2020-08-07 11:40:19 +00:00
|
|
|
ResultsCollector results;
|
|
|
|
|
2022-10-20 17:51:27 +00:00
|
|
|
WriteSettings settings;
|
|
|
|
/// Disable parallel write. We already copy in parallel.
|
|
|
|
/// Avoid high memory usage. See test_s3_zero_copy_ttl/test.py::test_move_and_s3_memory_usage
|
|
|
|
settings.s3_allow_parallel_part_upload = false;
|
|
|
|
|
|
|
|
asyncCopy(*this, from_path, *to_disk, to_path, exec, results, copy_root_dir, settings);
|
2020-08-07 11:40:19 +00:00
|
|
|
|
|
|
|
for (auto & result : results)
|
|
|
|
result.wait();
|
|
|
|
for (auto & result : results)
|
|
|
|
result.get();
|
|
|
|
}
|
2020-03-19 16:37:55 +00:00
|
|
|
|
2021-07-11 19:26:39 +00:00
|
|
|
void IDisk::copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path)
|
|
|
|
{
|
2022-04-21 12:39:12 +00:00
|
|
|
copyThroughBuffers(from_path, to_disk, to_path, true);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void IDisk::copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir)
|
|
|
|
{
|
|
|
|
if (!to_disk->exists(to_dir))
|
|
|
|
to_disk->createDirectories(to_dir);
|
|
|
|
|
|
|
|
copyThroughBuffers(from_dir, to_disk, to_dir, false);
|
2021-07-11 19:26:39 +00:00
|
|
|
}
|
|
|
|
|
2020-07-16 02:34:43 +00:00
|
|
|
void IDisk::truncateFile(const String &, size_t)
|
2020-07-12 02:31:58 +00:00
|
|
|
{
|
2022-08-19 14:58:30 +00:00
|
|
|
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Truncate operation is not implemented for disk of type {}", getDataSourceDescription().type);
|
2020-07-12 02:31:58 +00:00
|
|
|
}
|
|
|
|
|
2021-01-26 13:29:45 +00:00
|
|
|
/// Base implementation: provides no sync guard, always returns a null pointer. The path is ignored.
SyncGuardPtr IDisk::getDirectorySyncGuard(const String & /* path */) const
{
    SyncGuardPtr no_guard = nullptr;
    return no_guard;
}
|
|
|
|
|
2022-11-18 20:48:50 +00:00
|
|
|
/// Starts the disk.
/// Unless `skip_access_check` is set, an access check is performed first; for a read-only disk
/// the check is skipped and only a debug message is logged.
/// startupImpl(context) is called afterwards in every case.
void IDisk::startup(ContextPtr context, bool skip_access_check)
{
    const bool access_check_requested = !skip_access_check;

    if (access_check_requested && isReadOnly())
    {
        LOG_DEBUG(&Poco::Logger::get("IDisk"),
            "Skip access check for disk {} (read-only disk).",
            getName());
    }
    else if (access_check_requested)
    {
        checkAccess();
    }

    startupImpl(context);
}
|
2022-11-17 20:20:36 +00:00
|
|
|
|
2022-11-18 20:48:50 +00:00
|
|
|
void IDisk::checkAccess()
|
|
|
|
{
|
2022-11-17 20:20:36 +00:00
|
|
|
DB::UUID server_uuid = DB::ServerUUID::get();
|
|
|
|
if (server_uuid == DB::UUIDHelpers::Nil)
|
|
|
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Server UUID is not initialized");
|
|
|
|
const String path = fmt::format("clickhouse_access_check_{}", DB::toString(server_uuid));
|
|
|
|
|
2022-11-18 20:48:50 +00:00
|
|
|
checkAccessImpl(path);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// NOTE: should we mark the disk readonly if the write/unlink fails instead of throws?
|
|
|
|
void IDisk::checkAccessImpl(const String & path)
|
|
|
|
try
|
|
|
|
{
|
|
|
|
const std::string_view payload("test", 4);
|
2022-11-17 20:20:36 +00:00
|
|
|
|
|
|
|
/// write
|
|
|
|
{
|
|
|
|
auto file = writeFile(path, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite);
|
|
|
|
try
|
|
|
|
{
|
|
|
|
file->write(payload.data(), payload.size());
|
|
|
|
}
|
|
|
|
catch (...)
|
|
|
|
{
|
|
|
|
/// Log current exception, because finalize() can throw a different exception.
|
|
|
|
tryLogCurrentException(__PRETTY_FUNCTION__);
|
|
|
|
file->finalize();
|
|
|
|
throw;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// read
|
|
|
|
{
|
|
|
|
auto file = readFile(path);
|
|
|
|
String buf(payload.size(), '0');
|
|
|
|
file->readStrict(buf.data(), buf.size());
|
|
|
|
if (buf != payload)
|
|
|
|
{
|
|
|
|
throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA,
|
|
|
|
"Content of {}::{} does not matches after read ({} vs {})", name, path, buf, payload);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// read with offset
|
|
|
|
{
|
|
|
|
auto file = readFile(path);
|
|
|
|
auto offset = 2;
|
|
|
|
String buf(payload.size() - offset, '0');
|
|
|
|
file->seek(offset, 0);
|
|
|
|
file->readStrict(buf.data(), buf.size());
|
|
|
|
if (buf != payload.substr(offset))
|
|
|
|
{
|
|
|
|
throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA,
|
|
|
|
"Content of {}::{} does not matches after read with offset ({} vs {})", name, path, buf, payload.substr(offset));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// remove
|
|
|
|
removeFile(path);
|
|
|
|
}
|
|
|
|
catch (Exception & e)
|
|
|
|
{
|
|
|
|
e.addMessage(fmt::format("While checking access for disk {}", name));
|
|
|
|
throw;
|
|
|
|
}
|
|
|
|
|
2019-12-12 08:57:25 +00:00
|
|
|
}
|