mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-19 16:20:50 +00:00
Store plain_rewritable metadata in a separate layout
This commit is contained in:
parent
183a3521de
commit
d6da86dad2
@ -19,11 +19,11 @@ ObjectStorageKey CommonPathPrefixKeyGenerator::generate(const String & path, boo
|
||||
{
|
||||
const auto & [object_key_prefix, suffix_parts] = getLongestObjectKeyPrefix(path);
|
||||
|
||||
auto key = std::filesystem::path(object_key_prefix.empty() ? storage_key_prefix : object_key_prefix);
|
||||
auto key = std::filesystem::path(object_key_prefix.empty() ? std::string() : object_key_prefix);
|
||||
|
||||
/// The longest prefix is the same as path, meaning that the path is already mapped.
|
||||
if (suffix_parts.empty())
|
||||
return ObjectStorageKey::createAsRelative(std::move(key));
|
||||
return ObjectStorageKey::createAsRelative(storage_key_prefix, std::move(key));
|
||||
|
||||
/// File and top-level directory paths are mapped as is.
|
||||
if (!is_directory || object_key_prefix.empty())
|
||||
@ -39,7 +39,7 @@ ObjectStorageKey CommonPathPrefixKeyGenerator::generate(const String & path, boo
|
||||
key /= getRandomASCIIString(part_size);
|
||||
}
|
||||
|
||||
return ObjectStorageKey::createAsRelative(key);
|
||||
return ObjectStorageKey::createAsRelative(storage_key_prefix, key);
|
||||
}
|
||||
|
||||
std::tuple<std::string, std::vector<std::string>> CommonPathPrefixKeyGenerator::getLongestObjectKeyPrefix(const std::string & path) const
|
||||
|
@ -7,6 +7,7 @@
|
||||
|
||||
#include <filesystem>
|
||||
#include <tuple>
|
||||
#include <unordered_set>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -79,14 +80,16 @@ std::vector<std::string> MetadataStorageFromPlainObjectStorage::listDirectory(co
|
||||
|
||||
object_storage->listObjects(abs_key, files, 0);
|
||||
|
||||
return getDirectChildrenOnDisk(abs_key, files, path);
|
||||
std::unordered_set<std::string> directories;
|
||||
getDirectChildrenOnDisk(abs_key, object_storage->getCommonKeyPrefix(), files, path, directories);
|
||||
return std::vector<std::string>(std::make_move_iterator(directories.begin()), std::make_move_iterator(directories.end()));
|
||||
}
|
||||
|
||||
DirectoryIteratorPtr MetadataStorageFromPlainObjectStorage::iterateDirectory(const std::string & path) const
|
||||
{
|
||||
/// Required for MergeTree
|
||||
auto paths = listDirectory(path);
|
||||
// Prepend path, since iterateDirectory() includes path, unlike listDirectory()
|
||||
/// Prepend path, since iterateDirectory() includes path, unlike listDirectory()
|
||||
std::for_each(paths.begin(), paths.end(), [&](auto & child) { child = fs::path(path) / child; });
|
||||
std::vector<std::filesystem::path> fs_paths(paths.begin(), paths.end());
|
||||
return std::make_unique<StaticDirectoryIterator>(std::move(fs_paths));
|
||||
@ -99,10 +102,13 @@ StoredObjects MetadataStorageFromPlainObjectStorage::getStorageObjects(const std
|
||||
return {StoredObject(object_key.serialize(), path, object_size)};
|
||||
}
|
||||
|
||||
std::vector<std::string> MetadataStorageFromPlainObjectStorage::getDirectChildrenOnDisk(
|
||||
const std::string & storage_key, const RelativePathsWithMetadata & remote_paths, const std::string & /* local_path */) const
|
||||
void MetadataStorageFromPlainObjectStorage::getDirectChildrenOnDisk(
|
||||
const std::string & storage_key,
|
||||
const std::string & /* storage_key_perfix */,
|
||||
const RelativePathsWithMetadata & remote_paths,
|
||||
const std::string & /* local_path */,
|
||||
std::unordered_set<std::string> & result) const
|
||||
{
|
||||
std::unordered_set<std::string> duplicates_filter;
|
||||
for (const auto & elem : remote_paths)
|
||||
{
|
||||
const auto & path = elem->relative_path;
|
||||
@ -111,11 +117,10 @@ std::vector<std::string> MetadataStorageFromPlainObjectStorage::getDirectChildre
|
||||
/// string::npos is ok.
|
||||
const auto slash_pos = path.find('/', child_pos);
|
||||
if (slash_pos == std::string::npos)
|
||||
duplicates_filter.emplace(path.substr(child_pos));
|
||||
result.emplace(path.substr(child_pos));
|
||||
else
|
||||
duplicates_filter.emplace(path.substr(child_pos, slash_pos - child_pos));
|
||||
result.emplace(path.substr(child_pos, slash_pos - child_pos));
|
||||
}
|
||||
return std::vector<std::string>(std::make_move_iterator(duplicates_filter.begin()), std::make_move_iterator(duplicates_filter.end()));
|
||||
}
|
||||
|
||||
const IMetadataStorage & MetadataStorageFromPlainObjectStorageTransaction::getStorageForNonTransactionalReads() const
|
||||
@ -140,7 +145,7 @@ void MetadataStorageFromPlainObjectStorageTransaction::removeDirectory(const std
|
||||
else
|
||||
{
|
||||
addOperation(std::make_unique<MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation>(
|
||||
normalizeDirectoryPath(path), *metadata_storage.getPathMap(), object_storage));
|
||||
normalizeDirectoryPath(path), *metadata_storage.getPathMap(), object_storage, metadata_storage.getMetadataKeyPrefix()));
|
||||
}
|
||||
}
|
||||
|
||||
@ -151,8 +156,13 @@ void MetadataStorageFromPlainObjectStorageTransaction::createDirectory(const std
|
||||
|
||||
auto normalized_path = normalizeDirectoryPath(path);
|
||||
auto key_prefix = object_storage->generateObjectKeyPrefixForDirectoryPath(normalized_path).serialize();
|
||||
chassert(key_prefix.starts_with(object_storage->getCommonKeyPrefix()));
|
||||
auto op = std::make_unique<MetadataStorageFromPlainObjectStorageCreateDirectoryOperation>(
|
||||
std::move(normalized_path), std::move(key_prefix), *metadata_storage.getPathMap(), object_storage);
|
||||
std::move(normalized_path),
|
||||
key_prefix.substr(object_storage->getCommonKeyPrefix().size()),
|
||||
*metadata_storage.getPathMap(),
|
||||
object_storage,
|
||||
metadata_storage.getMetadataKeyPrefix());
|
||||
addOperation(std::move(op));
|
||||
}
|
||||
|
||||
@ -167,7 +177,11 @@ void MetadataStorageFromPlainObjectStorageTransaction::moveDirectory(const std::
|
||||
throwNotImplemented();
|
||||
|
||||
addOperation(std::make_unique<MetadataStorageFromPlainObjectStorageMoveDirectoryOperation>(
|
||||
normalizeDirectoryPath(path_from), normalizeDirectoryPath(path_to), *metadata_storage.getPathMap(), object_storage));
|
||||
normalizeDirectoryPath(path_from),
|
||||
normalizeDirectoryPath(path_to),
|
||||
*metadata_storage.getPathMap(),
|
||||
object_storage,
|
||||
metadata_storage.getMetadataKeyPrefix()));
|
||||
}
|
||||
|
||||
void MetadataStorageFromPlainObjectStorageTransaction::addBlobToMetadata(
|
||||
|
@ -6,6 +6,8 @@
|
||||
#include <Disks/ObjectStorages/MetadataStorageTransactionState.h>
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <unordered_set>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -78,10 +80,20 @@ public:
|
||||
bool supportsStat() const override { return false; }
|
||||
|
||||
protected:
|
||||
/// Get the object storage prefix for storing metadata files. If stored behind a separate endpoint,
|
||||
/// the metadata keys reflect the layout of the regular files.
|
||||
virtual std::string getMetadataKeyPrefix() const { return object_storage->getCommonKeyPrefix(); }
|
||||
|
||||
/// Returns a map of local paths to paths in object storage.
|
||||
virtual std::shared_ptr<PathMap> getPathMap() const { throwNotImplemented(); }
|
||||
|
||||
virtual std::vector<std::string> getDirectChildrenOnDisk(
|
||||
const std::string & storage_key, const RelativePathsWithMetadata & remote_paths, const std::string & local_path) const;
|
||||
/// Retrieves the immediate files and directories within a given directory on a disk.
|
||||
virtual void getDirectChildrenOnDisk(
|
||||
const std::string & storage_key,
|
||||
const std::string & storage_key_perfix,
|
||||
const RelativePathsWithMetadata & remote_paths,
|
||||
const std::string & local_path,
|
||||
std::unordered_set<std::string> & result) const;
|
||||
};
|
||||
|
||||
class MetadataStorageFromPlainObjectStorageTransaction final : public IMetadataTransaction, private MetadataOperationsHolder
|
||||
|
@ -20,14 +20,24 @@ namespace
|
||||
|
||||
constexpr auto PREFIX_PATH_FILE_NAME = "prefix.path";
|
||||
|
||||
/// Builds the object storage key of the `prefix.path` metadata file that describes a directory.
/// The key has the form <metadata_key_prefix>/<key_prefix>/prefix.path.
///
/// key_prefix          - object key prefix of the directory.
/// metadata_key_prefix - root under which metadata files are stored.
///
/// NOTE(review): std::filesystem::path::operator/= discards the left-hand side when the right-hand side
/// is an absolute path, so key_prefix is presumably expected not to start with '/'. Confirm against callers.
ObjectStorageKey createMetadataObjectKey(const std::string & key_prefix, const std::string & metadata_key_prefix)
{
    std::filesystem::path metadata_directory(metadata_key_prefix);
    metadata_directory /= key_prefix;
    return ObjectStorageKey::createAsRelative(metadata_directory.string(), PREFIX_PATH_FILE_NAME);
}
|
||||
}
|
||||
|
||||
MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::MetadataStorageFromPlainObjectStorageCreateDirectoryOperation(
|
||||
std::filesystem::path && path_,
|
||||
std::string && key_prefix_,
|
||||
MetadataStorageFromPlainObjectStorage::PathMap & path_map_,
|
||||
ObjectStoragePtr object_storage_)
|
||||
: path(std::move(path_)), key_prefix(key_prefix_), path_map(path_map_), object_storage(object_storage_)
|
||||
ObjectStoragePtr object_storage_,
|
||||
const std::string & metadata_key_prefix_)
|
||||
: path(std::move(path_))
|
||||
, key_prefix(key_prefix_)
|
||||
, path_map(path_map_)
|
||||
, object_storage(object_storage_)
|
||||
, metadata_key_prefix(metadata_key_prefix_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -36,13 +46,17 @@ void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::execute(std:
|
||||
if (path_map.contains(path))
|
||||
return;
|
||||
|
||||
LOG_TRACE(getLogger("MetadataStorageFromPlainObjectStorageCreateDirectoryOperation"), "Creating metadata for directory '{}'", path);
|
||||
auto metadata_object_key = createMetadataObjectKey(key_prefix, metadata_key_prefix);
|
||||
|
||||
auto object_key = ObjectStorageKey::createAsRelative(key_prefix, PREFIX_PATH_FILE_NAME);
|
||||
LOG_TRACE(
|
||||
getLogger("MetadataStorageFromPlainObjectStorageCreateDirectoryOperation"),
|
||||
"Creating metadata for directory '{}' with remote path='{}'",
|
||||
path,
|
||||
metadata_object_key.serialize());
|
||||
|
||||
auto object = StoredObject(object_key.serialize(), path / PREFIX_PATH_FILE_NAME);
|
||||
auto metadata_object = StoredObject(metadata_object_key.serialize(), path / PREFIX_PATH_FILE_NAME);
|
||||
auto buf = object_storage->writeObject(
|
||||
object,
|
||||
metadata_object,
|
||||
WriteMode::Rewrite,
|
||||
/* object_attributes */ std::nullopt,
|
||||
/* buf_size */ DBMS_DEFAULT_BUFFER_SIZE,
|
||||
@ -66,25 +80,31 @@ void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::execute(std:
|
||||
|
||||
void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::undo(std::unique_lock<SharedMutex> &)
|
||||
{
|
||||
auto object_key = ObjectStorageKey::createAsRelative(key_prefix, PREFIX_PATH_FILE_NAME);
|
||||
auto metadata_object_key = createMetadataObjectKey(key_prefix, metadata_key_prefix);
|
||||
|
||||
if (write_finalized)
|
||||
{
|
||||
path_map.erase(path);
|
||||
auto metric = object_storage->getMetadataStorageMetrics().directory_map_size;
|
||||
CurrentMetrics::sub(metric, 1);
|
||||
|
||||
object_storage->removeObject(StoredObject(object_key.serialize(), path / PREFIX_PATH_FILE_NAME));
|
||||
object_storage->removeObject(StoredObject(metadata_object_key.serialize(), path / PREFIX_PATH_FILE_NAME));
|
||||
}
|
||||
else if (write_created)
|
||||
object_storage->removeObjectIfExists(StoredObject(object_key.serialize(), path / PREFIX_PATH_FILE_NAME));
|
||||
object_storage->removeObjectIfExists(StoredObject(metadata_object_key.serialize(), path / PREFIX_PATH_FILE_NAME));
|
||||
}
|
||||
|
||||
MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::MetadataStorageFromPlainObjectStorageMoveDirectoryOperation(
|
||||
std::filesystem::path && path_from_,
|
||||
std::filesystem::path && path_to_,
|
||||
MetadataStorageFromPlainObjectStorage::PathMap & path_map_,
|
||||
ObjectStoragePtr object_storage_)
|
||||
: path_from(std::move(path_from_)), path_to(std::move(path_to_)), path_map(path_map_), object_storage(object_storage_)
|
||||
ObjectStoragePtr object_storage_,
|
||||
const std::string & metadata_key_prefix_)
|
||||
: path_from(std::move(path_from_))
|
||||
, path_to(std::move(path_to_))
|
||||
, path_map(path_map_)
|
||||
, object_storage(object_storage_)
|
||||
, metadata_key_prefix(metadata_key_prefix_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -98,26 +118,26 @@ std::unique_ptr<WriteBufferFromFileBase> MetadataStorageFromPlainObjectStorageMo
|
||||
if (path_map.contains(new_path))
|
||||
throw Exception(ErrorCodes::FILE_ALREADY_EXISTS, "Metadata object for the new (destination) path '{}' already exists", new_path);
|
||||
|
||||
auto object_key = ObjectStorageKey::createAsRelative(expected_it->second, PREFIX_PATH_FILE_NAME);
|
||||
auto metadata_object_key = createMetadataObjectKey(expected_it->second, metadata_key_prefix);
|
||||
|
||||
auto object = StoredObject(object_key.serialize(), expected_path / PREFIX_PATH_FILE_NAME);
|
||||
auto metadata_object = StoredObject(metadata_object_key.serialize(), expected_path / PREFIX_PATH_FILE_NAME);
|
||||
|
||||
if (validate_content)
|
||||
{
|
||||
std::string data;
|
||||
auto read_buf = object_storage->readObject(object);
|
||||
auto read_buf = object_storage->readObject(metadata_object);
|
||||
readStringUntilEOF(data, *read_buf);
|
||||
if (data != path_from)
|
||||
throw Exception(
|
||||
ErrorCodes::INCORRECT_DATA,
|
||||
"Incorrect data for object key {}, expected {}, got {}",
|
||||
object_key.serialize(),
|
||||
metadata_object_key.serialize(),
|
||||
expected_path,
|
||||
data);
|
||||
}
|
||||
|
||||
auto write_buf = object_storage->writeObject(
|
||||
object,
|
||||
metadata_object,
|
||||
WriteMode::Rewrite,
|
||||
/* object_attributes */ std::nullopt,
|
||||
/*buf_size*/ DBMS_DEFAULT_BUFFER_SIZE,
|
||||
@ -156,8 +176,11 @@ void MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::undo(std::uniq
|
||||
}
|
||||
|
||||
MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation(
|
||||
std::filesystem::path && path_, MetadataStorageFromPlainObjectStorage::PathMap & path_map_, ObjectStoragePtr object_storage_)
|
||||
: path(std::move(path_)), path_map(path_map_), object_storage(object_storage_)
|
||||
std::filesystem::path && path_,
|
||||
MetadataStorageFromPlainObjectStorage::PathMap & path_map_,
|
||||
ObjectStoragePtr object_storage_,
|
||||
const std::string & metadata_key_prefix_)
|
||||
: path(std::move(path_)), path_map(path_map_), object_storage(object_storage_), metadata_key_prefix(metadata_key_prefix_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -170,9 +193,9 @@ void MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::execute(std:
|
||||
LOG_TRACE(getLogger("MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation"), "Removing directory '{}'", path);
|
||||
|
||||
key_prefix = path_it->second;
|
||||
auto object_key = ObjectStorageKey::createAsRelative(key_prefix, PREFIX_PATH_FILE_NAME);
|
||||
auto object = StoredObject(object_key.serialize(), path / PREFIX_PATH_FILE_NAME);
|
||||
object_storage->removeObject(object);
|
||||
auto metadata_object_key = createMetadataObjectKey(key_prefix, metadata_key_prefix);
|
||||
auto metadata_object = StoredObject(metadata_object_key.serialize(), path / PREFIX_PATH_FILE_NAME);
|
||||
object_storage->removeObject(metadata_object);
|
||||
|
||||
path_map.erase(path_it);
|
||||
auto metric = object_storage->getMetadataStorageMetrics().directory_map_size;
|
||||
@ -189,10 +212,10 @@ void MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::undo(std::un
|
||||
if (!removed)
|
||||
return;
|
||||
|
||||
auto object_key = ObjectStorageKey::createAsRelative(key_prefix, PREFIX_PATH_FILE_NAME);
|
||||
auto object = StoredObject(object_key.serialize(), path / PREFIX_PATH_FILE_NAME);
|
||||
auto metadata_object_key = createMetadataObjectKey(key_prefix, metadata_key_prefix);
|
||||
auto metadata_object = StoredObject(metadata_object_key.serialize(), path / PREFIX_PATH_FILE_NAME);
|
||||
auto buf = object_storage->writeObject(
|
||||
object,
|
||||
metadata_object,
|
||||
WriteMode::Rewrite,
|
||||
/* object_attributes */ std::nullopt,
|
||||
/* buf_size */ DBMS_DEFAULT_BUFFER_SIZE,
|
||||
|
@ -16,6 +16,7 @@ private:
|
||||
std::string key_prefix;
|
||||
MetadataStorageFromPlainObjectStorage::PathMap & path_map;
|
||||
ObjectStoragePtr object_storage;
|
||||
const std::string metadata_key_prefix;
|
||||
|
||||
bool write_created = false;
|
||||
bool write_finalized = false;
|
||||
@ -26,7 +27,8 @@ public:
|
||||
std::filesystem::path && path_,
|
||||
std::string && key_prefix_,
|
||||
MetadataStorageFromPlainObjectStorage::PathMap & path_map_,
|
||||
ObjectStoragePtr object_storage_);
|
||||
ObjectStoragePtr object_storage_,
|
||||
const std::string & metadata_key_prefix_);
|
||||
|
||||
void execute(std::unique_lock<SharedMutex> & metadata_lock) override;
|
||||
void undo(std::unique_lock<SharedMutex> & metadata_lock) override;
|
||||
@ -39,6 +41,7 @@ private:
|
||||
std::filesystem::path path_to;
|
||||
MetadataStorageFromPlainObjectStorage::PathMap & path_map;
|
||||
ObjectStoragePtr object_storage;
|
||||
const std::string metadata_key_prefix;
|
||||
|
||||
bool write_created = false;
|
||||
bool write_finalized = false;
|
||||
@ -51,7 +54,8 @@ public:
|
||||
std::filesystem::path && path_from_,
|
||||
std::filesystem::path && path_to_,
|
||||
MetadataStorageFromPlainObjectStorage::PathMap & path_map_,
|
||||
ObjectStoragePtr object_storage_);
|
||||
ObjectStoragePtr object_storage_,
|
||||
const std::string & metadata_key_prefix_);
|
||||
|
||||
void execute(std::unique_lock<SharedMutex> & metadata_lock) override;
|
||||
|
||||
@ -65,13 +69,17 @@ private:
|
||||
|
||||
MetadataStorageFromPlainObjectStorage::PathMap & path_map;
|
||||
ObjectStoragePtr object_storage;
|
||||
const std::string metadata_key_prefix;
|
||||
|
||||
std::string key_prefix;
|
||||
bool removed = false;
|
||||
|
||||
public:
|
||||
MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation(
|
||||
std::filesystem::path && path_, MetadataStorageFromPlainObjectStorage::PathMap & path_map_, ObjectStoragePtr object_storage_);
|
||||
std::filesystem::path && path_,
|
||||
MetadataStorageFromPlainObjectStorage::PathMap & path_map_,
|
||||
ObjectStoragePtr object_storage_,
|
||||
const std::string & metadata_key_prefix_);
|
||||
|
||||
void execute(std::unique_lock<SharedMutex> & metadata_lock) override;
|
||||
void undo(std::unique_lock<SharedMutex> & metadata_lock) override;
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include <Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h>
|
||||
#include <Disks/ObjectStorages/ObjectStorageIterator.h>
|
||||
|
||||
#include <unordered_set>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/SharedThreadPools.h>
|
||||
#include <IO/S3Common.h>
|
||||
@ -21,8 +22,22 @@ namespace
|
||||
{
|
||||
|
||||
constexpr auto PREFIX_PATH_FILE_NAME = "prefix.path";
|
||||
constexpr auto METADATA_PATH_TOKEN = "__meta/";
|
||||
|
||||
MetadataStorageFromPlainObjectStorage::PathMap loadPathPrefixMap(const std::string & root, ObjectStoragePtr object_storage)
|
||||
/// Use a separate layout for metadata iff:
|
||||
/// 1. The disk endpoint does not contain objects, OR
|
||||
/// 2. The metadata is already stored behind a separate endpoint.
|
||||
/// Otherwise, store metadata along with regular data for backward compatibility.
|
||||
std::string getMetadataKeyPrefix(ObjectStoragePtr object_storage)
|
||||
{
|
||||
const auto common_key_prefix = std::filesystem::path(object_storage->getCommonKeyPrefix());
|
||||
const auto metadata_key_prefix = std::filesystem::path(common_key_prefix) / METADATA_PATH_TOKEN;
|
||||
return !object_storage->existsOrHasAnyChild(metadata_key_prefix / "") && object_storage->existsOrHasAnyChild(common_key_prefix / "")
|
||||
? common_key_prefix
|
||||
: metadata_key_prefix;
|
||||
}
|
||||
|
||||
MetadataStorageFromPlainObjectStorage::PathMap loadPathPrefixMap(const std::string & metadata_key_prefix, ObjectStoragePtr object_storage)
|
||||
{
|
||||
MetadataStorageFromPlainObjectStorage::PathMap result;
|
||||
|
||||
@ -39,16 +54,16 @@ MetadataStorageFromPlainObjectStorage::PathMap loadPathPrefixMap(const std::stri
|
||||
|
||||
LOG_DEBUG(log, "Loading metadata");
|
||||
size_t num_files = 0;
|
||||
for (auto iterator = object_storage->iterate(root, 0); iterator->isValid(); iterator->next())
|
||||
for (auto iterator = object_storage->iterate(metadata_key_prefix, 0); iterator->isValid(); iterator->next())
|
||||
{
|
||||
++num_files;
|
||||
auto file = iterator->current();
|
||||
String path = file->getPath();
|
||||
auto remote_path = std::filesystem::path(path);
|
||||
if (remote_path.filename() != PREFIX_PATH_FILE_NAME)
|
||||
auto remote_metadata_path = std::filesystem::path(path);
|
||||
if (remote_metadata_path.filename() != PREFIX_PATH_FILE_NAME)
|
||||
continue;
|
||||
|
||||
runner([remote_path, path, &object_storage, &result, &mutex, &log, &settings]
|
||||
runner([remote_metadata_path, path, &object_storage, &result, &mutex, &log, &settings, &metadata_key_prefix]
|
||||
{
|
||||
setThreadName("PlainRWMetaLoad");
|
||||
|
||||
@ -75,7 +90,10 @@ MetadataStorageFromPlainObjectStorage::PathMap loadPathPrefixMap(const std::stri
|
||||
throw;
|
||||
}
|
||||
|
||||
chassert(remote_path.has_parent_path());
|
||||
chassert(remote_metadata_path.has_parent_path());
|
||||
chassert(remote_metadata_path.string().starts_with(metadata_key_prefix));
|
||||
auto suffix = remote_metadata_path.string().substr(metadata_key_prefix.size());
|
||||
auto remote_path = std::filesystem::path(std::move(suffix));
|
||||
std::pair<MetadataStorageFromPlainObjectStorage::PathMap::iterator, bool> res;
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
@ -103,17 +121,17 @@ MetadataStorageFromPlainObjectStorage::PathMap loadPathPrefixMap(const std::stri
|
||||
return result;
|
||||
}
|
||||
|
||||
std::vector<std::string> getDirectChildrenOnRewritableDisk(
|
||||
void getDirectChildrenOnRewritableDisk(
|
||||
const std::string & storage_key,
|
||||
const std::string & storage_key_perfix,
|
||||
const RelativePathsWithMetadata & remote_paths,
|
||||
const std::string & local_path,
|
||||
const MetadataStorageFromPlainObjectStorage::PathMap & local_path_prefixes,
|
||||
SharedMutex & shared_mutex)
|
||||
SharedMutex & shared_mutex,
|
||||
std::unordered_set<std::string> & result)
|
||||
{
|
||||
using PathMap = MetadataStorageFromPlainObjectStorage::PathMap;
|
||||
|
||||
std::unordered_set<std::string> duplicates_filter;
|
||||
|
||||
/// Map remote paths into local subdirectories.
|
||||
std::unordered_map<PathMap::mapped_type, PathMap::key_type> remote_to_local_subdir;
|
||||
|
||||
@ -149,22 +167,21 @@ std::vector<std::string> getDirectChildrenOnRewritableDisk(
|
||||
/// File names.
|
||||
auto filename = path.substr(child_pos);
|
||||
if (!skip_list.contains(filename))
|
||||
duplicates_filter.emplace(std::move(filename));
|
||||
result.emplace(std::move(filename));
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Subdirectories.
|
||||
auto it = remote_to_local_subdir.find(path.substr(0, slash_pos));
|
||||
chassert(path.find(storage_key_perfix) == 0);
|
||||
auto it = remote_to_local_subdir.find(path.substr(storage_key_perfix.size(), slash_pos - storage_key_perfix.size()));
|
||||
/// Mapped subdirectories.
|
||||
if (it != remote_to_local_subdir.end())
|
||||
duplicates_filter.emplace(it->second);
|
||||
result.emplace(it->second);
|
||||
/// The remote subdirectory name is the same as the local subdirectory.
|
||||
else
|
||||
duplicates_filter.emplace(path.substr(child_pos, slash_pos - child_pos));
|
||||
result.emplace(path.substr(child_pos, slash_pos - child_pos));
|
||||
}
|
||||
}
|
||||
|
||||
return std::vector<std::string>(std::make_move_iterator(duplicates_filter.begin()), std::make_move_iterator(duplicates_filter.end()));
|
||||
}
|
||||
|
||||
}
|
||||
@ -172,7 +189,8 @@ std::vector<std::string> getDirectChildrenOnRewritableDisk(
|
||||
MetadataStorageFromPlainRewritableObjectStorage::MetadataStorageFromPlainRewritableObjectStorage(
|
||||
ObjectStoragePtr object_storage_, String storage_path_prefix_)
|
||||
: MetadataStorageFromPlainObjectStorage(object_storage_, storage_path_prefix_)
|
||||
, path_map(std::make_shared<PathMap>(loadPathPrefixMap(object_storage->getCommonKeyPrefix(), object_storage)))
|
||||
, metadata_key_prefix(DB::getMetadataKeyPrefix(object_storage))
|
||||
, path_map(std::make_shared<PathMap>(loadPathPrefixMap(metadata_key_prefix, object_storage)))
|
||||
{
|
||||
if (object_storage->isWriteOnce())
|
||||
throw Exception(
|
||||
@ -190,10 +208,71 @@ MetadataStorageFromPlainRewritableObjectStorage::~MetadataStorageFromPlainRewrit
|
||||
CurrentMetrics::sub(metric, path_map->size());
|
||||
}
|
||||
|
||||
std::vector<std::string> MetadataStorageFromPlainRewritableObjectStorage::getDirectChildrenOnDisk(
|
||||
const std::string & storage_key, const RelativePathsWithMetadata & remote_paths, const std::string & local_path) const
|
||||
bool MetadataStorageFromPlainRewritableObjectStorage::exists(const std::string & path) const
|
||||
{
|
||||
return getDirectChildrenOnRewritableDisk(storage_key, remote_paths, local_path, *getPathMap(), metadata_mutex);
|
||||
if (MetadataStorageFromPlainObjectStorage::exists(path))
|
||||
return true;
|
||||
|
||||
if (getMetadataKeyPrefix() != object_storage->getCommonKeyPrefix())
|
||||
{
|
||||
auto key_prefix = object_storage->generateObjectKeyForPath(path).serialize();
|
||||
chassert(key_prefix.starts_with(object_storage->getCommonKeyPrefix()));
|
||||
auto metadata_key = std::filesystem::path(getMetadataKeyPrefix()) / key_prefix.substr(object_storage->getCommonKeyPrefix().size());
|
||||
return object_storage->existsOrHasAnyChild(metadata_key);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool MetadataStorageFromPlainRewritableObjectStorage::isDirectory(const std::string & path) const
|
||||
{
|
||||
if (getMetadataKeyPrefix() != object_storage->getCommonKeyPrefix())
|
||||
{
|
||||
auto directory = std::filesystem::path(object_storage->generateObjectKeyForPath(path).serialize()) / "";
|
||||
chassert(directory.string().starts_with(object_storage->getCommonKeyPrefix()));
|
||||
auto metadata_key
|
||||
= std::filesystem::path(getMetadataKeyPrefix()) / directory.string().substr(object_storage->getCommonKeyPrefix().size());
|
||||
return object_storage->existsOrHasAnyChild(metadata_key);
|
||||
}
|
||||
else
|
||||
return MetadataStorageFromPlainObjectStorage::isDirectory(path);
|
||||
}
|
||||
|
||||
std::vector<std::string> MetadataStorageFromPlainRewritableObjectStorage::listDirectory(const std::string & path) const
|
||||
{
|
||||
auto key_prefix = object_storage->generateObjectKeyForPath(path).serialize();
|
||||
|
||||
RelativePathsWithMetadata files;
|
||||
std::string abs_key = key_prefix;
|
||||
if (!abs_key.ends_with('/'))
|
||||
abs_key += '/';
|
||||
|
||||
object_storage->listObjects(abs_key, files, 0);
|
||||
|
||||
std::unordered_set<std::string> directories;
|
||||
getDirectChildrenOnDisk(abs_key, object_storage->getCommonKeyPrefix(), files, path, directories);
|
||||
/// List empty directories that are identified by the `prefix.path` metadata files. This is required to, e.g., remove
|
||||
/// metadata along with regular files.
|
||||
if (object_storage->getCommonKeyPrefix() != getMetadataKeyPrefix())
|
||||
{
|
||||
chassert(abs_key.starts_with(object_storage->getCommonKeyPrefix()));
|
||||
auto metadata_key = std::filesystem::path(getMetadataKeyPrefix()) / abs_key.substr(object_storage->getCommonKeyPrefix().size());
|
||||
RelativePathsWithMetadata metadata_files;
|
||||
object_storage->listObjects(metadata_key, metadata_files, 0);
|
||||
getDirectChildrenOnDisk(metadata_key, getMetadataKeyPrefix(), metadata_files, path, directories);
|
||||
}
|
||||
|
||||
return std::vector<std::string>(std::make_move_iterator(directories.begin()), std::make_move_iterator(directories.end()));
|
||||
}
|
||||
|
||||
void MetadataStorageFromPlainRewritableObjectStorage::getDirectChildrenOnDisk(
|
||||
const std::string & storage_key,
|
||||
const std::string & storage_key_perfix,
|
||||
const RelativePathsWithMetadata & remote_paths,
|
||||
const std::string & local_path,
|
||||
std::unordered_set<std::string> & result) const
|
||||
{
|
||||
getDirectChildrenOnRewritableDisk(storage_key, storage_key_perfix, remote_paths, local_path, *getPathMap(), metadata_mutex, result);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h>
|
||||
|
||||
#include <memory>
|
||||
#include <unordered_set>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -11,6 +12,7 @@ namespace DB
|
||||
class MetadataStorageFromPlainRewritableObjectStorage final : public MetadataStorageFromPlainObjectStorage
|
||||
{
|
||||
private:
|
||||
const std::string metadata_key_prefix;
|
||||
std::shared_ptr<PathMap> path_map;
|
||||
|
||||
public:
|
||||
@ -18,11 +20,20 @@ public:
|
||||
~MetadataStorageFromPlainRewritableObjectStorage() override;
|
||||
|
||||
MetadataStorageType getType() const override { return MetadataStorageType::PlainRewritable; }
|
||||
bool exists(const std::string & path) const override;
|
||||
bool isDirectory(const std::string & path) const override;
|
||||
std::vector<std::string> listDirectory(const std::string & path) const override;
|
||||
|
||||
|
||||
protected:
|
||||
std::string getMetadataKeyPrefix() const override { return metadata_key_prefix; }
|
||||
std::shared_ptr<PathMap> getPathMap() const override { return path_map; }
|
||||
std::vector<std::string> getDirectChildrenOnDisk(
|
||||
const std::string & storage_key, const RelativePathsWithMetadata & remote_paths, const std::string & local_path) const override;
|
||||
void getDirectChildrenOnDisk(
|
||||
const std::string & storage_key,
|
||||
const std::string & storage_key_perfix,
|
||||
const RelativePathsWithMetadata & remote_paths,
|
||||
const std::string & local_path,
|
||||
std::unordered_set<std::string> & result) const override;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -139,6 +139,19 @@ def test(storage_policy):
|
||||
== insert_values_arr[i]
|
||||
)
|
||||
|
||||
metadata_it = cluster.minio_client.list_objects(
|
||||
cluster.minio_bucket, "data/", recursive=True
|
||||
)
|
||||
metadata_count = 0
|
||||
for obj in list(metadata_it):
|
||||
if "/__meta/" in obj.object_name:
|
||||
assert obj.object_name.endswith("/prefix.path")
|
||||
metadata_count += 1
|
||||
else:
|
||||
assert not obj.object_name.endswith("/prefix.path")
|
||||
|
||||
assert metadata_count > 0
|
||||
|
||||
for i in range(NUM_WORKERS):
|
||||
node = cluster.instances[f"node{i + 1}"]
|
||||
node.query("DROP TABLE IF EXISTS test SYNC")
|
||||
|
Loading…
Reference in New Issue
Block a user