Store plain_rewritable metadata in a separate layout

This commit is contained in:
Julia Kartseva 2024-06-26 01:27:47 +00:00
parent 183a3521de
commit d6da86dad2
8 changed files with 225 additions and 65 deletions

View File

@ -19,11 +19,11 @@ ObjectStorageKey CommonPathPrefixKeyGenerator::generate(const String & path, boo
{
const auto & [object_key_prefix, suffix_parts] = getLongestObjectKeyPrefix(path);
auto key = std::filesystem::path(object_key_prefix.empty() ? storage_key_prefix : object_key_prefix);
auto key = std::filesystem::path(object_key_prefix.empty() ? std::string() : object_key_prefix);
/// The longest prefix is the same as path, meaning that the path is already mapped.
if (suffix_parts.empty())
return ObjectStorageKey::createAsRelative(std::move(key));
return ObjectStorageKey::createAsRelative(storage_key_prefix, std::move(key));
/// File and top-level directory paths are mapped as is.
if (!is_directory || object_key_prefix.empty())
@ -39,7 +39,7 @@ ObjectStorageKey CommonPathPrefixKeyGenerator::generate(const String & path, boo
key /= getRandomASCIIString(part_size);
}
return ObjectStorageKey::createAsRelative(key);
return ObjectStorageKey::createAsRelative(storage_key_prefix, key);
}
std::tuple<std::string, std::vector<std::string>> CommonPathPrefixKeyGenerator::getLongestObjectKeyPrefix(const std::string & path) const

View File

@ -7,6 +7,7 @@
#include <filesystem>
#include <tuple>
#include <unordered_set>
namespace DB
{
@ -79,14 +80,16 @@ std::vector<std::string> MetadataStorageFromPlainObjectStorage::listDirectory(co
object_storage->listObjects(abs_key, files, 0);
return getDirectChildrenOnDisk(abs_key, files, path);
std::unordered_set<std::string> directories;
getDirectChildrenOnDisk(abs_key, object_storage->getCommonKeyPrefix(), files, path, directories);
return std::vector<std::string>(std::make_move_iterator(directories.begin()), std::make_move_iterator(directories.end()));
}
DirectoryIteratorPtr MetadataStorageFromPlainObjectStorage::iterateDirectory(const std::string & path) const
{
/// Required for MergeTree
auto paths = listDirectory(path);
// Prepend path, since iterateDirectory() includes path, unlike listDirectory()
/// Prepend path, since iterateDirectory() includes path, unlike listDirectory()
std::for_each(paths.begin(), paths.end(), [&](auto & child) { child = fs::path(path) / child; });
std::vector<std::filesystem::path> fs_paths(paths.begin(), paths.end());
return std::make_unique<StaticDirectoryIterator>(std::move(fs_paths));
@ -99,10 +102,13 @@ StoredObjects MetadataStorageFromPlainObjectStorage::getStorageObjects(const std
return {StoredObject(object_key.serialize(), path, object_size)};
}
std::vector<std::string> MetadataStorageFromPlainObjectStorage::getDirectChildrenOnDisk(
const std::string & storage_key, const RelativePathsWithMetadata & remote_paths, const std::string & /* local_path */) const
void MetadataStorageFromPlainObjectStorage::getDirectChildrenOnDisk(
const std::string & storage_key,
const std::string & /* storage_key_perfix */,
const RelativePathsWithMetadata & remote_paths,
const std::string & /* local_path */,
std::unordered_set<std::string> & result) const
{
std::unordered_set<std::string> duplicates_filter;
for (const auto & elem : remote_paths)
{
const auto & path = elem->relative_path;
@ -111,11 +117,10 @@ std::vector<std::string> MetadataStorageFromPlainObjectStorage::getDirectChildre
/// string::npos is ok.
const auto slash_pos = path.find('/', child_pos);
if (slash_pos == std::string::npos)
duplicates_filter.emplace(path.substr(child_pos));
result.emplace(path.substr(child_pos));
else
duplicates_filter.emplace(path.substr(child_pos, slash_pos - child_pos));
result.emplace(path.substr(child_pos, slash_pos - child_pos));
}
return std::vector<std::string>(std::make_move_iterator(duplicates_filter.begin()), std::make_move_iterator(duplicates_filter.end()));
}
const IMetadataStorage & MetadataStorageFromPlainObjectStorageTransaction::getStorageForNonTransactionalReads() const
@ -140,7 +145,7 @@ void MetadataStorageFromPlainObjectStorageTransaction::removeDirectory(const std
else
{
addOperation(std::make_unique<MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation>(
normalizeDirectoryPath(path), *metadata_storage.getPathMap(), object_storage));
normalizeDirectoryPath(path), *metadata_storage.getPathMap(), object_storage, metadata_storage.getMetadataKeyPrefix()));
}
}
@ -151,8 +156,13 @@ void MetadataStorageFromPlainObjectStorageTransaction::createDirectory(const std
auto normalized_path = normalizeDirectoryPath(path);
auto key_prefix = object_storage->generateObjectKeyPrefixForDirectoryPath(normalized_path).serialize();
chassert(key_prefix.starts_with(object_storage->getCommonKeyPrefix()));
auto op = std::make_unique<MetadataStorageFromPlainObjectStorageCreateDirectoryOperation>(
std::move(normalized_path), std::move(key_prefix), *metadata_storage.getPathMap(), object_storage);
std::move(normalized_path),
key_prefix.substr(object_storage->getCommonKeyPrefix().size()),
*metadata_storage.getPathMap(),
object_storage,
metadata_storage.getMetadataKeyPrefix());
addOperation(std::move(op));
}
@ -167,7 +177,11 @@ void MetadataStorageFromPlainObjectStorageTransaction::moveDirectory(const std::
throwNotImplemented();
addOperation(std::make_unique<MetadataStorageFromPlainObjectStorageMoveDirectoryOperation>(
normalizeDirectoryPath(path_from), normalizeDirectoryPath(path_to), *metadata_storage.getPathMap(), object_storage));
normalizeDirectoryPath(path_from),
normalizeDirectoryPath(path_to),
*metadata_storage.getPathMap(),
object_storage,
metadata_storage.getMetadataKeyPrefix()));
}
void MetadataStorageFromPlainObjectStorageTransaction::addBlobToMetadata(

View File

@ -6,6 +6,8 @@
#include <Disks/ObjectStorages/MetadataStorageTransactionState.h>
#include <map>
#include <string>
#include <unordered_set>
namespace DB
{
@ -78,10 +80,20 @@ public:
bool supportsStat() const override { return false; }
protected:
/// Get the object storage prefix for storing metadata files. If stored behind a separate endpoint,
/// the metadata keys reflect the layout of the regular files.
virtual std::string getMetadataKeyPrefix() const { return object_storage->getCommonKeyPrefix(); }
/// Returns a map of local paths to paths in object storage.
virtual std::shared_ptr<PathMap> getPathMap() const { throwNotImplemented(); }
virtual std::vector<std::string> getDirectChildrenOnDisk(
const std::string & storage_key, const RelativePathsWithMetadata & remote_paths, const std::string & local_path) const;
/// Retrieves the immediate files and directories within a given directory on a disk.
virtual void getDirectChildrenOnDisk(
const std::string & storage_key,
const std::string & storage_key_perfix,
const RelativePathsWithMetadata & remote_paths,
const std::string & local_path,
std::unordered_set<std::string> & result) const;
};
class MetadataStorageFromPlainObjectStorageTransaction final : public IMetadataTransaction, private MetadataOperationsHolder

View File

@ -20,14 +20,24 @@ namespace
constexpr auto PREFIX_PATH_FILE_NAME = "prefix.path";
/// Builds the object storage key of the `prefix.path` metadata file that describes a directory.
/// The key is `metadata_key_prefix` joined with `key_prefix`, with PREFIX_PATH_FILE_NAME as the relative suffix.
/// NOTE(review): std::filesystem::path::operator/ drops the left-hand side when the right-hand side is
/// an absolute path, so this presumably expects `key_prefix` without a leading '/' — confirm at call sites.
ObjectStorageKey createMetadataObjectKey(const std::string & key_prefix, const std::string & metadata_key_prefix)
{
    std::filesystem::path metadata_directory(metadata_key_prefix);
    metadata_directory /= key_prefix;
    return ObjectStorageKey::createAsRelative(metadata_directory.string(), PREFIX_PATH_FILE_NAME);
}
}
/// Constructs the operation that creates the metadata for a local directory.
///  - path_: local directory path; moved into the operation.
///  - key_prefix_: object key prefix of the directory; moved into the operation. It is taken by
///    rvalue reference, so it is moved rather than copied.
///  - path_map_: map of local paths to remote key prefixes; kept by reference.
///  - object_storage_: object storage used to write and remove the metadata object.
///  - metadata_key_prefix_: prefix under which the `prefix.path` metadata object is stored; copied.
/// NOTE(review): the single-line initializer list below appears to be the pre-change variant shown by
/// the diff, followed by its multi-line replacement — confirm.
MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::MetadataStorageFromPlainObjectStorageCreateDirectoryOperation(
std::filesystem::path && path_,
std::string && key_prefix_,
MetadataStorageFromPlainObjectStorage::PathMap & path_map_,
ObjectStoragePtr object_storage_)
: path(std::move(path_)), key_prefix(std::move(key_prefix_)), path_map(path_map_), object_storage(object_storage_)
ObjectStoragePtr object_storage_,
const std::string & metadata_key_prefix_)
: path(std::move(path_))
, key_prefix(std::move(key_prefix_))
, path_map(path_map_)
, object_storage(object_storage_)
, metadata_key_prefix(metadata_key_prefix_)
{
}
@ -36,13 +46,17 @@ void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::execute(std:
if (path_map.contains(path))
return;
LOG_TRACE(getLogger("MetadataStorageFromPlainObjectStorageCreateDirectoryOperation"), "Creating metadata for directory '{}'", path);
auto metadata_object_key = createMetadataObjectKey(key_prefix, metadata_key_prefix);
auto object_key = ObjectStorageKey::createAsRelative(key_prefix, PREFIX_PATH_FILE_NAME);
LOG_TRACE(
getLogger("MetadataStorageFromPlainObjectStorageCreateDirectoryOperation"),
"Creating metadata for directory '{}' with remote path='{}'",
path,
metadata_object_key.serialize());
auto object = StoredObject(object_key.serialize(), path / PREFIX_PATH_FILE_NAME);
auto metadata_object = StoredObject(metadata_object_key.serialize(), path / PREFIX_PATH_FILE_NAME);
auto buf = object_storage->writeObject(
object,
metadata_object,
WriteMode::Rewrite,
/* object_attributes */ std::nullopt,
/* buf_size */ DBMS_DEFAULT_BUFFER_SIZE,
@ -66,25 +80,31 @@ void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::execute(std:
/// Reverts the directory creation.
/// If `write_finalized` is set: erases `path` from the path map, decrements the `directory_map_size`
/// metric by one and removes the `prefix.path` metadata object.
/// Otherwise, if `write_created` is set: removes the metadata object only if it exists.
/// NOTE(review): the flags are presumably set by execute() as the write progresses — confirm.
/// NOTE(review): statements using `object_key` appear to be the pre-change lines of this diff, each
/// followed by its `metadata_object_key` replacement — confirm.
void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::undo(std::unique_lock<SharedMutex> &)
{
auto object_key = ObjectStorageKey::createAsRelative(key_prefix, PREFIX_PATH_FILE_NAME);
/// Key of the metadata object, located under the metadata key prefix.
auto metadata_object_key = createMetadataObjectKey(key_prefix, metadata_key_prefix);
if (write_finalized)
{
/// Drop the in-memory mapping and keep the metric in sync with the map size.
path_map.erase(path);
auto metric = object_storage->getMetadataStorageMetrics().directory_map_size;
CurrentMetrics::sub(metric, 1);
object_storage->removeObject(StoredObject(object_key.serialize(), path / PREFIX_PATH_FILE_NAME));
object_storage->removeObject(StoredObject(metadata_object_key.serialize(), path / PREFIX_PATH_FILE_NAME));
}
else if (write_created)
object_storage->removeObjectIfExists(StoredObject(object_key.serialize(), path / PREFIX_PATH_FILE_NAME));
object_storage->removeObjectIfExists(StoredObject(metadata_object_key.serialize(), path / PREFIX_PATH_FILE_NAME));
}
/// Constructs the operation that moves a directory from `path_from_` to `path_to_`.
/// Both paths are moved into the operation, `path_map_` is kept by reference, and
/// `metadata_key_prefix_` (the prefix under which metadata objects are stored) is copied.
/// NOTE(review): the single-line initializer list below appears to be the pre-change variant shown by
/// the diff, followed by its multi-line replacement — confirm.
MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::MetadataStorageFromPlainObjectStorageMoveDirectoryOperation(
std::filesystem::path && path_from_,
std::filesystem::path && path_to_,
MetadataStorageFromPlainObjectStorage::PathMap & path_map_,
ObjectStoragePtr object_storage_)
: path_from(std::move(path_from_)), path_to(std::move(path_to_)), path_map(path_map_), object_storage(object_storage_)
ObjectStoragePtr object_storage_,
const std::string & metadata_key_prefix_)
: path_from(std::move(path_from_))
, path_to(std::move(path_to_))
, path_map(path_map_)
, object_storage(object_storage_)
, metadata_key_prefix(metadata_key_prefix_)
{
}
@ -98,26 +118,26 @@ std::unique_ptr<WriteBufferFromFileBase> MetadataStorageFromPlainObjectStorageMo
if (path_map.contains(new_path))
throw Exception(ErrorCodes::FILE_ALREADY_EXISTS, "Metadata object for the new (destination) path '{}' already exists", new_path);
auto object_key = ObjectStorageKey::createAsRelative(expected_it->second, PREFIX_PATH_FILE_NAME);
auto metadata_object_key = createMetadataObjectKey(expected_it->second, metadata_key_prefix);
auto object = StoredObject(object_key.serialize(), expected_path / PREFIX_PATH_FILE_NAME);
auto metadata_object = StoredObject(metadata_object_key.serialize(), expected_path / PREFIX_PATH_FILE_NAME);
if (validate_content)
{
std::string data;
auto read_buf = object_storage->readObject(object);
auto read_buf = object_storage->readObject(metadata_object);
readStringUntilEOF(data, *read_buf);
if (data != path_from)
throw Exception(
ErrorCodes::INCORRECT_DATA,
"Incorrect data for object key {}, expected {}, got {}",
object_key.serialize(),
metadata_object_key.serialize(),
expected_path,
data);
}
auto write_buf = object_storage->writeObject(
object,
metadata_object,
WriteMode::Rewrite,
/* object_attributes */ std::nullopt,
/*buf_size*/ DBMS_DEFAULT_BUFFER_SIZE,
@ -156,8 +176,11 @@ void MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::undo(std::uniq
}
/// Constructs the operation that removes the directory `path_`.
/// The path is moved into the operation, `path_map_` is kept by reference, and
/// `metadata_key_prefix_` (the prefix under which metadata objects are stored) is copied.
/// NOTE(review): the first parameter list and initializer list below appear to be the pre-change
/// variant shown by the diff, followed by their replacement — confirm.
MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation(
std::filesystem::path && path_, MetadataStorageFromPlainObjectStorage::PathMap & path_map_, ObjectStoragePtr object_storage_)
: path(std::move(path_)), path_map(path_map_), object_storage(object_storage_)
std::filesystem::path && path_,
MetadataStorageFromPlainObjectStorage::PathMap & path_map_,
ObjectStoragePtr object_storage_,
const std::string & metadata_key_prefix_)
: path(std::move(path_)), path_map(path_map_), object_storage(object_storage_), metadata_key_prefix(metadata_key_prefix_)
{
}
@ -170,9 +193,9 @@ void MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::execute(std:
LOG_TRACE(getLogger("MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation"), "Removing directory '{}'", path);
key_prefix = path_it->second;
auto object_key = ObjectStorageKey::createAsRelative(key_prefix, PREFIX_PATH_FILE_NAME);
auto object = StoredObject(object_key.serialize(), path / PREFIX_PATH_FILE_NAME);
object_storage->removeObject(object);
auto metadata_object_key = createMetadataObjectKey(key_prefix, metadata_key_prefix);
auto metadata_object = StoredObject(metadata_object_key.serialize(), path / PREFIX_PATH_FILE_NAME);
object_storage->removeObject(metadata_object);
path_map.erase(path_it);
auto metric = object_storage->getMetadataStorageMetrics().directory_map_size;
@ -189,10 +212,10 @@ void MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::undo(std::un
if (!removed)
return;
auto object_key = ObjectStorageKey::createAsRelative(key_prefix, PREFIX_PATH_FILE_NAME);
auto object = StoredObject(object_key.serialize(), path / PREFIX_PATH_FILE_NAME);
auto metadata_object_key = createMetadataObjectKey(key_prefix, metadata_key_prefix);
auto metadata_object = StoredObject(metadata_object_key.serialize(), path / PREFIX_PATH_FILE_NAME);
auto buf = object_storage->writeObject(
object,
metadata_object,
WriteMode::Rewrite,
/* object_attributes */ std::nullopt,
/* buf_size */ DBMS_DEFAULT_BUFFER_SIZE,

View File

@ -16,6 +16,7 @@ private:
std::string key_prefix;
MetadataStorageFromPlainObjectStorage::PathMap & path_map;
ObjectStoragePtr object_storage;
const std::string metadata_key_prefix;
bool write_created = false;
bool write_finalized = false;
@ -26,7 +27,8 @@ public:
std::filesystem::path && path_,
std::string && key_prefix_,
MetadataStorageFromPlainObjectStorage::PathMap & path_map_,
ObjectStoragePtr object_storage_);
ObjectStoragePtr object_storage_,
const std::string & metadata_key_prefix_);
void execute(std::unique_lock<SharedMutex> & metadata_lock) override;
void undo(std::unique_lock<SharedMutex> & metadata_lock) override;
@ -39,6 +41,7 @@ private:
std::filesystem::path path_to;
MetadataStorageFromPlainObjectStorage::PathMap & path_map;
ObjectStoragePtr object_storage;
const std::string metadata_key_prefix;
bool write_created = false;
bool write_finalized = false;
@ -51,7 +54,8 @@ public:
std::filesystem::path && path_from_,
std::filesystem::path && path_to_,
MetadataStorageFromPlainObjectStorage::PathMap & path_map_,
ObjectStoragePtr object_storage_);
ObjectStoragePtr object_storage_,
const std::string & metadata_key_prefix_);
void execute(std::unique_lock<SharedMutex> & metadata_lock) override;
@ -65,13 +69,17 @@ private:
MetadataStorageFromPlainObjectStorage::PathMap & path_map;
ObjectStoragePtr object_storage;
const std::string metadata_key_prefix;
std::string key_prefix;
bool removed = false;
public:
MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation(
std::filesystem::path && path_, MetadataStorageFromPlainObjectStorage::PathMap & path_map_, ObjectStoragePtr object_storage_);
std::filesystem::path && path_,
MetadataStorageFromPlainObjectStorage::PathMap & path_map_,
ObjectStoragePtr object_storage_,
const std::string & metadata_key_prefix_);
void execute(std::unique_lock<SharedMutex> & metadata_lock) override;
void undo(std::unique_lock<SharedMutex> & metadata_lock) override;

View File

@ -1,6 +1,7 @@
#include <Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h>
#include <Disks/ObjectStorages/ObjectStorageIterator.h>
#include <unordered_set>
#include <IO/ReadHelpers.h>
#include <IO/SharedThreadPools.h>
#include <IO/S3Common.h>
@ -21,8 +22,22 @@ namespace
{
constexpr auto PREFIX_PATH_FILE_NAME = "prefix.path";
constexpr auto METADATA_PATH_TOKEN = "__meta/";
MetadataStorageFromPlainObjectStorage::PathMap loadPathPrefixMap(const std::string & root, ObjectStoragePtr object_storage)
/// Chooses the key prefix under which `prefix.path` metadata files are stored.
/// The dedicated `__meta/` layout is used when:
///  1. metadata already exists under the `__meta/` prefix, OR
///  2. nothing exists under the common key prefix (the disk endpoint holds no objects).
/// Otherwise the common key prefix is returned, so metadata stays next to regular data
/// for backward compatibility.
std::string getMetadataKeyPrefix(ObjectStoragePtr object_storage)
{
    const std::filesystem::path data_root(object_storage->getCommonKeyPrefix());
    const std::filesystem::path metadata_root = data_root / METADATA_PATH_TOKEN;

    /// Metadata is already kept separately: keep using that layout.
    if (object_storage->existsOrHasAnyChild(metadata_root / ""))
        return metadata_root.string();

    /// Objects exist under the common prefix but no separate metadata: legacy layout.
    if (object_storage->existsOrHasAnyChild(data_root / ""))
        return data_root.string();

    /// Nothing is stored yet: start with the separate layout.
    return metadata_root.string();
}
MetadataStorageFromPlainObjectStorage::PathMap loadPathPrefixMap(const std::string & metadata_key_prefix, ObjectStoragePtr object_storage)
{
MetadataStorageFromPlainObjectStorage::PathMap result;
@ -39,16 +54,16 @@ MetadataStorageFromPlainObjectStorage::PathMap loadPathPrefixMap(const std::stri
LOG_DEBUG(log, "Loading metadata");
size_t num_files = 0;
for (auto iterator = object_storage->iterate(root, 0); iterator->isValid(); iterator->next())
for (auto iterator = object_storage->iterate(metadata_key_prefix, 0); iterator->isValid(); iterator->next())
{
++num_files;
auto file = iterator->current();
String path = file->getPath();
auto remote_path = std::filesystem::path(path);
if (remote_path.filename() != PREFIX_PATH_FILE_NAME)
auto remote_metadata_path = std::filesystem::path(path);
if (remote_metadata_path.filename() != PREFIX_PATH_FILE_NAME)
continue;
runner([remote_path, path, &object_storage, &result, &mutex, &log, &settings]
runner([remote_metadata_path, path, &object_storage, &result, &mutex, &log, &settings, &metadata_key_prefix]
{
setThreadName("PlainRWMetaLoad");
@ -75,7 +90,10 @@ MetadataStorageFromPlainObjectStorage::PathMap loadPathPrefixMap(const std::stri
throw;
}
chassert(remote_path.has_parent_path());
chassert(remote_metadata_path.has_parent_path());
chassert(remote_metadata_path.string().starts_with(metadata_key_prefix));
auto suffix = remote_metadata_path.string().substr(metadata_key_prefix.size());
auto remote_path = std::filesystem::path(std::move(suffix));
std::pair<MetadataStorageFromPlainObjectStorage::PathMap::iterator, bool> res;
{
std::lock_guard lock(mutex);
@ -103,17 +121,17 @@ MetadataStorageFromPlainObjectStorage::PathMap loadPathPrefixMap(const std::stri
return result;
}
std::vector<std::string> getDirectChildrenOnRewritableDisk(
void getDirectChildrenOnRewritableDisk(
const std::string & storage_key,
const std::string & storage_key_perfix,
const RelativePathsWithMetadata & remote_paths,
const std::string & local_path,
const MetadataStorageFromPlainObjectStorage::PathMap & local_path_prefixes,
SharedMutex & shared_mutex)
SharedMutex & shared_mutex,
std::unordered_set<std::string> & result)
{
using PathMap = MetadataStorageFromPlainObjectStorage::PathMap;
std::unordered_set<std::string> duplicates_filter;
/// Map remote paths into local subdirectories.
std::unordered_map<PathMap::mapped_type, PathMap::key_type> remote_to_local_subdir;
@ -149,22 +167,21 @@ std::vector<std::string> getDirectChildrenOnRewritableDisk(
/// File names.
auto filename = path.substr(child_pos);
if (!skip_list.contains(filename))
duplicates_filter.emplace(std::move(filename));
result.emplace(std::move(filename));
}
else
{
/// Subdirectories.
auto it = remote_to_local_subdir.find(path.substr(0, slash_pos));
chassert(path.find(storage_key_perfix) == 0);
auto it = remote_to_local_subdir.find(path.substr(storage_key_perfix.size(), slash_pos - storage_key_perfix.size()));
/// Mapped subdirectories.
if (it != remote_to_local_subdir.end())
duplicates_filter.emplace(it->second);
result.emplace(it->second);
/// The remote subdirectory name is the same as the local subdirectory.
else
duplicates_filter.emplace(path.substr(child_pos, slash_pos - child_pos));
result.emplace(path.substr(child_pos, slash_pos - child_pos));
}
}
return std::vector<std::string>(std::make_move_iterator(duplicates_filter.begin()), std::make_move_iterator(duplicates_filter.end()));
}
}
@ -172,7 +189,8 @@ std::vector<std::string> getDirectChildrenOnRewritableDisk(
MetadataStorageFromPlainRewritableObjectStorage::MetadataStorageFromPlainRewritableObjectStorage(
ObjectStoragePtr object_storage_, String storage_path_prefix_)
: MetadataStorageFromPlainObjectStorage(object_storage_, storage_path_prefix_)
, path_map(std::make_shared<PathMap>(loadPathPrefixMap(object_storage->getCommonKeyPrefix(), object_storage)))
, metadata_key_prefix(DB::getMetadataKeyPrefix(object_storage))
, path_map(std::make_shared<PathMap>(loadPathPrefixMap(metadata_key_prefix, object_storage)))
{
if (object_storage->isWriteOnce())
throw Exception(
@ -190,10 +208,71 @@ MetadataStorageFromPlainRewritableObjectStorage::~MetadataStorageFromPlainRewrit
CurrentMetrics::sub(metric, path_map->size());
}
std::vector<std::string> MetadataStorageFromPlainRewritableObjectStorage::getDirectChildrenOnDisk(
const std::string & storage_key, const RelativePathsWithMetadata & remote_paths, const std::string & local_path) const
/// Checks whether `path` exists on the disk.
/// Returns true when the base plain-storage check succeeds. Otherwise, when the metadata key prefix
/// differs from the common key prefix, the object key generated for `path` is re-rooted under the
/// metadata key prefix and the result of `existsOrHasAnyChild` for that key is returned.
/// Returns false when neither check applies.
bool MetadataStorageFromPlainRewritableObjectStorage::exists(const std::string & path) const
{
/// NOTE(review): the next statement looks like the removed body of the previous
/// getDirectChildrenOnDisk() left over from the diff rendering, not part of exists() — confirm.
return getDirectChildrenOnRewritableDisk(storage_key, remote_paths, local_path, *getPathMap(), metadata_mutex);
if (MetadataStorageFromPlainObjectStorage::exists(path))
return true;
if (getMetadataKeyPrefix() != object_storage->getCommonKeyPrefix())
{
auto key_prefix = object_storage->generateObjectKeyForPath(path).serialize();
chassert(key_prefix.starts_with(object_storage->getCommonKeyPrefix()));
/// Replace the common key prefix with the metadata key prefix, keeping the rest of the key.
auto metadata_key = std::filesystem::path(getMetadataKeyPrefix()) / key_prefix.substr(object_storage->getCommonKeyPrefix().size());
return object_storage->existsOrHasAnyChild(metadata_key);
}
return false;
}
bool MetadataStorageFromPlainRewritableObjectStorage::isDirectory(const std::string & path) const
{
if (getMetadataKeyPrefix() != object_storage->getCommonKeyPrefix())
{
auto directory = std::filesystem::path(object_storage->generateObjectKeyForPath(path).serialize()) / "";
chassert(directory.string().starts_with(object_storage->getCommonKeyPrefix()));
auto metadata_key
= std::filesystem::path(getMetadataKeyPrefix()) / directory.string().substr(object_storage->getCommonKeyPrefix().size());
return object_storage->existsOrHasAnyChild(metadata_key);
}
else
return MetadataStorageFromPlainObjectStorage::isDirectory(path);
}
std::vector<std::string> MetadataStorageFromPlainRewritableObjectStorage::listDirectory(const std::string & path) const
{
auto key_prefix = object_storage->generateObjectKeyForPath(path).serialize();
RelativePathsWithMetadata files;
std::string abs_key = key_prefix;
if (!abs_key.ends_with('/'))
abs_key += '/';
object_storage->listObjects(abs_key, files, 0);
std::unordered_set<std::string> directories;
getDirectChildrenOnDisk(abs_key, object_storage->getCommonKeyPrefix(), files, path, directories);
/// List empty directories that are identified by the `prefix.path` metadata files. This is required to, e.g., remove
/// metadata along with regular files.
if (object_storage->getCommonKeyPrefix() != getMetadataKeyPrefix())
{
chassert(abs_key.starts_with(object_storage->getCommonKeyPrefix()));
auto metadata_key = std::filesystem::path(getMetadataKeyPrefix()) / abs_key.substr(object_storage->getCommonKeyPrefix().size());
RelativePathsWithMetadata metadata_files;
object_storage->listObjects(metadata_key, metadata_files, 0);
getDirectChildrenOnDisk(metadata_key, getMetadataKeyPrefix(), metadata_files, path, directories);
}
return std::vector<std::string>(std::make_move_iterator(directories.begin()), std::make_move_iterator(directories.end()));
}
/// Collects the direct children of a directory into `result`.
/// Delegates to getDirectChildrenOnRewritableDisk(), additionally passing this storage's path map
/// and `metadata_mutex`.
///  - storage_key: remote key of the listed directory.
///  - storage_key_perfix: key prefix that `remote_paths` are rooted at.
///  - remote_paths: objects listed under `storage_key`.
///  - local_path: local path of the listed directory.
///  - result: output set; entries are added to it.
void MetadataStorageFromPlainRewritableObjectStorage::getDirectChildrenOnDisk(
const std::string & storage_key,
const std::string & storage_key_perfix,
const RelativePathsWithMetadata & remote_paths,
const std::string & local_path,
std::unordered_set<std::string> & result) const
{
getDirectChildrenOnRewritableDisk(storage_key, storage_key_perfix, remote_paths, local_path, *getPathMap(), metadata_mutex, result);
}
}

View File

@ -3,6 +3,7 @@
#include <Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h>
#include <memory>
#include <unordered_set>
namespace DB
@ -11,6 +12,7 @@ namespace DB
class MetadataStorageFromPlainRewritableObjectStorage final : public MetadataStorageFromPlainObjectStorage
{
private:
const std::string metadata_key_prefix;
std::shared_ptr<PathMap> path_map;
public:
@ -18,11 +20,20 @@ public:
~MetadataStorageFromPlainRewritableObjectStorage() override;
MetadataStorageType getType() const override { return MetadataStorageType::PlainRewritable; }
bool exists(const std::string & path) const override;
bool isDirectory(const std::string & path) const override;
std::vector<std::string> listDirectory(const std::string & path) const override;
protected:
std::string getMetadataKeyPrefix() const override { return metadata_key_prefix; }
std::shared_ptr<PathMap> getPathMap() const override { return path_map; }
std::vector<std::string> getDirectChildrenOnDisk(
const std::string & storage_key, const RelativePathsWithMetadata & remote_paths, const std::string & local_path) const override;
void getDirectChildrenOnDisk(
const std::string & storage_key,
const std::string & storage_key_perfix,
const RelativePathsWithMetadata & remote_paths,
const std::string & local_path,
std::unordered_set<std::string> & result) const override;
};
}

View File

@ -139,6 +139,19 @@ def test(storage_policy):
== insert_values_arr[i]
)
metadata_it = cluster.minio_client.list_objects(
cluster.minio_bucket, "data/", recursive=True
)
metadata_count = 0
for obj in list(metadata_it):
if "/__meta/" in obj.object_name:
assert obj.object_name.endswith("/prefix.path")
metadata_count += 1
else:
assert not obj.object_name.endswith("/prefix.path")
assert metadata_count > 0
for i in range(NUM_WORKERS):
node = cluster.instances[f"node{i + 1}"]
node.query("DROP TABLE IF EXISTS test SYNC")