Merge pull request #52211 from kssenii/fix-race-in-disk-web

Fix race in Web disk
This commit is contained in:
Kseniia Sumarokova 2023-07-23 18:07:25 +02:00 committed by GitHub
commit a1782ea70e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 61 additions and 68 deletions

View File

@ -33,46 +33,18 @@ const std::string & MetadataStorageFromStaticFilesWebServer::getPath() const
bool MetadataStorageFromStaticFilesWebServer::exists(const std::string & path) const
{
fs::path fs_path(path);
if (fs_path.has_extension())
fs_path = fs_path.parent_path();
initializeIfNeeded(fs_path);
if (object_storage.files.empty())
return false;
if (object_storage.files.contains(path))
return true;
/// `object_storage.files` contains files + directories only inside `metadata_path / uuid_3_digit / uuid /`
/// (specific table files only), but we need to be able to also tell if `exists(<metadata_path>)`, for example.
auto it = std::lower_bound(
object_storage.files.begin(),
object_storage.files.end(),
path,
[](const auto & file, const std::string & path_) { return file.first < path_; }
);
if (it == object_storage.files.end())
return false;
if (startsWith(it->first, path)
|| (it != object_storage.files.begin() && startsWith(std::prev(it)->first, path)))
return true;
return false;
return object_storage.exists(path);
}
void MetadataStorageFromStaticFilesWebServer::assertExists(const std::string & path) const
{
initializeIfNeeded(path);
if (!exists(path))
#ifdef NDEBUG
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no path {}", path);
#else
{
std::string all_files;
std::shared_lock shared_lock(object_storage.metadata_mutex);
for (const auto & [file, _] : object_storage.files)
{
if (!all_files.empty())
@ -87,33 +59,40 @@ void MetadataStorageFromStaticFilesWebServer::assertExists(const std::string & p
bool MetadataStorageFromStaticFilesWebServer::isFile(const std::string & path) const
{
assertExists(path);
std::shared_lock shared_lock(object_storage.metadata_mutex);
return object_storage.files.at(path).type == WebObjectStorage::FileType::File;
}
bool MetadataStorageFromStaticFilesWebServer::isDirectory(const std::string & path) const
{
assertExists(path);
std::shared_lock shared_lock(object_storage.metadata_mutex);
return object_storage.files.at(path).type == WebObjectStorage::FileType::Directory;
}
uint64_t MetadataStorageFromStaticFilesWebServer::getFileSize(const String & path) const
{
assertExists(path);
std::shared_lock shared_lock(object_storage.metadata_mutex);
return object_storage.files.at(path).size;
}
StoredObjects MetadataStorageFromStaticFilesWebServer::getStorageObjects(const std::string & path) const
{
assertExists(path);
auto fs_path = fs::path(object_storage.url) / path;
std::string remote_path = fs_path.parent_path() / (escapeForFileName(fs_path.stem()) + fs_path.extension().string());
remote_path = remote_path.substr(object_storage.url.size());
std::shared_lock shared_lock(object_storage.metadata_mutex);
return {StoredObject(remote_path, object_storage.files.at(path).size, path)};
}
std::vector<std::string> MetadataStorageFromStaticFilesWebServer::listDirectory(const std::string & path) const
{
std::vector<std::string> result;
std::shared_lock shared_lock(object_storage.metadata_mutex);
for (const auto & [file_path, _] : object_storage.files)
{
if (file_path.starts_with(path))
@ -122,22 +101,14 @@ std::vector<std::string> MetadataStorageFromStaticFilesWebServer::listDirectory(
return result;
}
void MetadataStorageFromStaticFilesWebServer::initializeIfNeeded(const std::string & path) const
{
if (object_storage.files.find(path) == object_storage.files.end())
{
object_storage.initialize(fs::path(object_storage.url) / path);
}
}
DirectoryIteratorPtr MetadataStorageFromStaticFilesWebServer::iterateDirectory(const std::string & path) const
{
std::vector<fs::path> dir_file_paths;
initializeIfNeeded(path);
if (!exists(path))
return std::make_unique<StaticDirectoryIterator>(std::move(dir_file_paths));
std::shared_lock shared_lock(object_storage.metadata_mutex);
for (const auto & [file_path, _] : object_storage.files)
{
if (fs::path(parentPath(file_path)) / "" == fs::path(path) / "")

View File

@ -13,13 +13,14 @@ class MetadataStorageFromStaticFilesWebServer final : public IMetadataStorage
{
private:
friend class MetadataStorageFromStaticFilesWebServerTransaction;
using FileType = WebObjectStorage::FileType;
const WebObjectStorage & object_storage;
std::string root_path;
void assertExists(const std::string & path) const;
void initializeIfNeeded(const std::string & path) const;
void initializeImpl(const String & uri_path, const std::unique_lock<std::shared_mutex> &) const;
public:
explicit MetadataStorageFromStaticFilesWebServer(const WebObjectStorage & object_storage_);

View File

@ -28,10 +28,9 @@ namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int NOT_IMPLEMENTED;
extern const int NETWORK_ERROR;
}
void WebObjectStorage::initialize(const String & uri_path) const
void WebObjectStorage::initialize(const String & uri_path, const std::unique_lock<std::shared_mutex> & lock) const
{
std::vector<String> directories_to_load;
LOG_TRACE(log, "Loading metadata for directory: {}", uri_path);
@ -81,8 +80,9 @@ void WebObjectStorage::initialize(const String & uri_path) const
}
file_path = file_path.substr(url.size());
files.emplace(std::make_pair(file_path, file_data));
LOG_TRACE(&Poco::Logger::get("DiskWeb"), "Adding file: {}, size: {}", file_path, file_data.size);
files.emplace(std::make_pair(file_path, file_data));
}
files.emplace(std::make_pair(dir_name, FileData({ .type = FileType::Directory })));
@ -103,7 +103,7 @@ void WebObjectStorage::initialize(const String & uri_path) const
}
for (const auto & directory_path : directories_to_load)
initialize(directory_path);
initialize(directory_path, lock);
}
@ -118,31 +118,51 @@ WebObjectStorage::WebObjectStorage(
bool WebObjectStorage::exists(const StoredObject & object) const
{
const auto & path = object.remote_path;
return exists(object.remote_path);
}
bool WebObjectStorage::exists(const std::string & path) const
{
LOG_TRACE(&Poco::Logger::get("DiskWeb"), "Checking existence of path: {}", path);
if (files.find(path) != files.end())
std::shared_lock shared_lock(metadata_mutex);
if (files.find(path) == files.end())
{
shared_lock.unlock();
std::unique_lock unique_lock(metadata_mutex);
if (files.find(path) == files.end())
{
fs::path index_file_dir = fs::path(url) / path;
if (index_file_dir.has_extension())
index_file_dir = index_file_dir.parent_path();
initialize(index_file_dir, unique_lock);
}
/// Files are never deleted from `files` as disk is read only, so no worry that we unlock now.
unique_lock.unlock();
shared_lock.lock();
}
if (files.empty())
return false;
if (files.contains(path))
return true;
if (path.ends_with(MergeTreeData::FORMAT_VERSION_FILE_NAME) && files.find(fs::path(path).parent_path() / "") == files.end())
{
try
{
initialize(fs::path(url) / fs::path(path).parent_path());
return files.find(path) != files.end();
}
catch (...)
{
const auto message = getCurrentExceptionMessage(false);
bool can_throw = CurrentThread::isInitialized() && CurrentThread::get().getQueryContext();
if (can_throw)
throw Exception(ErrorCodes::NETWORK_ERROR, "Cannot load disk metadata. Error: {}", message);
/// `object_storage.files` contains files + directories only inside `metadata_path / uuid_3_digit / uuid /`
/// (specific table files only), but we need to be able to also tell if `exists(<metadata_path>)`, for example.
auto it = std::lower_bound(
files.begin(), files.end(), path,
[](const auto & file, const std::string & path_) { return file.first < path_; }
);
LOG_TRACE(&Poco::Logger::get("DiskWeb"), "Cannot load disk metadata. Error: {}", message);
if (it == files.end())
return false;
}
}
if (startsWith(it->first, path)
|| (it != files.begin() && startsWith(std::prev(it)->first, path)))
return true;
return false;
}

View File

@ -3,6 +3,7 @@
#include "config.h"
#include <Disks/ObjectStorages/IObjectStorage.h>
#include <shared_mutex>
namespace Poco
{
@ -93,9 +94,8 @@ public:
bool isReadOnly() const override { return true; }
protected:
void initialize(const String & uri_path) const;
[[noreturn]] static void throwNotAllowed();
bool exists(const std::string & path) const;
enum class FileType
{
@ -111,12 +111,13 @@ protected:
using Files = std::map<String, FileData>; /// file path -> file data
mutable Files files;
String url;
mutable std::shared_mutex metadata_mutex;
private:
Poco::Logger * log;
void initialize(const String & path, const std::unique_lock<std::shared_mutex> &) const;
const String url;
Poco::Logger * log;
size_t min_bytes_for_seek;
};