mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 08:40:50 +00:00
Merge pull request #52211 from kssenii/fix-race-in-disk-web
Fix race in Web disk
This commit is contained in:
commit
a1782ea70e
@ -33,46 +33,18 @@ const std::string & MetadataStorageFromStaticFilesWebServer::getPath() const
|
||||
|
||||
bool MetadataStorageFromStaticFilesWebServer::exists(const std::string & path) const
|
||||
{
|
||||
fs::path fs_path(path);
|
||||
if (fs_path.has_extension())
|
||||
fs_path = fs_path.parent_path();
|
||||
|
||||
initializeIfNeeded(fs_path);
|
||||
|
||||
if (object_storage.files.empty())
|
||||
return false;
|
||||
|
||||
if (object_storage.files.contains(path))
|
||||
return true;
|
||||
|
||||
/// `object_storage.files` contains files + directories only inside `metadata_path / uuid_3_digit / uuid /`
|
||||
/// (specific table files only), but we need to be able to also tell if `exists(<metadata_path>)`, for example.
|
||||
auto it = std::lower_bound(
|
||||
object_storage.files.begin(),
|
||||
object_storage.files.end(),
|
||||
path,
|
||||
[](const auto & file, const std::string & path_) { return file.first < path_; }
|
||||
);
|
||||
if (it == object_storage.files.end())
|
||||
return false;
|
||||
|
||||
if (startsWith(it->first, path)
|
||||
|| (it != object_storage.files.begin() && startsWith(std::prev(it)->first, path)))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
return object_storage.exists(path);
|
||||
}
|
||||
|
||||
void MetadataStorageFromStaticFilesWebServer::assertExists(const std::string & path) const
|
||||
{
|
||||
initializeIfNeeded(path);
|
||||
|
||||
if (!exists(path))
|
||||
#ifdef NDEBUG
|
||||
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no path {}", path);
|
||||
#else
|
||||
{
|
||||
std::string all_files;
|
||||
std::shared_lock shared_lock(object_storage.metadata_mutex);
|
||||
for (const auto & [file, _] : object_storage.files)
|
||||
{
|
||||
if (!all_files.empty())
|
||||
@ -87,33 +59,40 @@ void MetadataStorageFromStaticFilesWebServer::assertExists(const std::string & p
|
||||
bool MetadataStorageFromStaticFilesWebServer::isFile(const std::string & path) const
|
||||
{
|
||||
assertExists(path);
|
||||
std::shared_lock shared_lock(object_storage.metadata_mutex);
|
||||
return object_storage.files.at(path).type == WebObjectStorage::FileType::File;
|
||||
}
|
||||
|
||||
bool MetadataStorageFromStaticFilesWebServer::isDirectory(const std::string & path) const
|
||||
{
|
||||
assertExists(path);
|
||||
std::shared_lock shared_lock(object_storage.metadata_mutex);
|
||||
return object_storage.files.at(path).type == WebObjectStorage::FileType::Directory;
|
||||
}
|
||||
|
||||
uint64_t MetadataStorageFromStaticFilesWebServer::getFileSize(const String & path) const
|
||||
{
|
||||
assertExists(path);
|
||||
std::shared_lock shared_lock(object_storage.metadata_mutex);
|
||||
return object_storage.files.at(path).size;
|
||||
}
|
||||
|
||||
StoredObjects MetadataStorageFromStaticFilesWebServer::getStorageObjects(const std::string & path) const
|
||||
{
|
||||
assertExists(path);
|
||||
|
||||
auto fs_path = fs::path(object_storage.url) / path;
|
||||
std::string remote_path = fs_path.parent_path() / (escapeForFileName(fs_path.stem()) + fs_path.extension().string());
|
||||
remote_path = remote_path.substr(object_storage.url.size());
|
||||
|
||||
std::shared_lock shared_lock(object_storage.metadata_mutex);
|
||||
return {StoredObject(remote_path, object_storage.files.at(path).size, path)};
|
||||
}
|
||||
|
||||
std::vector<std::string> MetadataStorageFromStaticFilesWebServer::listDirectory(const std::string & path) const
|
||||
{
|
||||
std::vector<std::string> result;
|
||||
std::shared_lock shared_lock(object_storage.metadata_mutex);
|
||||
for (const auto & [file_path, _] : object_storage.files)
|
||||
{
|
||||
if (file_path.starts_with(path))
|
||||
@ -122,22 +101,14 @@ std::vector<std::string> MetadataStorageFromStaticFilesWebServer::listDirectory(
|
||||
return result;
|
||||
}
|
||||
|
||||
void MetadataStorageFromStaticFilesWebServer::initializeIfNeeded(const std::string & path) const
|
||||
{
|
||||
if (object_storage.files.find(path) == object_storage.files.end())
|
||||
{
|
||||
object_storage.initialize(fs::path(object_storage.url) / path);
|
||||
}
|
||||
}
|
||||
|
||||
DirectoryIteratorPtr MetadataStorageFromStaticFilesWebServer::iterateDirectory(const std::string & path) const
|
||||
{
|
||||
std::vector<fs::path> dir_file_paths;
|
||||
|
||||
initializeIfNeeded(path);
|
||||
if (!exists(path))
|
||||
return std::make_unique<StaticDirectoryIterator>(std::move(dir_file_paths));
|
||||
|
||||
std::shared_lock shared_lock(object_storage.metadata_mutex);
|
||||
for (const auto & [file_path, _] : object_storage.files)
|
||||
{
|
||||
if (fs::path(parentPath(file_path)) / "" == fs::path(path) / "")
|
||||
|
@ -13,13 +13,14 @@ class MetadataStorageFromStaticFilesWebServer final : public IMetadataStorage
|
||||
{
|
||||
private:
|
||||
friend class MetadataStorageFromStaticFilesWebServerTransaction;
|
||||
using FileType = WebObjectStorage::FileType;
|
||||
|
||||
const WebObjectStorage & object_storage;
|
||||
std::string root_path;
|
||||
|
||||
void assertExists(const std::string & path) const;
|
||||
|
||||
void initializeIfNeeded(const std::string & path) const;
|
||||
void initializeImpl(const String & uri_path, const std::unique_lock<std::shared_mutex> &) const;
|
||||
|
||||
public:
|
||||
explicit MetadataStorageFromStaticFilesWebServer(const WebObjectStorage & object_storage_);
|
||||
|
@ -28,10 +28,9 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
extern const int NETWORK_ERROR;
|
||||
}
|
||||
|
||||
void WebObjectStorage::initialize(const String & uri_path) const
|
||||
void WebObjectStorage::initialize(const String & uri_path, const std::unique_lock<std::shared_mutex> & lock) const
|
||||
{
|
||||
std::vector<String> directories_to_load;
|
||||
LOG_TRACE(log, "Loading metadata for directory: {}", uri_path);
|
||||
@ -81,8 +80,9 @@ void WebObjectStorage::initialize(const String & uri_path) const
|
||||
}
|
||||
|
||||
file_path = file_path.substr(url.size());
|
||||
files.emplace(std::make_pair(file_path, file_data));
|
||||
LOG_TRACE(&Poco::Logger::get("DiskWeb"), "Adding file: {}, size: {}", file_path, file_data.size);
|
||||
|
||||
files.emplace(std::make_pair(file_path, file_data));
|
||||
}
|
||||
|
||||
files.emplace(std::make_pair(dir_name, FileData({ .type = FileType::Directory })));
|
||||
@ -103,7 +103,7 @@ void WebObjectStorage::initialize(const String & uri_path) const
|
||||
}
|
||||
|
||||
for (const auto & directory_path : directories_to_load)
|
||||
initialize(directory_path);
|
||||
initialize(directory_path, lock);
|
||||
}
|
||||
|
||||
|
||||
@ -118,31 +118,51 @@ WebObjectStorage::WebObjectStorage(
|
||||
|
||||
bool WebObjectStorage::exists(const StoredObject & object) const
|
||||
{
|
||||
const auto & path = object.remote_path;
|
||||
return exists(object.remote_path);
|
||||
}
|
||||
|
||||
bool WebObjectStorage::exists(const std::string & path) const
|
||||
{
|
||||
LOG_TRACE(&Poco::Logger::get("DiskWeb"), "Checking existence of path: {}", path);
|
||||
|
||||
if (files.find(path) != files.end())
|
||||
std::shared_lock shared_lock(metadata_mutex);
|
||||
|
||||
if (files.find(path) == files.end())
|
||||
{
|
||||
shared_lock.unlock();
|
||||
std::unique_lock unique_lock(metadata_mutex);
|
||||
if (files.find(path) == files.end())
|
||||
{
|
||||
fs::path index_file_dir = fs::path(url) / path;
|
||||
if (index_file_dir.has_extension())
|
||||
index_file_dir = index_file_dir.parent_path();
|
||||
|
||||
initialize(index_file_dir, unique_lock);
|
||||
}
|
||||
/// Files are never deleted from `files` as disk is read only, so no worry that we unlock now.
|
||||
unique_lock.unlock();
|
||||
shared_lock.lock();
|
||||
}
|
||||
|
||||
if (files.empty())
|
||||
return false;
|
||||
|
||||
if (files.contains(path))
|
||||
return true;
|
||||
|
||||
if (path.ends_with(MergeTreeData::FORMAT_VERSION_FILE_NAME) && files.find(fs::path(path).parent_path() / "") == files.end())
|
||||
{
|
||||
try
|
||||
{
|
||||
initialize(fs::path(url) / fs::path(path).parent_path());
|
||||
return files.find(path) != files.end();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
const auto message = getCurrentExceptionMessage(false);
|
||||
bool can_throw = CurrentThread::isInitialized() && CurrentThread::get().getQueryContext();
|
||||
if (can_throw)
|
||||
throw Exception(ErrorCodes::NETWORK_ERROR, "Cannot load disk metadata. Error: {}", message);
|
||||
/// `object_storage.files` contains files + directories only inside `metadata_path / uuid_3_digit / uuid /`
|
||||
/// (specific table files only), but we need to be able to also tell if `exists(<metadata_path>)`, for example.
|
||||
auto it = std::lower_bound(
|
||||
files.begin(), files.end(), path,
|
||||
[](const auto & file, const std::string & path_) { return file.first < path_; }
|
||||
);
|
||||
|
||||
LOG_TRACE(&Poco::Logger::get("DiskWeb"), "Cannot load disk metadata. Error: {}", message);
|
||||
if (it == files.end())
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (startsWith(it->first, path)
|
||||
|| (it != files.begin() && startsWith(std::prev(it)->first, path)))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include "config.h"
|
||||
|
||||
#include <Disks/ObjectStorages/IObjectStorage.h>
|
||||
#include <shared_mutex>
|
||||
|
||||
namespace Poco
|
||||
{
|
||||
@ -93,9 +94,8 @@ public:
|
||||
bool isReadOnly() const override { return true; }
|
||||
|
||||
protected:
|
||||
void initialize(const String & uri_path) const;
|
||||
|
||||
[[noreturn]] static void throwNotAllowed();
|
||||
bool exists(const std::string & path) const;
|
||||
|
||||
enum class FileType
|
||||
{
|
||||
@ -111,12 +111,13 @@ protected:
|
||||
|
||||
using Files = std::map<String, FileData>; /// file path -> file data
|
||||
mutable Files files;
|
||||
|
||||
String url;
|
||||
mutable std::shared_mutex metadata_mutex;
|
||||
|
||||
private:
|
||||
Poco::Logger * log;
|
||||
void initialize(const String & path, const std::unique_lock<std::shared_mutex> &) const;
|
||||
|
||||
const String url;
|
||||
Poco::Logger * log;
|
||||
size_t min_bytes_for_seek;
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user