mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-29 19:12:03 +00:00
Merge pull request #28411 from kssenii/fix-web-disk
Fix disk with static files a little
This commit is contained in:
commit
ab3e005e6a
@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
#include <common/logger_useful.h>
|
#include <common/logger_useful.h>
|
||||||
|
|
||||||
|
#include <Common/escapeForFileName.h>
|
||||||
#include <IO/ReadWriteBufferFromHTTP.h>
|
#include <IO/ReadWriteBufferFromHTTP.h>
|
||||||
#include <Disks/ReadIndirectBufferFromWebServer.h>
|
#include <Disks/ReadIndirectBufferFromWebServer.h>
|
||||||
#include <IO/SeekAvoidingReadBuffer.h>
|
#include <IO/SeekAvoidingReadBuffer.h>
|
||||||
@ -19,10 +20,11 @@
|
|||||||
#define UUID_PATTERN "[\\w]{8}-[\\w]{4}-[\\w]{4}-[\\w]{4}-[\\w]{12}"
|
#define UUID_PATTERN "[\\w]{8}-[\\w]{4}-[\\w]{4}-[\\w]{4}-[\\w]{12}"
|
||||||
#define EXTRACT_UUID_PATTERN fmt::format(".*/({})/.*", UUID_PATTERN)
|
#define EXTRACT_UUID_PATTERN fmt::format(".*/({})/.*", UUID_PATTERN)
|
||||||
|
|
||||||
#define DIRECTORY_FILE_PATTERN(prefix) fmt::format("{}-({})-(\\w+)-(\\w+\\.\\w+)", prefix, UUID_PATTERN)
|
#define DIRECTORY_FILE_PATTERN(prefix) fmt::format("{}-({})-(\\w+)-(.*)", prefix, UUID_PATTERN)
|
||||||
#define ROOT_FILE_PATTERN(prefix) fmt::format("{}-({})-(\\w+\\.\\w+)", prefix, UUID_PATTERN)
|
#define ROOT_FILE_PATTERN(prefix) fmt::format("{}-({})-(\\w+\\.\\w+)", prefix, UUID_PATTERN)
|
||||||
|
|
||||||
#define MATCH_DIRECTORY_FILE_PATTERN fmt::format(".*/({})/(\\w+)/(\\w+\\.\\w+)", UUID_PATTERN)
|
#define MATCH_DIRECTORY_FILE_PATTERN fmt::format(".*/({})/(\\w+)/(.*)", UUID_PATTERN)
|
||||||
|
#define MATCH_DIRECTORY_PATTERN fmt::format(".*/({})/(\\w+)/", UUID_PATTERN)
|
||||||
#define MATCH_ROOT_FILE_PATTERN fmt::format(".*/({})/(\\w+\\.\\w+)", UUID_PATTERN)
|
#define MATCH_ROOT_FILE_PATTERN fmt::format(".*/({})/(\\w+\\.\\w+)", UUID_PATTERN)
|
||||||
|
|
||||||
|
|
||||||
@ -67,14 +69,14 @@ void DiskWebServer::Metadata::initialize(const String & uri_with_path, const Str
|
|||||||
if (uuid != table_uuid)
|
if (uuid != table_uuid)
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected uuid: {}, expected: {}", uuid, table_uuid);
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected uuid: {}, expected: {}", uuid, table_uuid);
|
||||||
|
|
||||||
tables_data[uuid][directory].emplace(File(file, file_size));
|
tables_data[uuid][directory].emplace(std::make_pair(file, file_size));
|
||||||
}
|
}
|
||||||
else if (RE2::FullMatch(remote_file_name, ROOT_FILE_PATTERN(files_prefix), &uuid, &file))
|
else if (RE2::FullMatch(remote_file_name, ROOT_FILE_PATTERN(files_prefix), &uuid, &file))
|
||||||
{
|
{
|
||||||
if (uuid != table_uuid)
|
if (uuid != table_uuid)
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected uuid: {}, expected: {}", uuid, table_uuid);
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected uuid: {}, expected: {}", uuid, table_uuid);
|
||||||
|
|
||||||
tables_data[uuid][file].emplace(File(file, file_size));
|
tables_data[uuid][file].emplace(std::make_pair(file, file_size));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected file: {}", remote_file_name);
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected file: {}", remote_file_name);
|
||||||
@ -82,11 +84,10 @@ void DiskWebServer::Metadata::initialize(const String & uri_with_path, const Str
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename Directory>
|
||||||
class DiskWebDirectoryIterator final : public IDiskDirectoryIterator
|
class DiskWebDirectoryIterator final : public IDiskDirectoryIterator
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
using Directory = std::unordered_map<String, T>;
|
|
||||||
|
|
||||||
DiskWebDirectoryIterator(Directory & directory_, const String & directory_root_)
|
DiskWebDirectoryIterator(Directory & directory_, const String & directory_root_)
|
||||||
: directory(directory_), iter(directory.begin()), directory_root(directory_root_)
|
: directory(directory_), iter(directory.begin()), directory_root(directory_root_)
|
||||||
@ -193,7 +194,8 @@ bool DiskWebServer::findFileInMetadata(const String & path, File & file_info) co
|
|||||||
String table_uuid, directory_name, file_name;
|
String table_uuid, directory_name, file_name;
|
||||||
|
|
||||||
if (RE2::FullMatch(path, MATCH_DIRECTORY_FILE_PATTERN, &table_uuid, &directory_name, &file_name)
|
if (RE2::FullMatch(path, MATCH_DIRECTORY_FILE_PATTERN, &table_uuid, &directory_name, &file_name)
|
||||||
|| RE2::FullMatch(path, MATCH_ROOT_FILE_PATTERN, &table_uuid, &file_name))
|
|| RE2::FullMatch(path, MATCH_ROOT_FILE_PATTERN, &table_uuid, &file_name)
|
||||||
|
|| RE2::FullMatch(path, MATCH_DIRECTORY_PATTERN, &table_uuid, &directory_name))
|
||||||
{
|
{
|
||||||
if (directory_name.empty())
|
if (directory_name.empty())
|
||||||
directory_name = file_name;
|
directory_name = file_name;
|
||||||
@ -204,12 +206,15 @@ bool DiskWebServer::findFileInMetadata(const String & path, File & file_info) co
|
|||||||
if (!metadata.tables_data[table_uuid].count(directory_name))
|
if (!metadata.tables_data[table_uuid].count(directory_name))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
if (file_name.empty())
|
||||||
|
return true;
|
||||||
|
|
||||||
const auto & files = metadata.tables_data[table_uuid][directory_name];
|
const auto & files = metadata.tables_data[table_uuid][directory_name];
|
||||||
auto file = files.find(File(file_name));
|
auto file = files.find(file_name);
|
||||||
if (file == files.end())
|
if (file == files.end())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
file_info = *file;
|
file_info = File(file->first, file->second);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -228,14 +233,17 @@ bool DiskWebServer::exists(const String & path) const
|
|||||||
|
|
||||||
std::unique_ptr<ReadBufferFromFileBase> DiskWebServer::readFile(const String & path, const ReadSettings & read_settings, size_t) const
|
std::unique_ptr<ReadBufferFromFileBase> DiskWebServer::readFile(const String & path, const ReadSettings & read_settings, size_t) const
|
||||||
{
|
{
|
||||||
LOG_DEBUG(log, "Read from file by path: {}", path);
|
|
||||||
|
|
||||||
File file;
|
File file;
|
||||||
if (!findFileInMetadata(path, file))
|
if (!findFileInMetadata(path, file))
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "File {} not found", path);
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "File {} not found", path);
|
||||||
|
|
||||||
RemoteMetadata meta(uri, fs::path(path).parent_path() / fs::path(path).filename());
|
auto file_name = escapeForFileName(fs::path(path).stem()) + fs::path(path).extension().string();
|
||||||
meta.remote_fs_objects.emplace_back(std::make_pair(getFileName(path), file.size));
|
auto remote_path = fs::path(path).parent_path() / file_name;
|
||||||
|
LOG_DEBUG(log, "Read from file by path: {}", remote_path.string());
|
||||||
|
|
||||||
|
RemoteMetadata meta(uri, remote_path);
|
||||||
|
meta.remote_fs_objects.emplace_back(std::make_pair(getFileName(remote_path), file.size));
|
||||||
|
|
||||||
auto reader = std::make_unique<ReadBufferFromWebServer>(uri, meta, getContext(), settings->max_read_tries, read_settings.remote_fs_buffer_size);
|
auto reader = std::make_unique<ReadBufferFromWebServer>(uri, meta, getContext(), settings->max_read_tries, read_settings.remote_fs_buffer_size);
|
||||||
return std::make_unique<SeekAvoidingReadBuffer>(std::move(reader), settings->min_bytes_for_seek);
|
return std::make_unique<SeekAvoidingReadBuffer>(std::move(reader), settings->min_bytes_for_seek);
|
||||||
@ -257,7 +265,7 @@ DiskDirectoryIteratorPtr DiskWebServer::iterateDirectory(const String & path)
|
|||||||
String uuid;
|
String uuid;
|
||||||
|
|
||||||
if (RE2::FullMatch(path, ".*/store/"))
|
if (RE2::FullMatch(path, ".*/store/"))
|
||||||
return std::make_unique<DiskWebDirectoryIterator<RootDirectory>>(metadata.tables_data, path);
|
return std::make_unique<DiskWebDirectoryIterator<UUIDDirectoryListing>>(metadata.tables_data, path);
|
||||||
|
|
||||||
if (!RE2::Extract(path, EXTRACT_UUID_PATTERN, "\\1", &uuid))
|
if (!RE2::Extract(path, EXTRACT_UUID_PATTERN, "\\1", &uuid))
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot extract uuid for: {}", path);
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot extract uuid for: {}", path);
|
||||||
@ -280,10 +288,20 @@ DiskDirectoryIteratorPtr DiskWebServer::iterateDirectory(const String & path)
|
|||||||
|
|
||||||
LOG_TRACE(&Poco::Logger::get("DiskWeb"), "Cannot load disk metadata. Error: {}", message);
|
LOG_TRACE(&Poco::Logger::get("DiskWeb"), "Cannot load disk metadata. Error: {}", message);
|
||||||
/// Empty iterator.
|
/// Empty iterator.
|
||||||
return std::make_unique<DiskWebDirectoryIterator<Directory>>(metadata.tables_data[""], path);
|
return std::make_unique<DiskWebDirectoryIterator<RootDirectoryListing>>(metadata.tables_data[""], path);
|
||||||
}
|
}
|
||||||
|
|
||||||
return std::make_unique<DiskWebDirectoryIterator<Directory>>(metadata.tables_data[uuid], path);
|
String directory_name;
|
||||||
|
if (RE2::FullMatch(path, MATCH_DIRECTORY_PATTERN, &uuid, &directory_name))
|
||||||
|
{
|
||||||
|
if (metadata.tables_data[uuid].contains(directory_name))
|
||||||
|
return std::make_unique<DiskWebDirectoryIterator<DirectoryListing>>(metadata.tables_data[uuid][directory_name], path);
|
||||||
|
if (can_throw)
|
||||||
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Directory {} does not exist. (uuid: {})", directory_name, uuid);
|
||||||
|
return std::make_unique<DiskWebDirectoryIterator<RootDirectoryListing>>(metadata.tables_data[""], path); /// Empty directory.
|
||||||
|
}
|
||||||
|
|
||||||
|
return std::make_unique<DiskWebDirectoryIterator<RootDirectoryListing>>(metadata.tables_data[uuid], path);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -75,11 +75,9 @@ public:
|
|||||||
String name;
|
String name;
|
||||||
size_t size;
|
size_t size;
|
||||||
File(const String & name_ = "", const size_t size_ = 0) : name(name_), size(size_) {}
|
File(const String & name_ = "", const size_t size_ = 0) : name(name_), size(size_) {}
|
||||||
bool operator<(const File & other) const { return name < other.name; }
|
|
||||||
bool operator==(const File & other) const { return name == other.name; }
|
|
||||||
};
|
};
|
||||||
|
|
||||||
using Directory = std::set<File>;
|
using Directory = std::unordered_map<String, size_t>;
|
||||||
|
|
||||||
/* Each root directory contains either directories like
|
/* Each root directory contains either directories like
|
||||||
* all_x_x_x/{file}, detached/, etc, or root files like format_version.txt.
|
* all_x_x_x/{file}, detached/, etc, or root files like format_version.txt.
|
||||||
@ -96,11 +94,15 @@ public:
|
|||||||
/// Fetch meta only when required.
|
/// Fetch meta only when required.
|
||||||
mutable TableDirectories tables_data;
|
mutable TableDirectories tables_data;
|
||||||
|
|
||||||
Metadata() {}
|
Metadata() = default;
|
||||||
|
|
||||||
void initialize(const String & uri_with_path, const String & files_prefix, const String & uuid, ContextPtr context) const;
|
void initialize(const String & uri_with_path, const String & files_prefix, const String & uuid, ContextPtr context) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
using UUIDDirectoryListing = std::unordered_map<String, RootDirectory>;
|
||||||
|
using RootDirectoryListing = std::unordered_map<String, Directory>;
|
||||||
|
using DirectoryListing = std::unordered_map<String, size_t>;
|
||||||
|
|
||||||
bool findFileInMetadata(const String & path, File & file_info) const;
|
bool findFileInMetadata(const String & path, File & file_info) const;
|
||||||
|
|
||||||
bool supportZeroCopyReplication() const override { return false; }
|
bool supportZeroCopyReplication() const override { return false; }
|
||||||
|
Loading…
Reference in New Issue
Block a user