mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-19 16:20:50 +00:00
Refactor
This commit is contained in:
parent
de308acfad
commit
3a299f382d
@ -110,16 +110,19 @@ private:
|
||||
ObjectStorageQueueMetadata::ObjectStorageQueueMetadata(
|
||||
const fs::path & zookeeper_path_,
|
||||
const ObjectStorageQueueTableMetadata & table_metadata_,
|
||||
const ObjectStorageQueueSettings & settings_)
|
||||
: settings(settings_)
|
||||
, table_metadata(table_metadata_)
|
||||
size_t cleanup_interval_min_ms_,
|
||||
size_t cleanup_interval_max_ms_)
|
||||
: table_metadata(table_metadata_)
|
||||
, mode(table_metadata.getMode())
|
||||
, zookeeper_path(zookeeper_path_)
|
||||
, buckets_num(getBucketsNum(settings_))
|
||||
, buckets_num(getBucketsNum(table_metadata_))
|
||||
, cleanup_interval_min_ms(cleanup_interval_min_ms_)
|
||||
, cleanup_interval_max_ms(cleanup_interval_max_ms_)
|
||||
, log(getLogger("StorageObjectStorageQueue(" + zookeeper_path_.string() + ")"))
|
||||
, local_file_statuses(std::make_shared<LocalFileStatuses>())
|
||||
{
|
||||
if (settings.mode == ObjectStorageQueueMode::UNORDERED
|
||||
&& (settings.tracked_files_limit || settings.tracked_file_ttl_sec))
|
||||
if (mode == ObjectStorageQueueMode::UNORDERED
|
||||
&& (table_metadata.tracked_files_limit || table_metadata.tracked_file_ttl_sec))
|
||||
{
|
||||
task = Context::getGlobalContextInstance()->getSchedulePool().createTask(
|
||||
"ObjectStorageQueueCleanupFunc",
|
||||
@ -128,10 +131,10 @@ ObjectStorageQueueMetadata::ObjectStorageQueueMetadata(
|
||||
task->activate();
|
||||
task->scheduleAfter(
|
||||
generateRescheduleInterval(
|
||||
settings.cleanup_interval_min_ms, settings.cleanup_interval_max_ms));
|
||||
cleanup_interval_min_ms, cleanup_interval_max_ms));
|
||||
}
|
||||
LOG_TRACE(log, "Mode: {}, buckets: {}, processing threads: {}, result buckets num: {}",
|
||||
settings.mode.toString(), settings.buckets, settings.processing_threads_num, buckets_num);
|
||||
table_metadata.mode, table_metadata.buckets, table_metadata.processing_threads_num, buckets_num);
|
||||
|
||||
}
|
||||
|
||||
@ -162,7 +165,7 @@ ObjectStorageQueueMetadata::FileMetadataPtr ObjectStorageQueueMetadata::getFileM
|
||||
ObjectStorageQueueOrderedFileMetadata::BucketInfoPtr bucket_info)
|
||||
{
|
||||
auto file_status = local_file_statuses->get(path, /* create */true);
|
||||
switch (settings.mode.value)
|
||||
switch (mode)
|
||||
{
|
||||
case ObjectStorageQueueMode::ORDERED:
|
||||
return std::make_shared<ObjectStorageQueueOrderedFileMetadata>(
|
||||
@ -171,39 +174,28 @@ ObjectStorageQueueMetadata::FileMetadataPtr ObjectStorageQueueMetadata::getFileM
|
||||
file_status,
|
||||
bucket_info,
|
||||
buckets_num,
|
||||
settings.loading_retries,
|
||||
table_metadata.loading_retries,
|
||||
log);
|
||||
case ObjectStorageQueueMode::UNORDERED:
|
||||
return std::make_shared<ObjectStorageQueueUnorderedFileMetadata>(
|
||||
zookeeper_path,
|
||||
path,
|
||||
file_status,
|
||||
settings.loading_retries,
|
||||
table_metadata.loading_retries,
|
||||
log);
|
||||
}
|
||||
}
|
||||
|
||||
size_t ObjectStorageQueueMetadata::getBucketsNum(const ObjectStorageQueueSettings & settings)
|
||||
size_t ObjectStorageQueueMetadata::getBucketsNum(const ObjectStorageQueueTableMetadata & metadata)
|
||||
{
|
||||
if (settings.buckets)
|
||||
return settings.buckets;
|
||||
if (settings.processing_threads_num)
|
||||
return settings.processing_threads_num;
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t ObjectStorageQueueMetadata::getBucketsNum(const ObjectStorageQueueTableMetadata & settings)
|
||||
{
|
||||
if (settings.buckets)
|
||||
return settings.buckets;
|
||||
if (settings.processing_threads_num)
|
||||
return settings.processing_threads_num;
|
||||
return 0;
|
||||
if (metadata.buckets)
|
||||
return metadata.buckets;
|
||||
return metadata.processing_threads_num;
|
||||
}
|
||||
|
||||
bool ObjectStorageQueueMetadata::useBucketsForProcessing() const
|
||||
{
|
||||
return settings.mode == ObjectStorageQueueMode::ORDERED && (buckets_num > 1);
|
||||
return mode == ObjectStorageQueueMode::ORDERED && (buckets_num > 1);
|
||||
}
|
||||
|
||||
ObjectStorageQueueMetadata::Bucket ObjectStorageQueueMetadata::getBucketForPath(const std::string & path) const
|
||||
@ -217,18 +209,35 @@ ObjectStorageQueueMetadata::tryAcquireBucket(const Bucket & bucket, const Proces
|
||||
return ObjectStorageQueueOrderedFileMetadata::tryAcquireBucket(zookeeper_path, bucket, processor, log);
|
||||
}
|
||||
|
||||
void ObjectStorageQueueMetadata::syncWithKeeper(
|
||||
ObjectStorageQueueTableMetadata ObjectStorageQueueMetadata::syncWithKeeper(
|
||||
const fs::path & zookeeper_path,
|
||||
const ObjectStorageQueueTableMetadata & table_metadata,
|
||||
const ObjectStorageQueueSettings & settings,
|
||||
const ColumnsDescription & columns,
|
||||
const std::string & format,
|
||||
LoggerPtr log)
|
||||
{
|
||||
const auto table_metadata_path = zookeeper_path / "metadata";
|
||||
const auto buckets_num = getBucketsNum(settings);
|
||||
const auto metadata_paths = settings.mode == ObjectStorageQueueMode::ORDERED
|
||||
? ObjectStorageQueueOrderedFileMetadata::getMetadataPaths(buckets_num)
|
||||
: ObjectStorageQueueUnorderedFileMetadata::getMetadataPaths();
|
||||
ObjectStorageQueueTableMetadata table_metadata(settings, columns, format);
|
||||
|
||||
std::vector<std::string> metadata_paths;
|
||||
size_t buckets_num = 0;
|
||||
if (settings.mode == ObjectStorageQueueMode::ORDERED)
|
||||
{
|
||||
buckets_num = getBucketsNum(table_metadata);
|
||||
if (buckets_num == 0)
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Cannot have zero values of `processing_threads_num` and `buckets`");
|
||||
|
||||
LOG_TRACE(log, "Local buckets num: {}", buckets_num);
|
||||
|
||||
metadata_paths = ObjectStorageQueueOrderedFileMetadata::getMetadataPaths(buckets_num);
|
||||
}
|
||||
else
|
||||
{
|
||||
metadata_paths = ObjectStorageQueueUnorderedFileMetadata::getMetadataPaths();
|
||||
}
|
||||
|
||||
const auto table_metadata_path = zookeeper_path / "metadata";
|
||||
auto zookeeper = getZooKeeper();
|
||||
zookeeper->createAncestors(zookeeper_path);
|
||||
|
||||
@ -242,7 +251,7 @@ void ObjectStorageQueueMetadata::syncWithKeeper(
|
||||
LOG_TRACE(log, "Metadata in keeper: {}", metadata_str);
|
||||
|
||||
table_metadata.checkEquals(metadata_from_zk);
|
||||
return;
|
||||
return table_metadata;
|
||||
}
|
||||
|
||||
Coordination::Requests requests;
|
||||
@ -256,15 +265,15 @@ void ObjectStorageQueueMetadata::syncWithKeeper(
|
||||
requests.emplace_back(zkutil::makeCreateRequest(zk_path, "", zkutil::CreateMode::Persistent));
|
||||
}
|
||||
|
||||
if (!settings.last_processed_path.value.empty())
|
||||
if (!table_metadata.last_processed_path.empty())
|
||||
{
|
||||
ObjectStorageQueueOrderedFileMetadata(
|
||||
zookeeper_path,
|
||||
settings.last_processed_path,
|
||||
table_metadata.last_processed_path,
|
||||
std::make_shared<FileStatus>(),
|
||||
/* bucket_info */nullptr,
|
||||
buckets_num,
|
||||
settings.loading_retries,
|
||||
table_metadata.loading_retries,
|
||||
log).setProcessedAtStartRequests(requests, zookeeper);
|
||||
}
|
||||
|
||||
@ -283,7 +292,7 @@ void ObjectStorageQueueMetadata::syncWithKeeper(
|
||||
else if (code != Coordination::Error::ZOK)
|
||||
zkutil::KeeperMultiException::check(code, requests, responses);
|
||||
|
||||
return;
|
||||
return table_metadata;
|
||||
}
|
||||
|
||||
throw Exception(
|
||||
@ -295,7 +304,7 @@ void ObjectStorageQueueMetadata::syncWithKeeper(
|
||||
void ObjectStorageQueueMetadata::cleanupThreadFunc()
|
||||
{
|
||||
/// A background task is responsible for maintaining
|
||||
/// settings.tracked_files_limit and max_set_age settings for `unordered` processing mode.
|
||||
/// table_metadata.tracked_files_limit and max_set_age settings for `unordered` processing mode.
|
||||
|
||||
if (shutdown_called)
|
||||
return;
|
||||
@ -314,7 +323,7 @@ void ObjectStorageQueueMetadata::cleanupThreadFunc()
|
||||
|
||||
task->scheduleAfter(
|
||||
generateRescheduleInterval(
|
||||
settings.cleanup_interval_min_ms, settings.cleanup_interval_max_ms));
|
||||
cleanup_interval_min_ms, cleanup_interval_max_ms));
|
||||
}
|
||||
|
||||
void ObjectStorageQueueMetadata::cleanupThreadFuncImpl()
|
||||
@ -357,11 +366,11 @@ void ObjectStorageQueueMetadata::cleanupThreadFuncImpl()
|
||||
return;
|
||||
}
|
||||
|
||||
chassert(settings.tracked_files_limit || settings.tracked_file_ttl_sec);
|
||||
const bool check_nodes_limit = settings.tracked_files_limit > 0;
|
||||
const bool check_nodes_ttl = settings.tracked_file_ttl_sec > 0;
|
||||
chassert(table_metadata.tracked_files_limit || table_metadata.tracked_file_ttl_sec);
|
||||
const bool check_nodes_limit = table_metadata.tracked_files_limit > 0;
|
||||
const bool check_nodes_ttl = table_metadata.tracked_file_ttl_sec > 0;
|
||||
|
||||
const bool nodes_limit_exceeded = nodes_num > settings.tracked_files_limit;
|
||||
const bool nodes_limit_exceeded = nodes_num > table_metadata.tracked_files_limit;
|
||||
if ((!nodes_limit_exceeded || !check_nodes_limit) && !check_nodes_ttl)
|
||||
{
|
||||
LOG_TEST(log, "No limit exceeded");
|
||||
@ -434,9 +443,9 @@ void ObjectStorageQueueMetadata::cleanupThreadFuncImpl()
|
||||
wb << fmt::format("Node: {}, path: {}, timestamp: {};\n", node, metadata.file_path, metadata.last_processed_timestamp);
|
||||
return wb.str();
|
||||
};
|
||||
LOG_TEST(log, "Checking node limits (max size: {}, max age: {}) for {}", settings.tracked_files_limit, settings.tracked_file_ttl_sec, get_nodes_str());
|
||||
LOG_TEST(log, "Checking node limits (max size: {}, max age: {}) for {}", table_metadata.tracked_files_limit, table_metadata.tracked_file_ttl_sec, get_nodes_str());
|
||||
|
||||
size_t nodes_to_remove = check_nodes_limit && nodes_limit_exceeded ? nodes_num - settings.tracked_files_limit : 0;
|
||||
size_t nodes_to_remove = check_nodes_limit && nodes_limit_exceeded ? nodes_num - table_metadata.tracked_files_limit : 0;
|
||||
for (const auto & node : sorted_nodes)
|
||||
{
|
||||
if (nodes_to_remove)
|
||||
@ -455,7 +464,7 @@ void ObjectStorageQueueMetadata::cleanupThreadFuncImpl()
|
||||
else if (check_nodes_ttl)
|
||||
{
|
||||
UInt64 node_age = getCurrentTime() - node.metadata.last_processed_timestamp;
|
||||
if (node_age >= settings.tracked_file_ttl_sec)
|
||||
if (node_age >= table_metadata.tracked_file_ttl_sec)
|
||||
{
|
||||
LOG_TRACE(log, "Removing node at path {} ({}) because file ttl is reached",
|
||||
node.metadata.file_path, node.zk_path);
|
||||
|
@ -56,14 +56,16 @@ public:
|
||||
ObjectStorageQueueMetadata(
|
||||
const fs::path & zookeeper_path_,
|
||||
const ObjectStorageQueueTableMetadata & table_metadata_,
|
||||
const ObjectStorageQueueSettings & settings_);
|
||||
size_t cleanup_interval_min_ms_,
|
||||
size_t cleanup_interval_max_ms_);
|
||||
|
||||
~ObjectStorageQueueMetadata();
|
||||
|
||||
static void syncWithKeeper(
|
||||
static ObjectStorageQueueTableMetadata syncWithKeeper(
|
||||
const fs::path & zookeeper_path,
|
||||
const ObjectStorageQueueTableMetadata & table_metadata,
|
||||
const ObjectStorageQueueSettings & settings,
|
||||
const ColumnsDescription & columns,
|
||||
const std::string & format,
|
||||
LoggerPtr log);
|
||||
|
||||
void shutdown();
|
||||
@ -78,8 +80,7 @@ public:
|
||||
Bucket getBucketForPath(const std::string & path) const;
|
||||
ObjectStorageQueueOrderedFileMetadata::BucketHolderPtr tryAcquireBucket(const Bucket & bucket, const Processor & processor);
|
||||
|
||||
static size_t getBucketsNum(const ObjectStorageQueueSettings & settings);
|
||||
static size_t getBucketsNum(const ObjectStorageQueueTableMetadata & settings);
|
||||
static size_t getBucketsNum(const ObjectStorageQueueTableMetadata & metadata);
|
||||
|
||||
void checkTableMetadataEquals(const ObjectStorageQueueMetadata & other);
|
||||
|
||||
@ -90,10 +91,11 @@ private:
|
||||
void cleanupThreadFunc();
|
||||
void cleanupThreadFuncImpl();
|
||||
|
||||
ObjectStorageQueueSettings settings;
|
||||
ObjectStorageQueueTableMetadata table_metadata;
|
||||
const ObjectStorageQueueMode mode;
|
||||
const fs::path zookeeper_path;
|
||||
const size_t buckets_num;
|
||||
const size_t cleanup_interval_min_ms, cleanup_interval_max_ms;
|
||||
|
||||
LoggerPtr log;
|
||||
|
||||
|
@ -42,6 +42,7 @@ ObjectStorageQueueTableMetadata::ObjectStorageQueueTableMetadata(
|
||||
, tracked_file_ttl_sec(engine_settings.tracked_file_ttl_sec)
|
||||
, buckets(engine_settings.buckets)
|
||||
, processing_threads_num(engine_settings.processing_threads_num)
|
||||
, loading_retries(engine_settings.loading_retries)
|
||||
{
|
||||
}
|
||||
|
||||
@ -57,6 +58,7 @@ String ObjectStorageQueueTableMetadata::toString() const
|
||||
json.set("format_name", format_name);
|
||||
json.set("columns", columns);
|
||||
json.set("last_processed_file", last_processed_path);
|
||||
json.set("loading_retries", loading_retries);
|
||||
|
||||
std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
|
||||
oss.exceptions(std::ios::failbit);
|
||||
@ -64,6 +66,11 @@ String ObjectStorageQueueTableMetadata::toString() const
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
ObjectStorageQueueMode ObjectStorageQueueTableMetadata::getMode() const
|
||||
{
|
||||
return modeFromString(mode);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static auto getOrDefault(
|
||||
const Poco::JSON::Object::Ptr & json,
|
||||
@ -90,6 +97,7 @@ ObjectStorageQueueTableMetadata::ObjectStorageQueueTableMetadata(const Poco::JSO
|
||||
, buckets(getOrDefault(json, "buckets", "", 0))
|
||||
, processing_threads_num(getOrDefault(json, "processing_threads_num", "s3queue_", 1))
|
||||
, last_processed_path(getOrDefault<String>(json, "last_processed_file", "s3queue_", ""))
|
||||
, loading_retries(getOrDefault(json, "loading_retries", "", 10))
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -27,6 +27,7 @@ struct ObjectStorageQueueTableMetadata
|
||||
const UInt64 buckets;
|
||||
const UInt64 processing_threads_num;
|
||||
const String last_processed_path;
|
||||
const UInt64 loading_retries;
|
||||
|
||||
ObjectStorageQueueTableMetadata(
|
||||
const ObjectStorageQueueSettings & engine_settings,
|
||||
@ -39,6 +40,8 @@ struct ObjectStorageQueueTableMetadata
|
||||
|
||||
String toString() const;
|
||||
|
||||
ObjectStorageQueueMode getMode() const;
|
||||
|
||||
void checkEquals(const ObjectStorageQueueTableMetadata & from_zk) const;
|
||||
|
||||
private:
|
||||
|
@ -62,7 +62,7 @@ namespace
|
||||
return zkutil::extractZooKeeperPath(result_zk_path, true);
|
||||
}
|
||||
|
||||
void checkAndAdjustSettings(
|
||||
void validateSettings(
|
||||
ObjectStorageQueueSettings & queue_settings,
|
||||
bool is_attach)
|
||||
{
|
||||
@ -144,7 +144,7 @@ StorageObjectStorageQueue::StorageObjectStorageQueue(
|
||||
throw Exception(ErrorCodes::BAD_QUERY_PARAMETER, "ObjectStorageQueue url must either end with '/' or contain globs");
|
||||
}
|
||||
|
||||
checkAndAdjustSettings(*queue_settings, mode > LoadingStrictnessLevel::CREATE);
|
||||
validateSettings(*queue_settings, mode > LoadingStrictnessLevel::CREATE);
|
||||
|
||||
object_storage = configuration->createObjectStorage(context_, /* is_readonly */true);
|
||||
FormatFactory::instance().checkFormatName(configuration->format);
|
||||
@ -164,10 +164,12 @@ StorageObjectStorageQueue::StorageObjectStorageQueue(
|
||||
|
||||
LOG_INFO(log, "Using zookeeper path: {}", zk_path.string());
|
||||
|
||||
ObjectStorageQueueTableMetadata table_metadata(*queue_settings, storage_metadata.getColumns(), configuration_->format);
|
||||
ObjectStorageQueueMetadata::syncWithKeeper(zk_path, table_metadata, *queue_settings, log);
|
||||
auto table_metadata = ObjectStorageQueueMetadata::syncWithKeeper(
|
||||
zk_path, *queue_settings, storage_metadata.getColumns(), configuration_->format, log);
|
||||
|
||||
auto queue_metadata = std::make_unique<ObjectStorageQueueMetadata>(
|
||||
zk_path, std::move(table_metadata), queue_settings->cleanup_interval_min_ms, queue_settings->cleanup_interval_max_ms);
|
||||
|
||||
auto queue_metadata = std::make_unique<ObjectStorageQueueMetadata>(zk_path, std::move(table_metadata), *queue_settings);
|
||||
files_metadata = ObjectStorageQueueMetadataFactory::instance().getOrCreate(zk_path, std::move(queue_metadata));
|
||||
|
||||
task = getContext()->getSchedulePool().createTask("ObjectStorageQueueStreamingTask", [this] { threadFunc(); });
|
||||
|
Loading…
Reference in New Issue
Block a user