ClickHouse/src/Storages/MergeTree/BackgroundJobsExecutor.h

156 lines
4.9 KiB
C++
Raw Normal View History

2020-10-13 14:25:42 +00:00
#pragma once
#include <Storages/MergeTree/MergeTreeData.h>
#include <Common/ThreadPool.h>
#include <Core/BackgroundSchedulePool.h>
2020-10-14 12:32:35 +00:00
#include <pcg_random.hpp>
2020-10-13 14:25:42 +00:00
2020-10-14 12:44:10 +00:00
2020-10-13 14:25:42 +00:00
namespace DB
{
2020-10-14 12:44:10 +00:00
2020-10-16 10:12:31 +00:00
/// Settings for background tasks scheduling. Each background executor has one
/// BackgroundSchedulingPoolTask and, depending on the execution result, may put this
/// task to sleep according to these settings. Look at scheduleTask function for details.
struct BackgroundTaskSchedulingSettings
{
    double thread_sleep_seconds_random_part = 1.0;
    double thread_sleep_seconds_if_nothing_to_do = 0.1;
    double task_sleep_seconds_when_no_work_max = 600;
    /// For exponential backoff.
    double task_sleep_seconds_when_no_work_multiplier = 1.1;

    double task_sleep_seconds_when_no_work_random_part = 1.0;

    /// Deprecated settings, don't affect background execution.
    double thread_sleep_seconds = 10;
    double task_sleep_seconds_when_no_work_min = 10;
};
2020-10-14 12:44:10 +00:00
2020-10-16 10:12:31 +00:00
/// Pool type where we must execute a new job. Each background executor can have several
/// background pools. When it receives a new job it will execute the new task in the corresponding pool.
enum class PoolType
{
    MERGE_MUTATE,
    MOVE,
};
2020-10-13 14:25:42 +00:00
2020-10-16 10:12:31 +00:00
/// Result from background job providers. Function which will be executed in pool and pool type.
2020-10-14 14:56:42 +00:00
struct JobAndPool
2020-10-13 14:25:42 +00:00
{
2020-10-14 14:56:42 +00:00
ThreadPool::Job job;
PoolType pool_type;
};
2020-10-16 10:12:31 +00:00
/// Background jobs executor which execute heavy-weight background tasks for MergTree tables, like
/// background merges, moves, mutations, fetches and so on.
/// Consists of two important parts:
/// 1) Task in background scheduling pool which receives new jobs from storages and put them into required pool.
/// 2) One or more ThreadPool objects, which execute background jobs.
2020-10-14 14:56:42 +00:00
class IBackgroundJobExecutor
{
2020-10-16 10:12:31 +00:00
protected:
2020-10-14 07:22:48 +00:00
Context & global_context;
2020-10-16 10:12:31 +00:00
/// Configuration for single background ThreadPool
struct PoolConfig
{
/// This pool type
PoolType pool_type;
/// Max pool size in threads
size_t max_pool_size;
/// Metric that we have to increment when we execute task in this pool
CurrentMetrics::Metric tasks_metric;
};
2020-10-14 14:56:42 +00:00
private:
2020-10-16 10:12:31 +00:00
/// Name for task in background scheduling pool
2020-10-14 14:56:42 +00:00
String task_name;
2020-10-16 10:12:31 +00:00
/// Settings for execution control of background scheduling task
BackgroundTaskSchedulingSettings sleep_settings;
/// Useful for random backoff timeouts generation
2020-10-14 12:32:35 +00:00
pcg64 rng;
2020-10-13 14:25:42 +00:00
2020-10-16 10:12:31 +00:00
/// How many times execution of background job failed or we have
/// no new jobs.
2020-10-15 13:57:50 +00:00
std::atomic<size_t> no_work_done_count{0};
2020-10-14 14:56:42 +00:00
2020-10-16 10:12:31 +00:00
/// Pools where we execute background jobs
2020-10-14 14:56:42 +00:00
std::unordered_map<PoolType, ThreadPool> pools;
2020-10-16 10:12:31 +00:00
/// Configs for background pools
2020-10-14 14:56:42 +00:00
std::unordered_map<PoolType, PoolConfig> pools_configs;
2020-10-13 14:25:42 +00:00
2020-10-16 10:12:31 +00:00
/// Scheduling task which assign jobs in background pool
2020-10-14 14:56:42 +00:00
BackgroundSchedulePool::TaskHolder scheduling_task;
2020-10-16 10:12:31 +00:00
/// Mutex for thread safety
2020-10-15 10:41:36 +00:00
std::mutex scheduling_task_mutex;
2020-10-16 13:48:12 +00:00
/// Mutex for pcg random generator thread safety
std::mutex random_mutex;
2020-10-13 14:25:42 +00:00
public:
2020-10-16 10:12:31 +00:00
/// These three functions are thread safe
/// Start background task and start to assign jobs
2020-10-13 14:25:42 +00:00
void start();
2020-10-16 10:12:31 +00:00
/// Schedule background task as soon as possible, even if it sleep at this
/// moment for some reason.
2020-10-15 07:43:50 +00:00
void triggerTask();
2020-10-16 10:12:31 +00:00
/// Finish execution: deactivate background task and wait already scheduled jobs
2020-10-13 14:25:42 +00:00
void finish();
2020-10-14 14:56:42 +00:00
2020-10-16 10:12:31 +00:00
/// Just call finish
2020-10-14 14:56:42 +00:00
virtual ~IBackgroundJobExecutor();
protected:
2020-10-15 10:22:02 +00:00
IBackgroundJobExecutor(
Context & global_context_,
2020-10-16 10:12:31 +00:00
const BackgroundTaskSchedulingSettings & sleep_settings_,
2020-10-15 10:22:02 +00:00
const std::vector<PoolConfig> & pools_configs_);
2020-10-16 10:12:31 +00:00
/// Name for task in background schedule pool
virtual String getBackgroundTaskName() const = 0;
/// Get job for background execution
2020-10-14 14:56:42 +00:00
virtual std::optional<JobAndPool> getBackgroundJob() = 0;
2020-10-15 10:22:02 +00:00
2020-10-14 14:56:42 +00:00
private:
2020-10-16 10:12:31 +00:00
/// Function that executes in background scheduling pool
2020-10-14 14:56:42 +00:00
void jobExecutingTask();
2020-10-16 10:12:31 +00:00
/// Recalculate timeouts when we have to check for a new job
2020-10-21 08:31:57 +00:00
void scheduleTask(bool job_done, bool with_backoff=false);
2020-10-16 13:48:12 +00:00
/// Return random add for sleep in case of error
double getSleepRandomAdd();
2020-10-13 14:25:42 +00:00
};
2020-10-14 14:56:42 +00:00
2020-10-16 10:12:31 +00:00
/// Main jobs executor: merges, mutations, fetches and so on
2020-10-14 14:56:42 +00:00
class BackgroundJobsExecutor final : public IBackgroundJobExecutor
{
2020-10-15 10:22:02 +00:00
private:
MergeTreeData & data;
2020-10-14 14:56:42 +00:00
public:
BackgroundJobsExecutor(
MergeTreeData & data_,
Context & global_context_);
protected:
2020-10-16 10:12:31 +00:00
String getBackgroundTaskName() const override;
2020-10-14 14:56:42 +00:00
std::optional<JobAndPool> getBackgroundJob() override;
};
2020-10-16 10:12:31 +00:00
/// Move jobs executor, move parts between disks in the background
/// Does nothing in case of default configuration
2020-10-14 14:56:42 +00:00
class BackgroundMovesExecutor final : public IBackgroundJobExecutor
{
2020-10-15 10:22:02 +00:00
private:
MergeTreeData & data;
2020-10-14 14:56:42 +00:00
public:
BackgroundMovesExecutor(
MergeTreeData & data_,
Context & global_context_);
protected:
2020-10-16 10:12:31 +00:00
String getBackgroundTaskName() const override;
2020-10-14 14:56:42 +00:00
std::optional<JobAndPool> getBackgroundJob() override;
};
2020-10-13 14:25:42 +00:00
}