ClickHouse/src/Storages/MergeTree/MergeTreeBackgroundExecutor.h

211 lines
6.8 KiB
C++
Raw Normal View History

2021-08-30 19:37:03 +00:00
#pragma once
#include <deque>
#include <functional>
#include <atomic>
#include <mutex>
#include <future>
2021-08-30 19:37:03 +00:00
#include <condition_variable>
#include <set>
#include <iostream>
2021-08-30 19:37:03 +00:00
2021-09-03 22:15:20 +00:00
#include <boost/circular_buffer.hpp>
2021-10-02 07:13:14 +00:00
#include <base/shared_ptr_helper.h>
#include <base/logger_useful.h>
2021-08-30 19:37:03 +00:00
#include <Common/ThreadPool.h>
2021-09-02 17:40:29 +00:00
#include <Common/Stopwatch.h>
2021-09-06 22:07:41 +00:00
#include <Storages/MergeTree/IExecutableTask.h>
namespace DB
{
/// Error codes referenced by this header; they are only declared here and defined elsewhere.
namespace ErrorCodes
{
/// Thrown by the MergeTreeBackgroundExecutor constructor when it is given a zero task count.
extern const int LOGICAL_ERROR;
}
2021-08-30 19:37:03 +00:00
/// Forward declaration, so the shared pointer alias can be used inside the struct's own definition.
struct TaskRuntimeData;
/// Shared ownership handle under which the runtime queues store their entries.
using TaskRuntimeDataPtr = std::shared_ptr<TaskRuntimeData>;
2021-08-30 19:37:03 +00:00
/**
 * Holds a RAII object used to determine how many tasks are waiting for execution or executing at the moment.
 * Also holds the flags and primitives used to wait for the current task to finish executing.
 */
struct TaskRuntimeData
{
    /// Takes ownership of the task and constructs the metric increment from `metric_`.
    TaskRuntimeData(ExecutableTaskPtr && task_, CurrentMetrics::Metric metric_)
        : task(std::move(task_)), increment(std::move(metric_))
    {
    }

    /// The task this entry wraps.
    ExecutableTaskPtr task;

    /// Metric increment that lives exactly as long as this entry.
    CurrentMetrics::Increment increment;

    /// NOTE(review): presumably raised while the task is being removed from the executor;
    /// the code that reads and writes this flag is outside this header - confirm there.
    std::atomic_bool is_currently_deleting{false};

    /// Event to wait on until the task is done.
    /// autoreset=false is really required only by the unit test,
    /// in which several threads may remove tasks that belong to the same storage.
    /// Such a scenario is not possible in reality.
    Poco::Event is_done{/*autoreset=*/false};

    /// Cached value of task->getPriority(), so that the comparator does not make useless virtual calls.
    UInt64 priority{0};

    /// Comparator for the std heap algorithms. They keep the greatest element (according to the comparator)
    /// at the front, so ordering by "greater" puts the entry with the smallest priority value at the front.
    static bool comparePtrByPriority(const TaskRuntimeDataPtr & lhs, const TaskRuntimeDataPtr & rhs)
    {
        return rhs->priority < lhs->priority;
    }
};
/// FIFO queue of runtime task entries backed by boost::circular_buffer.
class OrdinaryRuntimeQueue
{
public:
    /// Removes and returns the entry at the front of the queue.
    /// The queue must not be empty: there is no check here, the caller has to test empty() first.
    TaskRuntimeDataPtr pop()
    {
        auto result = std::move(queue.front());
        queue.pop_front();
        return result;
    }

    /// Appends an entry at the back of the queue.
    /// NOTE(review): boost::circular_buffer::push_back overwrites the front element when the buffer is full,
    /// so this relies on the caller never pushing more entries than the configured capacity - confirm at call sites.
    void push(TaskRuntimeDataPtr item) { queue.push_back(std::move(item)); }

    /// Drops every queued entry whose task reports the storage id `id`.
    void remove(StorageID id)
    {
        /// The predicate takes elements by const reference: taking a shared_ptr by value would cost
        /// an atomic reference count increment and decrement for every inspected element.
        auto it = std::remove_if(queue.begin(), queue.end(),
            [&] (const auto & item) -> bool { return item->task->getStorageID() == id; });
        queue.erase(it, queue.end());
    }

    /// Sets the capacity of the underlying circular buffer.
    void setCapacity(size_t count) { queue.set_capacity(count); }

    /// Returns true when no entries are queued.
    bool empty() const { return queue.empty(); }

private:
    /// Created with zero capacity; setCapacity() has to be called before the first push().
    boost::circular_buffer<TaskRuntimeDataPtr> queue{0};
};
/// Uses a heap to pop a task with minimal priority
class MergeMutateRuntimeQueue
2021-08-30 19:37:03 +00:00
{
public:
TaskRuntimeDataPtr pop()
{
std::pop_heap(buffer.begin(), buffer.end(), TaskRuntimeData::comparePtrByPriority);
auto result = std::move(buffer.back());
buffer.pop_back();
return result;
}
void push(TaskRuntimeDataPtr item)
{
item->priority = item->task->getPriority();
buffer.push_back(std::move(item));
std::push_heap(buffer.begin(), buffer.end(), TaskRuntimeData::comparePtrByPriority);
}
void remove(StorageID id)
{
auto it = std::remove_if(buffer.begin(), buffer.end(),
[&] (auto item) -> bool { return item->task->getStorageID() == id; });
buffer.erase(it, buffer.end());
std::make_heap(buffer.begin(), buffer.end(), TaskRuntimeData::comparePtrByPriority);
}
void setCapacity(size_t count) { buffer.reserve(count); }
bool empty() { return buffer.empty(); }
private:
std::vector<TaskRuntimeDataPtr> buffer{};
};
2021-08-30 19:37:03 +00:00
2021-09-03 22:15:20 +00:00
/**
 * Executor for background MergeTree-related operations such as merges, mutations, fetches and so on.
 * It can execute only successors of the ExecutableTask interface,
 * which is a self-written coroutine. It suspends when executeStep() returns true.
 *
 * There are two queues of tasks: pending (main queue for all the tasks) and active (currently executing).
 * The pending queue is needed since the number of tasks may be greater than the number of threads that execute them.
 * Pending tasks are tasks that were successfully scheduled to an executor or tasks that have some extra steps to execute.
 * There is an invariant that a task may occur only in one of these queues. It can occur in both queues only in critical sections.
*
2021-09-08 00:21:21 +00:00
* Pending: Active:
*
* |s| |s| |s| |s| |s| |s| |s| |s| |s| |s| |s|
* |s| |s| |s| |s| |s| |s| |s| |s| |s| |s|
* |s| |s| |s| |s| |s| |s| |s|
* |s| |s| |s| |s|
* |s| |s|
* |s|
*
 * Each task is simply a sequence of steps. Heavier tasks have longer sequences.
 * When a step of a task is executed, we move the task to the pending queue and take another one from the queue's head.
 * With this architecture all small merges / mutations will be executed faster than bigger ones.
 *
 * We use boost::circular_buffer as a container for queues not to do any allocations.
 *
 * Another nuisance that we face is that background operations always interact with an associated Storage.
 * So, when a Storage wants to shut down, it must wait until all its background operations are finished.
 */
/// `Queue` is the container used for the pending tasks. This header declares instantiations
/// for MergeMutateRuntimeQueue and OrdinaryRuntimeQueue.
template <class Queue>
class MergeTreeBackgroundExecutor final : public shared_ptr_helper<MergeTreeBackgroundExecutor<Queue>>
{
public:
    /// Configures the queues and the thread pool and starts the worker jobs.
    ///
    /// @param name_            Name of the executor; it is only stored here.
    /// @param threads_count_   Number of jobs running threadFunction() that are scheduled on the pool.
    /// @param max_tasks_count_ Capacity given to both the pending and the active queue. Must not be zero.
    /// @param metric_          Metric that is stored for later use (presumably for TaskRuntimeData entries -
    ///                         the code that uses it is outside this header).
    ///
    /// Throws Exception with LOGICAL_ERROR if max_tasks_count_ is zero.
    MergeTreeBackgroundExecutor(
        String name_,
        size_t threads_count_,
        size_t max_tasks_count_,
        CurrentMetrics::Metric metric_)
        : name(std::move(name_))
        , threads_count(threads_count_)
        , max_tasks_count(max_tasks_count_)
        , metric(metric_)
    {
        if (max_tasks_count == 0)
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Task count for MergeTreeBackgroundExecutor must not be zero");

        pending.setCapacity(max_tasks_count);
        active.set_capacity(max_tasks_count);

        /// The pool is always configured for at least one thread, even when threads_count is zero.
        /// The explicit template argument keeps std::max well-formed on platforms
        /// where size_t is not the same type as unsigned long.
        const size_t pool_size = std::max<size_t>(1, threads_count);
        pool.setMaxThreads(pool_size);
        pool.setMaxFreeThreads(pool_size);
        pool.setQueueSize(pool_size);

        /// The jobs capture `this`, so they must not outlive the executor; the destructor calls wait().
        for (size_t number = 0; number < threads_count; ++number)
            pool.scheduleOrThrowOnError([this] { threadFunction(); });
    }

    ~MergeTreeBackgroundExecutor()
    {
        wait();
    }

    /// Defined outside this header. NOTE(review): judging by the name and the return type,
    /// returns false when the task was not accepted - confirm in the implementation.
    bool trySchedule(ExecutableTaskPtr task);

    /// Defined outside this header. NOTE(review): presumably removes the tasks of the storage `id`
    /// from the queues and waits for the ones being executed - confirm in the implementation.
    void removeTasksCorrespondingToStorage(StorageID id);

    /// Defined outside this header; it is called from the destructor.
    void wait();

private:
    String name;
    size_t threads_count{0};
    size_t max_tasks_count{0};
    CurrentMetrics::Metric metric;

    /// Defined outside this header.
    void routine(TaskRuntimeDataPtr item);

    /// Body of every job scheduled on the pool by the constructor. Defined outside this header.
    void threadFunction();

    /// Initially it will be empty
    Queue pending{};
    boost::circular_buffer<TaskRuntimeDataPtr> active{0};

    std::mutex mutex;
    std::condition_variable has_tasks;
    std::atomic_bool shutdown{false};

    /// Declared last, so it is destroyed first, while the members above still exist.
    ThreadPool pool;
};
/// Explicit instantiation declarations: these two specializations are not instantiated implicitly
/// in the translation units that include this header.
extern template class MergeTreeBackgroundExecutor<MergeMutateRuntimeQueue>;
extern template class MergeTreeBackgroundExecutor<OrdinaryRuntimeQueue>;
/// Executor whose pending queue is a heap ordered by task priority.
using MergeMutateBackgroundExecutor = MergeTreeBackgroundExecutor<MergeMutateRuntimeQueue>;
/// Executor whose pending queue is a FIFO circular buffer.
using OrdinaryBackgroundExecutor = MergeTreeBackgroundExecutor<OrdinaryRuntimeQueue>;
2021-08-30 19:37:03 +00:00
}