2021-08-30 19:37:03 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <deque>
|
|
|
|
#include <functional>
|
|
|
|
#include <atomic>
|
|
|
|
#include <mutex>
|
2021-09-02 21:31:32 +00:00
|
|
|
#include <future>
|
2021-08-30 19:37:03 +00:00
|
|
|
#include <condition_variable>
|
2021-09-02 21:31:32 +00:00
|
|
|
#include <set>
|
2021-08-30 19:37:03 +00:00
|
|
|
|
2021-09-03 22:15:20 +00:00
|
|
|
#include <boost/circular_buffer.hpp>
|
|
|
|
|
2021-08-30 19:37:03 +00:00
|
|
|
#include <common/shared_ptr_helper.h>
|
2021-09-03 13:02:49 +00:00
|
|
|
#include <common/logger_useful.h>
|
2021-08-30 19:37:03 +00:00
|
|
|
#include <Common/ThreadPool.h>
|
2021-09-02 17:40:29 +00:00
|
|
|
#include <Common/Stopwatch.h>
|
2021-09-06 22:07:41 +00:00
|
|
|
#include <Storages/MergeTree/IExecutableTask.h>
|
2021-08-30 19:37:03 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2021-09-03 22:15:20 +00:00
|
|
|
/**
|
|
|
|
* Executor for background MergeTree-related operations such as merges, mutations, fetches and so on.
|
|
|
|
* It can execute only successors of ExecutableTask interface.
|
|
|
|
* Such a task is a self-written coroutine. It suspends when its execute() method returns true.
|
|
|
|
*
|
|
|
|
* Executor consists of ThreadPool to execute pieces of a task (basically calls 'execute' on a task)
|
|
|
|
* and a scheduler thread, which manages the tasks. Due to bad experience of working under
|
|
|
|
* high memory pressure scheduler thread mustn't do any allocations,
|
|
|
|
* because it would be a fatal error if this thread died from a random exception.
|
|
|
|
*
|
|
|
|
* There are two queues of tasks: pending (main queue for all the tasks) and active (currently executing).
|
|
|
|
* There is an invariant that a task may occur only in one of these queues. It can occur in both queues only in critical sections.
|
|
|
|
*
|
|
|
|
* Due to all caveats I described above we use boost::circular_buffer as a container for queues.
|
|
|
|
*
|
2021-09-06 12:01:16 +00:00
|
|
|
* Another nuisance that we face is that background operations always interact with an associated Storage.
|
2021-09-03 22:15:20 +00:00
|
|
|
* So, when a Storage wants to shut down, it must wait until all its background operations are finished.
|
|
|
|
*/
|
2021-08-30 19:37:03 +00:00
|
|
|
class MergeTreeBackgroundExecutor : public shared_ptr_helper<MergeTreeBackgroundExecutor>
{
public:

    /// Callable returning a count; used for both the thread count and the task limit.
    using CountGetter = std::function<size_t()>;
    using Callback = std::function<void()>;

    /// Kind of background work an executor instance is created for.
    /// The textual name of the executor is derived from it via toString().
    enum class Type
    {
        MERGE_MUTATE,
        FETCH,
        MOVE
    };

    /** Stores the configuration getters, applies the initial configuration
      * and starts the scheduler thread.
      *
      * @param type_                  kind of tasks; also determines `name`.
      * @param threads_count_getter_  callable stored as `threads_count_getter`.
      * @param max_task_count_getter_ callable stored as `max_task_count_getter`.
      * @param metric_                metric passed to every Item's CurrentMetrics::Increment.
      *
      * The getters are accepted by rvalue reference, so they are moved (not copied)
      * into the members: a named rvalue reference is an lvalue and would otherwise
      * silently copy the std::function.
      */
    MergeTreeBackgroundExecutor(
        Type type_,
        CountGetter && threads_count_getter_,
        CountGetter && max_task_count_getter_,
        CurrentMetrics::Metric metric_)
        : type(type_)
        , threads_count_getter(std::move(threads_count_getter_))
        , max_task_count_getter(std::move(max_task_count_getter_))
        , metric(metric_)
    {
        name = toString(type);

        updateConfiguration();
        /// Started last: the thread captures `this` and all members are already constructed here.
        scheduler = ThreadFromGlobalPool([this]() { schedulerThreadFunction(); });
    }

    /// Calls wait() before the members are destroyed.
    ~MergeTreeBackgroundExecutor()
    {
        wait();
    }

    bool trySchedule(ExecutableTaskPtr task);

    void removeTasksCorrespondingToStorage(StorageID id);

    void wait();

    /// Number of items in the `active` queue, read under `mutex`.
    size_t activeCount() const
    {
        std::lock_guard lock(mutex);
        return active.size();
    }

    /// Number of items in the `pending` queue, read under `mutex`.
    size_t pendingCount() const
    {
        std::lock_guard lock(mutex);
        return pending.size();
    }

private:

    void updateConfiguration();

    static String toString(Type type);

    Type type;
    String name;
    CountGetter threads_count_getter;
    CountGetter max_task_count_getter;
    CurrentMetrics::Metric metric;

    size_t threads_count{0};
    size_t max_tasks_count{0};

    AtomicStopwatch update_timer;

    /// A scheduled task together with its bookkeeping state.
    struct Item
    {
        explicit Item(ExecutableTaskPtr && task_, CurrentMetrics::Metric metric_)
            : task(std::move(task_))
            , increment(std::move(metric_))
        {
        }

        ExecutableTaskPtr task;
        /// Constructed from the executor's metric; lives as long as the Item.
        CurrentMetrics::Increment increment;

        std::atomic_bool is_currently_deleting{false};
        Poco::Event is_done;
    };

    using ItemPtr = std::shared_ptr<Item>;

    void routine(ItemPtr item);
    void schedulerThreadFunction();

    /// Initially it will be empty
    boost::circular_buffer<ItemPtr> pending{0};
    boost::circular_buffer<ItemPtr> active{0};

    /// Locked by activeCount() / pendingCount() while reading the queues.
    /// `mutable` so that those read-only accessors can be const.
    mutable std::mutex mutex;
    std::condition_variable has_tasks;

    std::atomic_bool shutdown{false};

    ThreadPool pool;
    ThreadFromGlobalPool scheduler;
};
|
|
|
|
|
|
|
|
}
|