// ClickHouse/src/Interpreters/AsynchronousInsertQueue.h
#pragma once

#include <Parsers/IAST_fwd.h>
#include <Common/RWLock.h>
#include <Common/ThreadPool.h>
#include <Core/Settings.h>
#include <Poco/Logger.h>

#include <atomic>
#include <chrono>
#include <condition_variable>
#include <exception>
#include <list>
#include <memory>
#include <mutex>
#include <shared_mutex>
#include <unordered_map>

namespace DB
{
class ASTInsertQuery;
struct BlockIO;
2021-09-09 16:10:53 +00:00
/// A queue, that stores data for insert queries and periodically flushes it to tables.
/// The data is grouped by table, format and settings of insert query.
2021-08-31 02:16:02 +00:00
class AsynchronousInsertQueue : public WithContext
2021-03-04 11:10:21 +00:00
{
2021-08-31 02:16:02 +00:00
public:
using Milliseconds = std::chrono::milliseconds;
/// Using structure to allow and benefit from designated initialization and not mess with a positional arguments in ctor.
struct Timeout
{
Milliseconds busy;
Milliseconds stale;
2021-08-31 02:16:02 +00:00
};
AsynchronousInsertQueue(ContextPtr context_, size_t pool_size, size_t max_data_size, const Timeout & timeouts);
~AsynchronousInsertQueue();
2021-09-01 23:18:09 +00:00
void push(ASTPtr query, ContextPtr query_context);
2021-08-31 02:16:02 +00:00
void waitForProcessingQuery(const String & query_id, const Milliseconds & timeout);
private:
struct InsertQuery
{
ASTPtr query;
Settings settings;
2021-09-04 00:57:05 +00:00
InsertQuery(const ASTPtr & query_, const Settings & settings_);
InsertQuery(const InsertQuery & other);
InsertQuery & operator=(const InsertQuery & other);
2021-08-31 02:16:02 +00:00
bool operator==(const InsertQuery & other) const;
struct Hash { UInt64 operator()(const InsertQuery & insert_query) const; };
};
struct InsertData
{
struct Entry
2021-04-19 14:51:26 +00:00
{
2021-08-31 02:16:02 +00:00
public:
String bytes;
String query_id;
void finish(std::exception_ptr exception_ = nullptr);
2021-09-01 23:18:09 +00:00
bool wait(const Milliseconds & timeout) const;
bool isFinished() const;
std::exception_ptr getException() const;
2021-08-31 02:16:02 +00:00
private:
2021-09-01 23:18:09 +00:00
mutable std::mutex mutex;
mutable std::condition_variable cv;
bool finished = false;
std::exception_ptr exception;
2021-04-19 14:51:26 +00:00
};
2021-08-31 02:16:02 +00:00
using EntryPtr = std::shared_ptr<Entry>;
2021-03-04 11:10:21 +00:00
2021-08-31 02:16:02 +00:00
std::list<EntryPtr> entries;
size_t size = 0;
2021-03-04 11:10:21 +00:00
2021-08-31 02:16:02 +00:00
/// Timestamp of the first insert into queue, or after the last queue dump.
/// Used to detect for how long the queue is active, so we can dump it by timer.
std::chrono::time_point<std::chrono::steady_clock> first_update = std::chrono::steady_clock::now();
2021-08-31 02:16:02 +00:00
/// Timestamp of the last insert into queue.
/// Used to detect for how long the queue is stale, so we can dump it by another timer.
std::chrono::time_point<std::chrono::steady_clock> last_update;
};
2021-08-27 21:29:10 +00:00
2021-08-31 02:16:02 +00:00
using InsertDataPtr = std::unique_ptr<InsertData>;
2021-03-04 11:10:21 +00:00
2021-09-09 16:10:53 +00:00
/// A separate container, that holds a data and a mutex for it.
/// When it's needed to process current chunk of data, it can be moved for processing
/// and new data can be recreated without holding a lock during processing.
2021-08-31 02:16:02 +00:00
struct Container
{
std::mutex mutex;
InsertDataPtr data;
};
2021-03-04 11:10:21 +00:00
2021-08-31 02:16:02 +00:00
using Queue = std::unordered_map<InsertQuery, std::shared_ptr<Container>, InsertQuery::Hash>;
using QueueIterator = Queue::iterator;
2021-09-03 16:46:09 +00:00
mutable std::shared_mutex rwlock;
2021-08-31 02:16:02 +00:00
Queue queue;
2021-03-04 11:10:21 +00:00
2021-09-01 23:18:09 +00:00
using QueryIdToEntry = std::unordered_map<String, InsertData::EntryPtr>;
2021-09-03 16:46:09 +00:00
mutable std::mutex currently_processing_mutex;
2021-09-01 23:18:09 +00:00
QueryIdToEntry currently_processing_queries;
2021-04-19 14:51:26 +00:00
2021-08-31 02:16:02 +00:00
/// Logic and events behind queue are as follows:
/// - reset_timeout: if queue is empty for some time, then we delete the queue and free all associated resources, e.g. tables.
/// - busy_timeout: if queue is active for too long and there are a lot of rapid inserts, then we dump the data, so it doesn't
/// grow for a long period of time and users will be able to select new data in deterministic manner.
/// - stale_timeout: if queue is stale for too long, then we dump the data too, so that users will be able to select the last
/// piece of inserted data.
/// - access_timeout: also we have to check if user still has access to the tables periodically, and if the access is lost, then
/// we dump pending data and delete queue immediately.
/// - max_data_size: if the maximum size of data is reached, then again we dump the data.
2021-03-04 11:10:21 +00:00
2021-08-31 02:16:02 +00:00
const size_t max_data_size; /// in bytes
const Milliseconds busy_timeout;
const Milliseconds stale_timeout;
2021-03-04 11:10:21 +00:00
2021-08-31 02:16:02 +00:00
std::atomic<bool> shutdown{false};
ThreadPool pool; /// dump the data only inside this pool.
ThreadFromGlobalPool dump_by_first_update_thread; /// uses busy_timeout and busyCheck()
ThreadFromGlobalPool dump_by_last_update_thread; /// uses stale_timeout and staleCheck()
2021-09-09 16:10:53 +00:00
ThreadFromGlobalPool cleanup_thread; /// uses busy_timeout and cleanup()
2021-03-04 11:10:21 +00:00
2021-08-31 02:16:02 +00:00
Poco::Logger * log = &Poco::Logger::get("AsynchronousInsertQueue");
2021-03-04 11:10:21 +00:00
2021-08-31 02:16:02 +00:00
void busyCheck();
void staleCheck();
void cleanup();
2021-04-19 14:51:26 +00:00
2021-09-01 15:06:11 +00:00
void scheduleProcessDataJob(const InsertQuery & key, InsertDataPtr data, ContextPtr global_context);
2021-08-31 02:16:02 +00:00
static void processData(InsertQuery key, InsertDataPtr data, ContextPtr global_context);
2021-09-01 23:18:09 +00:00
public:
2021-09-03 16:46:09 +00:00
Queue getQueue() const
{
std::shared_lock lock(rwlock);
return queue;
}
2021-03-04 11:10:21 +00:00
};
}