ClickHouse/src/Interpreters/AsynchronousInsertQueue.cpp
2024-04-10 21:21:42 +02:00

1073 lines
38 KiB
C++

#include <Interpreters/AsynchronousInsertQueue.h>
#include <Access/Common/AccessFlags.h>
#include <Access/EnabledQuota.h>
#include <Core/Settings.h>
#include <Formats/FormatFactory.h>
#include <IO/ConcatReadBuffer.h>
#include <IO/LimitReadBuffer.h>
#include <IO/ReadBufferFromMemory.h>
#include <IO/ReadBufferFromString.h>
#include <IO/copyData.h>
#include <Interpreters/AsynchronousInsertLog.h>
#include <Interpreters/Context.h>
#include <Interpreters/DatabaseCatalog.h>
#include <Interpreters/InterpreterInsertQuery.h>
#include <Interpreters/ProcessList.h>
#include <Interpreters/executeQuery.h>
#include <Parsers/ASTInsertQuery.h>
#include <Parsers/formatAST.h>
#include <Parsers/queryToString.h>
#include <Processors/Executors/CompletedPipelineExecutor.h>
#include <Processors/Executors/StreamingFormatExecutor.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
#include <Processors/Transforms/AddingDefaultsTransform.h>
#include <Processors/Transforms/getSourceFromASTInsertQuery.h>
#include <QueryPipeline/BlockIO.h>
#include <QueryPipeline/Pipe.h>
#include <QueryPipeline/QueryPipeline.h>
#include <Storages/IStorage.h>
#include <Common/CurrentThread.h>
#include <Common/DateLUT.h>
#include <Common/FieldVisitorHash.h>
#include <Common/SensitiveDataMasker.h>
#include <Common/SipHash.h>
#include <Common/logger_useful.h>
/// Metrics defined elsewhere that are referenced by the asynchronous insert queue code in this file.
namespace CurrentMetrics
{
/// Increased once per pushed entry in pushDataChunk() and decreased by the number of entries
/// of a batch when processData() exits.
extern const Metric PendingAsyncInsert;
/// The three metrics below are passed to the flushing thread pool in the queue's constructor.
extern const Metric AsynchronousInsertThreads;
extern const Metric AsynchronousInsertThreadsActive;
extern const Metric AsynchronousInsertThreadsScheduled;
/// Adjusted in this file when batches are added to or taken out of the shard queues
/// (one unit per queued key, and the bytes of data buffered for it).
extern const Metric AsynchronousInsertQueueSize;
extern const Metric AsynchronousInsertQueueBytes;
}
/// Profile events defined elsewhere that are incremented by the code in this file.
namespace ProfileEvents
{
/// Incremented once per entry pushed to the queue.
extern const Event AsyncInsertQuery;
/// Incremented by the byte size of each pushed entry.
extern const Event AsyncInsertBytes;
/// Incremented by the number of rows of each batch assembled in processData().
extern const Event AsyncInsertRows;
/// Incremented when an entry is finished with an exception.
extern const Event FailedAsyncInsertQuery;
}
namespace DB
{
/// Error codes defined elsewhere; declared here for use in this translation unit.
namespace ErrorCodes
{
extern const int TIMEOUT_EXCEEDED;
extern const int UNKNOWN_EXCEPTION;
extern const int UNKNOWN_FORMAT;
extern const int BAD_ARGUMENTS;
extern const int LOGICAL_ERROR;
extern const int INVALID_SETTING_VALUE;
}
/// Changed settings with these names are ignored by InsertQuery's constructor:
/// they are neither mixed into the key hash nor recorded in `setting_changes`.
static const NameSet settings_to_skip
{
/// We don't consider this setting because it is only for deduplication,
/// which means we can put two inserts with different tokens in the same block safely.
"insert_deduplication_token",
/// NOTE(review): presumably skipped because it only annotates log records and
/// does not influence how data is inserted - confirm.
"log_comment",
};
/// Builds the key that identifies a group of inserts which may be flushed together.
///
/// The key keeps its own clone of the query and a 128-bit hash computed from:
/// the data kind, the query tree (aliases ignored), the user id with its current roles
/// (only when a user id is present) and every changed setting that is not listed in `settings_to_skip`.
/// The same filtered settings are also collected into `setting_changes`.
AsynchronousInsertQueue::InsertQuery::InsertQuery(
    const ASTPtr & query_,
    const std::optional<UUID> & user_id_,
    const std::vector<UUID> & current_roles_,
    const Settings & settings_,
    DataKind data_kind_)
    : query(query_->clone())
    , query_str(queryToString(query))
    , user_id(user_id_)
    , current_roles(current_roles_)
    , settings(settings_)
    , data_kind(data_kind_)
{
    SipHash hasher;

    hasher.update(data_kind);
    query->updateTreeHash(hasher, /*ignore_aliases=*/ true);

    /// Roles are taken into account only together with a user.
    if (user_id)
    {
        hasher.update(*user_id);
        for (const auto & role : current_roles)
            hasher.update(role);
    }

    for (const auto & changed_setting : settings.allChanged())
    {
        const bool is_relevant = !settings_to_skip.contains(changed_setting.getName());
        if (is_relevant)
        {
            setting_changes.emplace_back(changed_setting.getName(), changed_setting.getValue());
            hasher.update(changed_setting.getName());
            applyVisitor(FieldVisitorHash(hasher), changed_setting.getValue());
        }
    }

    hash = hasher.get128();
}
/// Copy assignment. The query tree is deep-copied with clone(), so the two keys never share an AST;
/// all other fields (including the precomputed hash) are copied as they are.
AsynchronousInsertQueue::InsertQuery &
AsynchronousInsertQueue::InsertQuery::operator=(const InsertQuery & other)
{
    /// Nothing to do on self-assignment.
    if (this == &other)
        return *this;

    query = other.query->clone();
    query_str = other.query_str;
    user_id = other.user_id;
    current_roles = other.current_roles;
    settings = other.settings;
    data_kind = other.data_kind;
    hash = other.hash;
    setting_changes = other.setting_changes;

    return *this;
}
/// Two keys are equal when the tuples produced by toTupleCmp() compare equal.
bool AsynchronousInsertQueue::InsertQuery::operator==(const InsertQuery & other) const
{
    const auto own_fields = toTupleCmp();
    const auto other_fields = other.toTupleCmp();
    return own_fields == other_fields;
}
/// Creates one queued insert.
///
/// Takes ownership of the data chunk and of the query id, copies the deduplication token and the format name,
/// remembers the memory tracker pointer it was given and stamps the entry with the current wall-clock time.
AsynchronousInsertQueue::InsertData::Entry::Entry(
    DataChunk && chunk_,
    String && query_id_,
    const String & async_dedup_token_,
    const String & format_,
    MemoryTracker * user_memory_tracker_)
    : chunk(std::move(chunk_)),
      query_id(std::move(query_id_)),
      async_dedup_token(async_dedup_token_),
      format(format_),
      user_memory_tracker(user_memory_tracker_),
      create_time(std::chrono::system_clock::now())
{
}
/// Releases the data held by the entry. Does nothing if the chunk is already empty.
void AsynchronousInsertQueue::InsertData::Entry::resetChunk()
{
    if (!chunk.empty())
    {
        // To avoid races on counter of user's MemoryTracker we should free memory at this moment.
        // Entries data must be destroyed in context of user who runs async insert.
        // Each entry in the list may correspond to a different user,
        // so we need to switch current thread's MemoryTracker.
        MemoryTrackerSwitcher tracker_guard(user_memory_tracker);
        chunk = {};
    }
}
/// Completes the entry exactly once: frees its data and fulfils the promise,
/// either with a value or with `exception_` if one was given.
/// Repeated calls after the first one are ignored.
void AsynchronousInsertQueue::InsertData::Entry::finish(std::exception_ptr exception_)
{
    const bool was_already_finished = finished.exchange(true);
    if (was_already_finished)
        return;

    resetChunk();

    if (!exception_)
    {
        promise.set_value();
        return;
    }

    promise.set_exception(exception_);
    ProfileEvents::increment(ProfileEvents::FailedAsyncInsertQuery, 1);
}
/// Returns a copy of the stored pair of flush time points, taken under a shared lock.
AsynchronousInsertQueue::QueueShardFlushTimeHistory::TimePoints
AsynchronousInsertQueue::QueueShardFlushTimeHistory::getRecentTimePoints() const
{
    std::shared_lock read_guard(mutex);
    TimePoints snapshot = time_points;
    return snapshot;
}
/// Shifts the history by one step under an exclusive lock:
/// the previous "second" point becomes "first" and the current steady-clock time becomes "second".
void AsynchronousInsertQueue::QueueShardFlushTimeHistory::updateWithCurrentTime()
{
    std::unique_lock write_guard(mutex);

    const auto previous_flush = time_points.second;
    time_points.second = std::chrono::steady_clock::now();
    time_points.first = previous_flush;
}
/// Creates the queue with `pool_size_` shards, one flush-time history per shard,
/// a thread pool of the same size and one deadline-watching thread per shard.
///
/// Throws BAD_ARGUMENTS if `pool_size_` is zero.
AsynchronousInsertQueue::AsynchronousInsertQueue(ContextPtr context_, size_t pool_size_, bool flush_on_shutdown_)
    : WithContext(context_)
    , pool_size(pool_size_)
    , flush_on_shutdown(flush_on_shutdown_)
    , queue_shards(pool_size)
    , flush_time_history_per_queue_shard(pool_size)
    , pool(
        CurrentMetrics::AsynchronousInsertThreads,
        CurrentMetrics::AsynchronousInsertThreadsActive,
        CurrentMetrics::AsynchronousInsertThreadsScheduled,
        pool_size)
{
    if (pool_size == 0)
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "pool_size cannot be zero");

    const auto & settings = getContext()->getSettingsRef();

    /// Every shard starts with the smaller of the configured min and max busy timeouts.
    const auto initial_busy_timeout
        = std::min(Milliseconds(settings.async_insert_busy_timeout_min_ms), Milliseconds(settings.async_insert_busy_timeout_max_ms));
    for (auto & shard : queue_shards)
        shard.busy_timeout_ms = initial_busy_timeout;

    /// Start the per-shard threads only after all shards are initialized.
    for (size_t shard_num = 0; shard_num < pool_size; ++shard_num)
        dump_by_first_update_threads.emplace_back([this, shard_num] { processBatchDeadlines(shard_num); });
}
/// Stops the queue.
///
/// Sets the shutdown flag, then for every shard wakes up and joins its deadline thread.
/// What happens to the batches still queued in the shard depends on `flush_on_shutdown`:
///  - if set, each batch is scheduled for processing on the thread pool;
///  - otherwise every pending entry is finished with a TIMEOUT_EXCEEDED exception.
/// Finally waits for the thread pool. Exceptions are logged and not propagated;
/// the pool is waited for in that case as well.
void AsynchronousInsertQueue::flushAndShutdown()
{
    try
    {
        LOG_TRACE(log, "Shutting down the asynchronous insertion queue");
        shutdown = true;

        for (size_t i = 0; i < pool_size; ++i)
        {
            auto & shard = queue_shards[i];

            /// The deadline thread checks the shutdown flag when woken up and exits.
            shard.are_tasks_available.notify_one();
            chassert(dump_by_first_update_threads[i].joinable());
            dump_by_first_update_threads[i].join();

            if (flush_on_shutdown)
            {
                for (auto & [_, elem] : shard.queue)
                    scheduleDataProcessingJob(elem.key, std::move(elem.data), getContext(), i);
            }
            else
            {
                /// The data will not be inserted: report it to everyone who waits for these entries.
                for (auto & [_, elem] : shard.queue)
                    for (const auto & entry : elem.data->entries)
                        entry->finish(
                            std::make_exception_ptr(Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Wait for async insert timeout exceeded")));
            }
        }

        pool.wait();
        LOG_TRACE(log, "Asynchronous insertion queue finished");
    }
    catch (...)
    {
        tryLogCurrentException(log);
        pool.wait();
    }
}
/// Logs a warning with the target database and table for every element that is still present in a shard queue.
AsynchronousInsertQueue::~AsynchronousInsertQueue()
{
    for (const auto & shard : queue_shards)
    {
        for (const auto & queued : shard.queue)
        {
            const auto & leftover = queued.second;
            const auto & insert_query = leftover.key.query->as<const ASTInsertQuery &>();
            LOG_WARNING(log, "Has unprocessed async insert for {}.{}",
                backQuoteIfNeed(insert_query.getDatabase()), backQuoteIfNeed(insert_query.getTable()));
        }
    }
}
/// Submits a batch to the thread pool, where it is handled by processData().
///
/// The job priority is derived from the creation time of the first entry of the batch.
/// If the job cannot be scheduled, all entries of the batch are finished with the thrown exception
/// and nothing is propagated to the caller.
void AsynchronousInsertQueue::scheduleDataProcessingJob(
    const InsertQuery & key, InsertDataPtr data, ContextPtr global_context, size_t shard_num)
{
    /// Intuitively it seems reasonable to process first inserted blocks first.
    /// We add new chunks in the end of entries list, so they are automatically ordered by creation time
    chassert(!data->entries.empty());
    const auto & oldest_entry = data->entries.front();
    const auto priority = Priority{oldest_entry->create_time.time_since_epoch().count()};

    /// Wrap 'unique_ptr' with 'shared_ptr' to make this
    /// lambda copyable and allow to save it to the thread pool.
    auto shared_batch = std::make_shared<InsertDataPtr>(std::move(data));

    try
    {
        auto job = [this, key, global_context, shard_num, my_data = shared_batch]() mutable
        {
            processData(key, std::move(*my_data), std::move(global_context), flush_time_history_per_queue_shard[shard_num]);
        };

        pool.scheduleOrThrowOnError(std::move(job), priority);
    }
    catch (...)
    {
        /// The batch is still reachable through the shared wrapper: fail every entry in it.
        auto & unscheduled_batch = *shared_batch;
        for (auto & entry : unscheduled_batch->entries)
            entry->finish(std::current_exception());
    }
}
/// Prepares an INSERT query for the queue and validates it on behalf of the caller.
///
/// Marks the query with `async_insert_flush`, resolves the table and the sample block,
/// throws UNKNOWN_FORMAT if the query's format is not an input format and,
/// for inserts with a table id, checks the INSERT privilege on the inserted columns.
void AsynchronousInsertQueue::preprocessInsertQuery(const ASTPtr & query, const ContextPtr & query_context)
{
    auto & insert_query = query->as<ASTInsertQuery &>();
    insert_query.async_insert_flush = true;

    InterpreterInsertQuery interpreter(query, query_context, query_context->getSettingsRef().insert_allow_materialized_columns);
    auto table = interpreter.getTable(insert_query);
    auto metadata_snapshot = table->getInMemoryMetadataPtr();
    auto sample_block = InterpreterInsertQuery::getSampleBlock(insert_query, table, metadata_snapshot, query_context);

    const bool format_is_known = FormatFactory::instance().isInputFormat(insert_query.format);
    if (!format_is_known)
        throw Exception(ErrorCodes::UNKNOWN_FORMAT, "Unknown input format {}", insert_query.format);

    /// For table functions we check access while executing
    /// InterpreterInsertQuery::getTable() -> ITableFunction::execute().
    if (insert_query.table_id)
    {
        const auto column_names = sample_block.getNames();
        query_context->checkAccess(AccessType::INSERT, insert_query.table_id, column_names);
    }
}
/// Queues an INSERT whose data is taken from the query itself (see getReadBufferFromASTInsertQuery).
///
/// The data is read into memory up to 'async_insert_max_data_size' bytes. If there is more data than that,
/// nothing is queued: the result has status TOO_MUCH_DATA and carries a buffer that yields
/// the bytes already read followed by the rest of the data.
/// Otherwise the bytes are passed on to pushDataChunk().
AsynchronousInsertQueue::PushResult
AsynchronousInsertQueue::pushQueryWithInlinedData(ASTPtr query, ContextPtr query_context)
{
    query = query->clone();
    preprocessInsertQuery(query, query_context);

    String inlined_data;
    {
        /// Read at most 'async_insert_max_data_size' bytes of data.
        /// If limit is exceeded we will fallback to synchronous insert
        /// to avoid buffering of huge amount of data in memory.
        auto source_buf = getReadBufferFromASTInsertQuery(query);

        LimitReadBuffer limited_source(
            *source_buf, query_context->getSettingsRef().async_insert_max_data_size,
            /*throw_exception=*/ false, /*exact_limit=*/ {});

        WriteBufferFromString sink(inlined_data);
        copyData(limited_source, sink);

        const bool has_more_data = !source_buf->eof();
        if (has_more_data)
        {
            sink.finalize();

            /// Concat read buffer with already extracted from insert
            /// query data and with the rest data from insert query.
            std::vector<std::unique_ptr<ReadBuffer>> parts;
            parts.emplace_back(std::make_unique<ReadBufferFromOwnString>(inlined_data));
            parts.emplace_back(std::move(source_buf));

            return PushResult
            {
                .status = PushResult::TOO_MUCH_DATA,
                .future = {},
                .insert_data_buffer = std::make_unique<ConcatReadBuffer>(std::move(parts)),
            };
        }
    }

    return pushDataChunk(std::move(query), std::move(inlined_data), std::move(query_context));
}
/// Queues an INSERT whose data is already available as a block.
/// The query is cloned and preprocessed first, then the block is passed on to pushDataChunk().
AsynchronousInsertQueue::PushResult
AsynchronousInsertQueue::pushQueryWithBlock(ASTPtr query, Block block, ContextPtr query_context)
{
    ASTPtr own_query = query->clone();
    preprocessInsertQuery(own_query, query_context);

    return pushDataChunk(std::move(own_query), std::move(block), std::move(query_context));
}
/// Adds one chunk of data to the batch of its key and, if a limit is reached, schedules the batch for flushing.
///
/// The key is built from the query, the user with current roles, the settings and the data kind;
/// its hash selects the queue shard. Under the shard mutex the entry is appended to the pending batch
/// of that key (a new batch is created if there is none), the adaptive busy timeout of the shard is updated
/// and the metrics are adjusted. The batch is taken out of the queue right away when it has enough bytes
/// or queries, or when the maximum busy timeout was exceeded, unless flushes are stopped by flushAll().
///
/// Returns a result with status OK and a future that is fulfilled when the entry is finished.
/// Throws if the settings do not pass validateSettings().
AsynchronousInsertQueue::PushResult
AsynchronousInsertQueue::pushDataChunk(ASTPtr query, DataChunk chunk, ContextPtr query_context)
{
const auto & settings = query_context->getSettingsRef();
validateSettings(settings, log);
auto & insert_query = query->as<ASTInsertQuery &>();
auto data_kind = chunk.getDataKind();
/// The entry keeps the format that was written in the query, it is used for logging later.
auto entry = std::make_shared<InsertData::Entry>(
std::move(chunk), query_context->getCurrentQueryId(),
settings.insert_deduplication_token, insert_query.format,
CurrentThread::getUserMemoryTracker());
/// If data is parsed on client we don't care of format which is written
/// in INSERT query. Replace it to put all such queries into one bucket in queue.
if (data_kind == DataKind::Preprocessed)
insert_query.format = "Native";
InsertQuery key{query, query_context->getUserID(), query_context->getCurrentRoles(), settings, data_kind};
InsertDataPtr data_to_process;
std::future<void> insert_future;
/// The shard is chosen by the hash of the key.
auto shard_num = key.hash % pool_size;
auto & shard = queue_shards[shard_num];
const auto flush_time_points = flush_time_history_per_queue_shard[shard_num].getRecentTimePoints();
{
/// Everything below that touches the shard's queue is done under the shard mutex.
std::lock_guard lock(shard.mutex);
/// `inserted` is true when there was no pending batch for this key hash.
auto [it, inserted] = shard.iterators.try_emplace(key.hash);
auto now = std::chrono::steady_clock::now();
auto timeout_ms = getBusyWaitTimeoutMs(settings, shard, flush_time_points, now);
if (timeout_ms != shard.busy_timeout_ms)
{
LOG_TRACE(
log,
"Asynchronous timeout {} from {} to {} for queue shard {}.",
timeout_ms < shard.busy_timeout_ms ? "decreased" : "increased",
shard.busy_timeout_ms.count(),
timeout_ms.count(),
size_t(shard_num));
}
/// A new batch is put into the queue under its deadline: `now + timeout_ms`.
if (inserted)
it->second = shard.queue.emplace(now + timeout_ms, Container{key, std::make_unique<InsertData>(timeout_ms)}).first;
auto queue_it = it->second;
auto & data = queue_it->second.data;
size_t entry_data_size = entry->chunk.byteSize();
assert(data);
/// Size of the batch before this entry was added; used for the metrics below.
auto size_in_bytes = data->size_in_bytes;
data->size_in_bytes += entry_data_size;
/// We rely on the fact that entries are being added to the list in order of creation time in `scheduleDataProcessingJob()`
data->entries.emplace_back(entry);
insert_future = entry->getFuture();
LOG_TRACE(log, "Have {} pending inserts with total {} bytes of data for query '{}'",
data->entries.size(), data->size_in_bytes, key.query_str);
bool has_enough_bytes = data->size_in_bytes >= key.settings.async_insert_max_data_size;
bool has_enough_queries = data->entries.size() >= key.settings.async_insert_max_query_number && key.settings.async_insert_deduplicate;
auto max_busy_timeout_exceeded = [&shard, &settings, &now, &flush_time_points]() -> bool
{
if (!settings.async_insert_use_adaptive_busy_timeout || !shard.last_insert_time || !flush_time_points.first)
return false;
auto max_ms = Milliseconds(settings.async_insert_busy_timeout_max_ms);
return *shard.last_insert_time + max_ms < now && *flush_time_points.first + max_ms < *flush_time_points.second;
};
/// Here we check whether we have hit the limit on the maximum data size in the buffer or
/// if the elapsed time since the last insert exceeds the maximum busy wait timeout.
/// We also use the limit settings from the query context.
/// This works because queries with the same set of settings are already grouped together.
if (!flush_stopped && (has_enough_bytes || has_enough_queries || max_busy_timeout_exceeded()))
{
/// The batch is flushed without waiting for its deadline, which is reflected in its timeout.
data->timeout_ms = Milliseconds::zero();
data_to_process = std::move(data);
shard.iterators.erase(it);
shard.queue.erase(queue_it);
}
shard.last_insert_time = now;
shard.busy_timeout_ms = timeout_ms;
CurrentMetrics::add(CurrentMetrics::PendingAsyncInsert);
ProfileEvents::increment(ProfileEvents::AsyncInsertQuery);
ProfileEvents::increment(ProfileEvents::AsyncInsertBytes, entry_data_size);
/// Queue metrics: if the batch leaves the queue, remove what was accounted for it before this entry
/// (a batch created by this very call was never accounted); otherwise account the new entry.
if (data_to_process)
{
if (!inserted)
CurrentMetrics::sub(CurrentMetrics::AsynchronousInsertQueueSize);
CurrentMetrics::sub(CurrentMetrics::AsynchronousInsertQueueBytes, size_in_bytes);
}
else
{
if (inserted)
CurrentMetrics::add(CurrentMetrics::AsynchronousInsertQueueSize);
CurrentMetrics::add(CurrentMetrics::AsynchronousInsertQueueBytes, entry_data_size);
}
}
/// The shard mutex is released here: either schedule the flush or
/// notify the thread that waits on the shard's condition variable in processBatchDeadlines().
if (data_to_process)
scheduleDataProcessingJob(key, std::move(data_to_process), getContext(), shard_num);
else
shard.are_tasks_available.notify_one();
return PushResult
{
.status = PushResult::OK,
.future = std::move(insert_future),
.insert_data_buffer = nullptr,
};
}
/// Computes the busy timeout to use for the shard after an insert that happens at `now`.
///
/// Without the adaptive mode the result is always 'async_insert_busy_timeout_max_ms'.
/// With it, the current shard timeout is
///  - increased (by the increase rate, at least by 1 ms) if the previous insert was less than the minimum timeout ago;
///  - decreased (by the decrease rate, at least by 1 ms) if both the time since the previous insert and
///    the distance between the two recorded flushes exceed the decreased value;
///  - left as it is otherwise, or when there is no previous insert or flush history yet.
/// The result is always limited to the range [max(min setting, 1 ms) capped by max, max setting].
AsynchronousInsertQueue::Milliseconds AsynchronousInsertQueue::getBusyWaitTimeoutMs(
    const Settings & settings,
    const QueueShard & shard,
    const QueueShardFlushTimeHistory::TimePoints & flush_time_points,
    std::chrono::steady_clock::time_point now) const
{
    if (!settings.async_insert_use_adaptive_busy_timeout)
        return settings.async_insert_busy_timeout_max_ms;

    const auto upper_bound_ms = Milliseconds(settings.async_insert_busy_timeout_max_ms);
    const auto lower_bound_ms
        = std::min(std::max(Milliseconds(settings.async_insert_busy_timeout_min_ms), Milliseconds(1)), upper_bound_ms);

    auto limit_to_bounds = [&lower_bound_ms, &upper_bound_ms](const auto & value_ms)
    {
        return std::min(std::max(value_ms, lower_bound_ms), upper_bound_ms);
    };

    /// Not enough history to adapt: keep the current value.
    const bool has_history = shard.last_insert_time && flush_time_points.first;
    if (!has_history)
        return limit_to_bounds(shard.busy_timeout_ms);

    const auto & previous_insert_time = *shard.last_insert_time;

    /// Increase the timeout for frequent inserts.
    if (previous_insert_time + lower_bound_ms > now)
    {
        const double increase_rate = settings.async_insert_busy_timeout_increase_rate;
        const auto increased_timeout_ms = std::max(
            std::chrono::duration_cast<Milliseconds>(shard.busy_timeout_ms * (1.0 + increase_rate)),
            shard.busy_timeout_ms + Milliseconds(1));

        return limit_to_bounds(increased_timeout_ms);
    }

    const double decrease_rate = settings.async_insert_busy_timeout_decrease_rate;
    const auto decreased_timeout_ms = std::min(
        std::chrono::duration_cast<Milliseconds>(shard.busy_timeout_ms / (1.0 + decrease_rate)),
        shard.busy_timeout_ms - Milliseconds(1));

    const auto & earlier_flush = *flush_time_points.first;
    const auto & later_flush = *flush_time_points.second;

    /// Decrease the timeout if inserts are not frequent,
    /// that is, if the time since the last insert and the difference between the last two queue flushes were both
    /// long enough (exceeding the adjusted timeout).
    /// This ensures the timeout value converges to the minimum over time for non-frequent inserts.
    const bool insert_was_long_ago = previous_insert_time + decreased_timeout_ms < now;
    const bool flushes_were_far_apart = earlier_flush + decreased_timeout_ms < later_flush;
    if (insert_was_long_ago && flushes_were_far_apart)
        return limit_to_bounds(decreased_timeout_ms);

    return limit_to_bounds(shard.busy_timeout_ms);
}
/// Checks the busy-timeout settings of asynchronous inserts.
///
/// Throws INVALID_SETTING_VALUE if the maximum timeout is zero or, in the adaptive mode,
/// if the increase or the decrease rate is not greater than zero.
/// A minimum timeout that is greater than the maximum is only reported with a warning (when `log` is set).
void AsynchronousInsertQueue::validateSettings(const Settings & settings, LoggerPtr log)
{
    const auto max_timeout = std::chrono::milliseconds(settings.async_insert_busy_timeout_max_ms);
    if (max_timeout.count() == 0)
        throw Exception(ErrorCodes::INVALID_SETTING_VALUE, "Setting 'async_insert_busy_timeout_max_ms' can't be zero");

    /// The remaining settings matter only for the adaptive timeout.
    const bool adaptive_timeout_enabled = settings.async_insert_use_adaptive_busy_timeout;
    if (!adaptive_timeout_enabled)
        return;

    const auto min_timeout = std::chrono::milliseconds(settings.async_insert_busy_timeout_min_ms);
    if (log && min_timeout > max_timeout)
    {
        LOG_WARNING(
            log,
            "Setting 'async_insert_busy_timeout_min_ms'={} is greater than 'async_insert_busy_timeout_max_ms'={}. Ignoring "
            "'async_insert_busy_timeout_min_ms'",
            min_timeout.count(),
            max_timeout.count());
    }

    if (settings.async_insert_busy_timeout_increase_rate <= 0)
        throw Exception(ErrorCodes::INVALID_SETTING_VALUE, "Setting 'async_insert_busy_timeout_increase_rate' must be greater than zero");

    if (settings.async_insert_busy_timeout_decrease_rate <= 0)
        throw Exception(ErrorCodes::INVALID_SETTING_VALUE, "Setting 'async_insert_busy_timeout_decrease_rate' must be greater than zero");
}
void AsynchronousInsertQueue::flushAll()
{
std::lock_guard flush_lock(flush_mutex);
LOG_DEBUG(log, "Requested to flush asynchronous insert queue");
/// Disable background flushes to avoid adding new elements to the queue.
flush_stopped = true;
std::vector<Queue> queues_to_flush(pool_size);
for (size_t i = 0; i < pool_size; ++i)
{
std::lock_guard lock(queue_shards[i].mutex);
queues_to_flush[i] = std::move(queue_shards[i].queue);
queue_shards[i].iterators.clear();
}
size_t total_queries = 0;
size_t total_bytes = 0;
size_t total_entries = 0;
for (size_t i = 0; i < pool_size; ++i)
{
auto & queue = queues_to_flush[i];
total_queries += queue.size();
for (auto & [_, entry] : queue)
{
total_bytes += entry.data->size_in_bytes;
total_entries += entry.data->entries.size();
scheduleDataProcessingJob(entry.key, std::move(entry.data), getContext(), i);
}
}
/// Note that jobs scheduled before the call of 'flushAll' are not counted here.
LOG_DEBUG(log,
"Will wait for finishing of {} flushing jobs (about {} inserts, {} bytes, {} distinct queries)",
pool.active(), total_entries, total_bytes, total_queries);
/// Wait until all jobs are finished. That includes also jobs
/// that were scheduled before the call of 'flushAll'.
pool.wait();
LOG_DEBUG(log, "Finished flushing of asynchronous insert queue");
flush_stopped = false;
}
void AsynchronousInsertQueue::processBatchDeadlines(size_t shard_num)
{
auto & shard = queue_shards[shard_num];
while (!shutdown)
{
std::vector<Container> entries_to_flush;
{
std::unique_lock lock(shard.mutex);
const auto rel_time
= std::min(shard.busy_timeout_ms, Milliseconds(getContext()->getSettingsRef().async_insert_poll_timeout_ms));
shard.are_tasks_available.wait_for(
lock,
rel_time,
[&shard, this]
{
if (shutdown)
return true;
if (!shard.queue.empty() && shard.queue.begin()->first < std::chrono::steady_clock::now())
return true;
return false;
});
if (shutdown)
return;
if (flush_stopped)
continue;
const auto now = std::chrono::steady_clock::now();
size_t size_in_bytes = 0;
while (true)
{
if (shard.queue.empty() || shard.queue.begin()->first > now)
break;
auto it = shard.queue.begin();
size_in_bytes += it->second.data->size_in_bytes;
shard.iterators.erase(it->second.key.hash);
entries_to_flush.emplace_back(std::move(it->second));
shard.queue.erase(it);
}
if (!entries_to_flush.empty())
{
CurrentMetrics::sub(CurrentMetrics::AsynchronousInsertQueueSize, entries_to_flush.size());
CurrentMetrics::sub(CurrentMetrics::AsynchronousInsertQueueBytes, size_in_bytes);
}
}
for (auto & entry : entries_to_flush)
scheduleDataProcessingJob(entry.key, std::move(entry.data), getContext(), shard_num);
}
}
namespace
{

using TimePoint = std::chrono::time_point<std::chrono::system_clock>;

/// Fills the flush-related fields of every element and adds the elements to the log.
/// The status is Ok when `flush_exception` is empty and FlushError otherwise.
/// Never throws: any exception is logged instead.
void appendElementsToLogSafe(
    AsynchronousInsertLog & log,
    std::vector<AsynchronousInsertLogElement> elements,
    TimePoint flush_time,
    const String & flush_query_id,
    const String & flush_exception)
{
    try
    {
        using Status = AsynchronousInsertLogElement::Status;

        /// These values are the same for all elements of one flush.
        const auto flush_status = flush_exception.empty() ? Status::Ok : Status::FlushError;
        const auto flush_seconds = timeInSeconds(flush_time);
        const auto flush_microseconds = timeInMicroseconds(flush_time);

        for (auto & element : elements)
        {
            element.flush_time = flush_seconds;
            element.flush_time_microseconds = flush_microseconds;
            element.flush_query_id = flush_query_id;
            element.exception = flush_exception;
            element.status = flush_status;
            log.add(std::move(element));
        }
    }
    catch (...)
    {
        tryLogCurrentException("AsynchronousInsertQueue", "Failed to add elements to AsynchronousInsertLog");
    }
}

/// Returns the text of the query for logs, at most `max_length` long.
/// Queries with secret parts are formatted with formatForLogging();
/// other queries are serialized and passed through wipeSensitiveDataAndCutToLength().
String serializeQuery(const IAST & query, size_t max_length)
{
    if (query.hasSecretParts())
        return query.formatForLogging(max_length);

    return wipeSensitiveDataAndCutToLength(serializeAST(query), max_length);
}

}
// static
/// Flushes one batch: inserts the data of all entries collected for `key` with a single INSERT.
///
/// Creates a query context as a copy of `global_context` with the user, roles and settings stored in `key`,
/// adds the query to the process list, builds the insert pipeline, converts the entries to one chunk
/// (parsing them if the data kind is Parsed) and pushes the chunk through the pipeline.
/// Entries are finished with a value on success. If anything throws, the handlers at the end of the function
/// finish all entries that are not finished yet with the exception; nothing is propagated to the caller.
/// After a successful flush the flush time history of the shard is updated.
/// Does nothing if `data` is null.
void AsynchronousInsertQueue::processData(
InsertQuery key, InsertDataPtr data, ContextPtr global_context, QueueShardFlushTimeHistory & queue_shard_flush_time_history)
try
{
if (!data)
return;
/// On every exit path the entries of the batch stop being counted as pending.
SCOPE_EXIT(CurrentMetrics::sub(CurrentMetrics::PendingAsyncInsert, data->entries.size()));
const auto log = getLogger("AsynchronousInsertQueue");
const auto & insert_query = assert_cast<const ASTInsertQuery &>(*key.query);
auto insert_context = Context::createCopy(global_context);
bool internal = false; // To enable logging this query
bool async_insert = true;
/// Disabled query spans. Could be activated by initializing this to a SpanHolder
std::shared_ptr<OpenTelemetry::SpanHolder> query_span{nullptr};
/// 'resetParser' doesn't work for parallel parsing.
key.settings.set("input_format_parallel_parsing", false);
/// It maybe insert into distributed table.
/// It doesn't make sense to make insert into destination tables asynchronous.
key.settings.set("async_insert", false);
insert_context->makeQueryContext();
/// Access rights must be checked for the user who executed the initial INSERT query.
if (key.user_id)
insert_context->setUser(*key.user_id, key.current_roles);
insert_context->setSettings(key.settings);
/// Set initial_query_id, because it's used in InterpreterInsertQuery for table lock.
/// NOTE(review): setting an empty id presumably makes the context generate a new one,
/// which is read back on the next line - confirm in Context::setCurrentQueryId.
insert_context->setCurrentQueryId("");
auto insert_query_id = insert_context->getCurrentQueryId();
auto query_start_time = std::chrono::system_clock::now();
Stopwatch start_watch{CLOCK_MONOTONIC};
insert_context->setQueryKind(ClientInfo::QueryKind::INITIAL_QUERY);
insert_context->setInitialQueryStartTime(query_start_time);
insert_context->setCurrentQueryId(insert_query_id);
insert_context->setInitialQueryId(insert_query_id);
DB::CurrentThread::QueryScope query_scope_holder(insert_context);
auto query_for_logging = serializeQuery(*key.query, insert_context->getSettingsRef().log_queries_cut_to_length);
/// We add it to the process list so
/// a) it appears in system.processes
/// b) can be cancelled if we want to
/// c) has an associated process list element where runtime metrics are stored
auto process_list_entry = insert_context->getProcessList().insert(
query_for_logging,
key.query.get(),
insert_context,
start_watch.getStart());
auto query_status = process_list_entry->getQueryStatus();
insert_context->setProcessListElement(std::move(query_status));
/// Database and table names stay empty for inserts without a table id.
String query_database;
String query_table;
if (insert_query.table_id)
{
query_database = insert_query.table_id.getDatabaseName();
query_table = insert_query.table_id.getTableName();
insert_context->setInsertionTable(insert_query.table_id);
}
std::unique_ptr<DB::IInterpreter> interpreter;
QueryPipeline pipeline;
QueryLogElement query_log_elem;
auto async_insert_log = global_context->getAsynchronousInsertLog();
/// Log elements of successfully converted entries; they are written after the flush, when its outcome is known.
std::vector<AsynchronousInsertLogElement> log_elements;
if (async_insert_log)
log_elements.reserve(data->entries.size());
try
{
interpreter = std::make_unique<InterpreterInsertQuery>(
key.query, insert_context, key.settings.insert_allow_materialized_columns, false, false, true);
pipeline = interpreter->execute().pipeline;
chassert(pipeline.pushing());
query_log_elem = logQueryStart(
query_start_time,
insert_context,
query_for_logging,
key.query,
pipeline,
interpreter,
internal,
query_database,
query_table,
async_insert);
}
catch (...)
{
logExceptionBeforeStart(query_for_logging, insert_context, key.query, query_span, start_watch.elapsedMilliseconds());
throw;
}
/// Callback for the functions that convert entries to a chunk: records one entry in the asynchronous insert log.
/// It is a no-op when the log is not available.
auto add_entry_to_log = [&](const auto & entry,
const auto & entry_query_for_logging,
const auto & exception,
size_t num_rows,
size_t num_bytes,
Milliseconds timeout_ms)
{
if (!async_insert_log)
return;
AsynchronousInsertLogElement elem;
elem.event_time = timeInSeconds(entry->create_time);
elem.event_time_microseconds = timeInMicroseconds(entry->create_time);
elem.query_for_logging = entry_query_for_logging;
elem.database = query_database;
elem.table = query_table;
elem.format = entry->format;
elem.query_id = entry->query_id;
elem.bytes = num_bytes;
elem.rows = num_rows;
elem.exception = exception;
elem.data_kind = entry->chunk.getDataKind();
elem.timeout_milliseconds = timeout_ms.count();
/// If there was a parsing error,
/// the entry won't be flushed anyway,
/// so add the log element immediately.
if (!elem.exception.empty())
{
elem.status = AsynchronousInsertLogElement::ParsingError;
async_insert_log->add(std::move(elem));
}
else
{
log_elements.push_back(elem);
}
};
/// Successful completion: finishes the entries that are not finished yet
/// and writes the collected log elements without an exception text.
auto finish_entries = [&]
{
for (const auto & entry : data->entries)
{
if (!entry->isFinished())
entry->finish();
}
if (!log_elements.empty())
{
auto flush_time = std::chrono::system_clock::now();
appendElementsToLogSafe(*async_insert_log, std::move(log_elements), flush_time, insert_query_id, "");
}
};
Chunk chunk;
auto header = pipeline.getHeader();
if (key.data_kind == DataKind::Parsed)
chunk = processEntriesWithParsing(key, data, header, insert_context, log, add_entry_to_log);
else
chunk = processPreprocessedEntries(key, data, header, insert_context, add_entry_to_log);
ProfileEvents::increment(ProfileEvents::AsyncInsertRows, chunk.getNumRows());
/// Nothing to insert: the pipeline is not executed at all.
if (chunk.getNumRows() == 0)
{
finish_entries();
return;
}
try
{
size_t num_rows = chunk.getNumRows();
size_t num_bytes = chunk.bytes();
auto source = std::make_shared<SourceFromSingleChunk>(header, std::move(chunk));
pipeline.complete(Pipe(std::move(source)));
CompletedPipelineExecutor completed_executor(pipeline);
completed_executor.execute();
LOG_INFO(log, "Flushed {} rows, {} bytes for query '{}'", num_rows, num_bytes, key.query_str);
queue_shard_flush_time_history.updateWithCurrentTime();
bool pulling_pipeline = false;
logQueryFinish(query_log_elem, insert_context, key.query, pipeline, pulling_pipeline, query_span, QueryCache::Usage::None, internal);
}
catch (...)
{
bool log_error = true;
logQueryException(query_log_elem, insert_context, start_watch, key.query, query_span, internal, log_error);
/// The flush failed: the collected log elements are written with the exception message.
if (!log_elements.empty())
{
auto exception = getCurrentExceptionMessage(false);
auto flush_time = std::chrono::system_clock::now();
appendElementsToLogSafe(*async_insert_log, std::move(log_elements), flush_time, insert_query_id, exception);
}
throw;
}
finish_entries();
}
/// Handlers of the function-try-block: whatever was thrown above is delivered to the entries of the batch.
catch (const Exception & e)
{
finishWithException(key.query, data->entries, e);
}
catch (const Poco::Exception & e)
{
finishWithException(key.query, data->entries, e);
}
catch (const std::exception & e)
{
finishWithException(key.query, data->entries, e);
}
catch (...)
{
finishWithException(key.query, data->entries, Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Unknown exception"));
}
/// Parses the raw data of all entries of a batch into one chunk with the structure of `header`.
///
/// Entries are parsed one by one with a streaming executor of the query's input format.
/// If parsing of an entry fails, the error is logged, the rows added for that entry are removed
/// from the result columns and the entry is finished with the exception; the other entries are not affected.
/// For every entry the cumulative row offset and the deduplication token are stored in the chunk info,
/// `add_to_async_insert_log` is called and the entry's data is released.
///
/// Throws LOGICAL_ERROR if an entry does not hold string data.
template <typename LogFunc>
Chunk AsynchronousInsertQueue::processEntriesWithParsing(
    const InsertQuery & key,
    const InsertDataPtr & data,
    const Block & header,
    const ContextPtr & insert_context,
    LoggerPtr logger,
    LogFunc && add_to_async_insert_log)
{
    /// State shared with the error callback below.
    size_t rows_parsed = 0;
    InsertData::EntryPtr entry_in_progress;
    String parsing_error;

    const auto & insert_query = assert_cast<const ASTInsertQuery &>(*key.query);
    auto format = getInputFormatFromASTInsertQuery(key.query, false, header, insert_context, nullptr);

    std::shared_ptr<ISimpleTransform> adding_defaults_transform;
    const bool may_need_defaults = insert_context->getSettingsRef().input_format_defaults_for_omitted_fields && insert_query.table_id;
    if (may_need_defaults)
    {
        StoragePtr storage = DatabaseCatalog::instance().getTable(insert_query.table_id, insert_context);
        const auto metadata_snapshot = storage->getInMemoryMetadataPtr();
        const auto & table_columns = metadata_snapshot->getColumns();

        if (table_columns.hasDefaults())
            adding_defaults_transform = std::make_shared<AddingDefaultsTransform>(header, table_columns, *format, insert_context);
    }

    auto on_error = [&](const MutableColumns & result_columns, Exception & e)
    {
        parsing_error = e.displayText();
        LOG_ERROR(logger, "Failed parsing for query '{}' with query id {}. {}",
            key.query_str, entry_in_progress->query_id, parsing_error);

        /// Drop the rows of the failed entry: keep only what was accumulated before it.
        for (const auto & column : result_columns)
        {
            const auto column_size = column->size();
            if (column_size > rows_parsed)
                column->popBack(column_size - rows_parsed);
        }

        entry_in_progress->finish(std::current_exception());
        return 0;
    };

    StreamingFormatExecutor executor(header, format, std::move(on_error), std::move(adding_defaults_transform));

    auto insert_info = std::make_shared<AsyncInsertInfo>();
    auto query_for_logging = serializeQuery(*key.query, insert_context->getSettingsRef().log_queries_cut_to_length);

    for (const auto & entry : data->entries)
    {
        entry_in_progress = entry;

        const auto * raw_data = entry->chunk.asString();
        if (!raw_data)
            throw Exception(ErrorCodes::LOGICAL_ERROR,
                "Expected entry with data kind Parsed. Got: {}", entry->chunk.getDataKind());

        ReadBufferFromString entry_buffer(*raw_data);
        const size_t entry_bytes = raw_data->size();
        const size_t entry_rows = executor.execute(entry_buffer);

        rows_parsed += entry_rows;
        insert_info->offsets.push_back(rows_parsed);
        insert_info->tokens.push_back(entry->async_dedup_token);

        add_to_async_insert_log(entry, query_for_logging, parsing_error, entry_rows, entry_bytes, data->timeout_ms);

        parsing_error.clear();
        entry->resetChunk();
    }

    Chunk result(executor.getResultColumns(), rows_parsed);
    result.setChunkInfo(std::move(insert_info));
    return result;
}
/// Concatenates the blocks of all entries of a batch into one chunk with the structure of `header`.
///
/// For every entry the cumulative row offset and the deduplication token are stored in the chunk info,
/// `add_to_async_insert_log` is called with the query text that mentions the entry's own format
/// and the entry's data is released.
///
/// Throws LOGICAL_ERROR if an entry does not hold a block.
template <typename LogFunc>
Chunk AsynchronousInsertQueue::processPreprocessedEntries(
    const InsertQuery & key,
    const InsertDataPtr & data,
    const Block & header,
    const ContextPtr & insert_context,
    LogFunc && add_to_async_insert_log)
{
    size_t rows_collected = 0;
    auto insert_info = std::make_shared<AsyncInsertInfo>();
    auto merged_columns = header.cloneEmptyColumns();

    /// The query text for logging is built once per format name and then reused.
    std::unordered_map<String, String> query_text_by_format;
    auto get_query_by_format = [&](const String & format) -> const String &
    {
        auto [slot, is_new] = query_text_by_format.try_emplace(format);
        if (is_new)
        {
            auto query_with_format = key.query->clone();
            assert_cast<ASTInsertQuery &>(*query_with_format).format = format;
            slot->second = serializeQuery(*query_with_format, insert_context->getSettingsRef().log_queries_cut_to_length);
        }
        return slot->second;
    };

    for (const auto & entry : data->entries)
    {
        const auto * block = entry->chunk.asBlock();
        if (!block)
            throw Exception(ErrorCodes::LOGICAL_ERROR,
                "Expected entry with data kind Preprocessed. Got: {}", entry->chunk.getDataKind());

        const auto block_columns = block->getColumns();
        const size_t num_columns = block_columns.size();
        for (size_t position = 0; position < num_columns; ++position)
        {
            const auto & source_column = block_columns[position];
            merged_columns[position]->insertRangeFrom(*source_column, 0, source_column->size());
        }

        rows_collected += block->rows();
        insert_info->offsets.push_back(rows_collected);
        insert_info->tokens.push_back(entry->async_dedup_token);

        const auto & query_for_logging = get_query_by_format(entry->format);
        add_to_async_insert_log(entry, query_for_logging, "", block->rows(), block->bytes(), data->timeout_ms);

        entry->resetChunk();
    }

    Chunk result(std::move(merged_columns), rows_collected);
    result.setChunkInfo(std::move(insert_info));
    return result;
}
/// Logs the exception that is currently being handled and finishes with `exception`
/// every entry of the list that is not finished yet.
template <typename E>
void AsynchronousInsertQueue::finishWithException(
    const ASTPtr & query, const std::list<InsertData::EntryPtr> & entries, const E & exception)
{
    tryLogCurrentException("AsynchronousInsertQueue", fmt::format("Failed insertion for query '{}'", queryToString(query)));

    for (const auto & entry : entries)
    {
        if (entry->isFinished())
            continue;

        /// Make a copy of exception to avoid concurrent usage of
        /// one exception object from several threads.
        entry->finish(std::make_exception_ptr(exception));
    }
}
}