// Source: ClickHouse/src/Interpreters/executeQuery.cpp (1133 lines, 43 KiB, C++)

#include <Common/formatReadable.h>
2019-10-07 18:56:03 +00:00
#include <Common/PODArray.h>
2017-07-13 20:58:19 +00:00
#include <Common/typeid_cast.h>
#include <Common/ThreadProfileEvents.h>
2021-09-01 23:18:09 +00:00
#include <Interpreters/AsynchronousInsertQueue.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/WriteBufferFromVector.h>
#include <IO/LimitReadBuffer.h>
#include <IO/copyData.h>
#include <DataStreams/BlockIO.h>
#include <Processors/Transforms/CountingTransform.h>
#include <Processors/Transforms/getSourceFromASTInsertQuery.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTInsertQuery.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTDropQuery.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTRenameQuery.h>
#include <Parsers/ASTAlterQuery.h>
2020-11-02 19:23:26 +00:00
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTShowProcesslistQuery.h>
2020-06-15 20:01:58 +00:00
#include <Parsers/ASTWatchQuery.h>
#include <Parsers/Lexer.h>
#include <Parsers/parseQuery.h>
#include <Parsers/ParserQuery.h>
#include <Parsers/queryNormalization.h>
#include <Parsers/queryToString.h>
2021-07-16 10:10:56 +00:00
#include <Formats/FormatFactory.h>
2019-05-28 18:30:10 +00:00
#include <Storages/StorageInput.h>
#include <Access/EnabledQuota.h>
2021-02-25 07:47:08 +00:00
#include <Interpreters/ApplyWithGlobalVisitor.h>
#include <Interpreters/Context.h>
#include <Interpreters/InterpreterFactory.h>
2021-02-25 07:47:08 +00:00
#include <Interpreters/InterpreterSetQuery.h>
#include <Interpreters/NormalizeSelectWithUnionQueryVisitor.h>
2020-10-22 16:47:20 +00:00
#include <Interpreters/OpenTelemetrySpanLog.h>
2021-02-25 07:47:08 +00:00
#include <Interpreters/ProcessList.h>
#include <Interpreters/QueryLog.h>
2019-05-18 21:07:23 +00:00
#include <Interpreters/ReplaceQueryParameterVisitor.h>
2020-12-18 06:54:38 +00:00
#include <Interpreters/SelectQueryOptions.h>
#include <Interpreters/executeQuery.h>
2021-08-12 11:42:51 +00:00
#include <Interpreters/SelectIntersectExceptQueryVisitor.h>
2019-07-19 07:44:18 +00:00
#include <Common/ProfileEvents.h>
#include <Common/SensitiveDataMasker.h>
2021-09-19 20:15:10 +00:00
#include <DataStreams/materializeBlock.h>
#include <IO/CompressionMethod.h>
2011-10-30 11:30:52 +00:00
2019-03-26 18:28:37 +00:00
#include <Processors/Transforms/LimitsCheckingTransform.h>
#include <Processors/Transforms/MaterializingTransform.h>
2019-03-26 18:28:37 +00:00
#include <Processors/Formats/IOutputFormat.h>
2021-09-15 19:35:48 +00:00
#include <Processors/Executors/CompletedPipelineExecutor.h>
2021-08-27 21:29:10 +00:00
#include <Processors/Sources/WaitForAsyncInsertSource.h>
2011-10-30 11:30:52 +00:00
2021-08-25 14:41:21 +00:00
#include <random>
2020-03-28 03:02:26 +00:00
2019-06-20 07:17:21 +00:00
namespace ProfileEvents
{
extern const Event QueryMaskingRulesMatch;
extern const Event FailedQuery;
extern const Event FailedInsertQuery;
extern const Event FailedSelectQuery;
extern const Event QueryTimeMicroseconds;
extern const Event SelectQueryTimeMicroseconds;
extern const Event InsertQueryTimeMicroseconds;
2019-06-20 07:17:21 +00:00
}
2011-10-30 11:30:52 +00:00
namespace DB
{
namespace ErrorCodes
{
extern const int INTO_OUTFILE_NOT_ALLOWED;
extern const int QUERY_WAS_CANCELLED;
2021-07-01 16:43:59 +00:00
extern const int LOGICAL_ERROR;
2021-09-17 12:05:54 +00:00
extern const int NOT_IMPLEMENTED;
}
2011-10-30 11:30:52 +00:00
/// Runs the depth and element-count checks of the AST against the limits from `settings`.
/// A limit equal to zero means the corresponding check is not performed.
static void checkASTSizeLimits(const IAST & ast, const Settings & settings)
{
    const size_t depth_limit = settings.max_ast_depth;
    const size_t elements_limit = settings.max_ast_elements;

    /// Depth is verified first, then the total number of elements.
    if (depth_limit != 0)
        ast.checkDepth(depth_limit);

    if (elements_limit != 0)
        ast.checkSize(elements_limit);
}
2020-06-15 20:01:58 +00:00
/// Rewrites the query text so that it occupies as few lines as possible.
///
/// Care should be taken:
///  - every whitespace token is replaced by a single space;
///  - non-whitespace tokens (e.g. multiline string literals) are copied verbatim, so line breaks
///    inside them are kept;
///  - a line break that directly follows a comment is kept as well, because the comment
///    can be a single-line one.
static String joinLines(const String & query)
{
    const char * const query_begin = query.data();
    const char * const query_end = query_begin + query.size();

    String joined;
    Lexer lexer(query_begin, query_end);

    for (Token token = lexer.nextToken(); !token.isEnd(); token = lexer.nextToken())
    {
        if (token.type == TokenType::Whitespace)
        {
            joined += ' ';
            continue;
        }

        /// Comments and all remaining tokens are copied as they are.
        joined.append(token.begin, token.end);

        const bool comment_ends_the_line
            = token.type == TokenType::Comment && token.end < query_end && *token.end == '\n';
        if (comment_ends_the_line)
            joined += '\n';
    }

    return joined;
}
/// Builds the variant of the query text that goes to logs: a copy of `query` with sensitive data
/// wiped by SensitiveDataMasker (when one is installed), cut to at most
/// `log_queries_cut_to_length` bytes.
static String prepareQueryForLogging(const String & query, ContextPtr context)
{
    String masked_query = query;

    /// Sensitive data is wiped before the query is cropped by log_queries_cut_to_length,
    /// otherwise something like a credit card number without its last digit can go to the log.
    if (auto * data_masker = SensitiveDataMasker::getInstance())
    {
        const auto matched_rules = data_masker->wipeSensitiveData(masked_query);
        if (matched_rules > 0)
            ProfileEvents::increment(ProfileEvents::QueryMaskingRulesMatch, matched_rules);
    }

    const size_t length_limit = context->getSettingsRef().log_queries_cut_to_length;
    if (masked_query.size() > length_limit)
        masked_query.resize(length_limit);

    return masked_query;
}
/// Log query into text log (not into system table).
static void logQuery(const String & query, ContextPtr context, bool internal)
{
2019-03-04 18:28:42 +00:00
if (internal)
{
2020-05-30 21:57:37 +00:00
LOG_DEBUG(&Poco::Logger::get("executeQuery"), "(internal) {}", joinLines(query));
2019-03-04 18:28:42 +00:00
}
else
{
const auto & client_info = context->getClientInfo();
2020-09-08 13:19:27 +00:00
const auto & current_query_id = client_info.current_query_id;
const auto & initial_query_id = client_info.initial_query_id;
const auto & current_user = client_info.current_user;
2019-03-04 18:28:42 +00:00
String comment = context->getSettingsRef().log_comment;
size_t max_query_size = context->getSettingsRef().max_query_size;
2021-01-25 19:18:23 +00:00
if (comment.size() > max_query_size)
comment.resize(max_query_size);
if (!comment.empty())
comment = fmt::format(" (comment: {})", comment);
2021-07-03 15:48:03 +00:00
LOG_DEBUG(&Poco::Logger::get("executeQuery"), "(from {}{}{}){} {}",
2020-09-08 13:19:27 +00:00
client_info.current_address.toString(),
(current_user != "default" ? ", user: " + current_user : ""),
2020-05-23 21:41:35 +00:00
(!initial_query_id.empty() && current_query_id != initial_query_id ? ", initial_query_id: " + initial_query_id : std::string()),
2021-01-25 19:18:23 +00:00
comment,
2020-05-23 21:41:35 +00:00
joinLines(query));
2020-08-28 19:02:50 +00:00
2021-05-04 22:42:14 +00:00
if (client_info.client_trace_context.trace_id != UUID())
2020-09-08 13:19:27 +00:00
{
LOG_TRACE(&Poco::Logger::get("executeQuery"),
2020-11-18 17:43:18 +00:00
"OpenTelemetry traceparent '{}'",
client_info.client_trace_context.composeTraceparentHeader());
2020-09-08 13:19:27 +00:00
}
2019-03-04 18:28:42 +00:00
}
}
/// Stores the stack trace string of the exception that is currently being handled
/// into `elem.stack_trace`.
/// Call this inside catch block.
static void setExceptionStackTrace(QueryLogElement & elem)
{
    /// Disable memory tracker while the stack trace is built.
    /// If the exception is "Memory limit (for query) exceed", then we probably can't allocate one more string.
    MemoryTracker::BlockerInThread memory_tracker_blocker(VariableContext::Global);

    try
    {
        /// Rethrow the exception being handled to get typed access to it.
        throw;
    }
    catch (const std::exception & ex)
    {
        elem.stack_trace = getExceptionStackTraceString(ex);
    }
    catch (...)
    {
        /// Anything that is not a std::exception: elem.stack_trace is left as it was.
    }
}
/// Log exception (with query info) into text log (not into system table).
///
/// The message consists of the exception text, the client address, the optional log comment and
/// the query joined into as few lines as possible. When `elem.stack_trace` is not empty,
/// it is appended after the message.
static void logException(ContextPtr context, QueryLogElement & elem)
{
    auto * log = &Poco::Logger::get("executeQuery");

    const String comment = elem.log_comment.empty()
        ? String()
        : fmt::format(" (comment: {})", elem.log_comment);

    const bool has_stack_trace = !elem.stack_trace.empty();
    if (has_stack_trace)
    {
        LOG_ERROR(
            log,
            "{} (from {}){} (in query: {})"
            ", Stack trace (when copying this message, always include the lines below):\n\n{}",
            elem.exception,
            context->getClientInfo().current_address.toString(),
            comment,
            joinLines(elem.query),
            elem.stack_trace);
        return;
    }

    LOG_ERROR(
        log,
        "{} (from {}){} (in query: {})",
        elem.exception,
        context->getClientInfo().current_address.toString(),
        comment,
        joinLines(elem.query));
}
/// Number of whole microseconds between the epoch of system_clock and `timepoint`.
inline UInt64 time_in_microseconds(std::chrono::time_point<std::chrono::system_clock> timepoint)
{
    using Microseconds = std::chrono::microseconds;

    const auto since_epoch = timepoint.time_since_epoch();
    return std::chrono::duration_cast<Microseconds>(since_epoch).count();
}
/// Number of whole seconds between the epoch of system_clock and `timepoint`
/// (the sub-second part is discarded by duration_cast).
inline UInt64 time_in_seconds(std::chrono::time_point<std::chrono::system_clock> timepoint)
{
    using Seconds = std::chrono::seconds;

    const auto since_epoch = timepoint.time_since_epoch();
    return std::chrono::duration_cast<Seconds>(since_epoch).count();
}
static void onExceptionBeforeStart(const String & query_for_logging, ContextPtr context, UInt64 current_time_us, ASTPtr ast)
{
/// Exception before the query execution.
if (auto quota = context->getQuota())
quota->used(Quota::ERRORS, 1, /* check_exceeded = */ false);
const Settings & settings = context->getSettingsRef();
/// Log the start of query execution into the table if necessary.
QueryLogElement elem;
elem.type = QueryLogElementType::EXCEPTION_BEFORE_START;
// all callers to onExceptionBeforeStart method construct the timespec for event_time and
// event_time_microseconds from the same time point. So, it can be assumed that both of these
2020-10-27 11:04:03 +00:00
// times are equal up to the precision of a second.
elem.event_time = current_time_us / 1000000;
elem.event_time_microseconds = current_time_us;
elem.query_start_time = current_time_us / 1000000;
elem.query_start_time_microseconds = current_time_us;
elem.current_database = context->getCurrentDatabase();
2019-06-20 07:17:21 +00:00
elem.query = query_for_logging;
elem.normalized_query_hash = normalizedQueryHash<false>(query_for_logging);
2021-08-04 14:09:23 +00:00
// Try log query_kind if ast is valid
if (ast)
{
2021-08-04 14:09:23 +00:00
elem.query_kind = ast->getQueryKindString();
if (settings.log_formatted_queries)
elem.formatted_query = queryToString(ast);
}
2021-08-04 14:09:23 +00:00
// We don't calculate databases, tables and columns when the query isn't able to start
2020-01-22 12:29:30 +00:00
elem.exception_code = getCurrentExceptionCode();
elem.exception = getCurrentExceptionMessage(false);
elem.client_info = context->getClientInfo();
2021-01-25 19:18:23 +00:00
elem.log_comment = settings.log_comment;
if (elem.log_comment.size() > settings.max_query_size)
elem.log_comment.resize(settings.max_query_size);
2020-12-28 12:57:27 +00:00
2018-08-23 01:31:28 +00:00
if (settings.calculate_text_stack_trace)
setExceptionStackTrace(elem);
logException(context, elem);
/// Update performance counters before logging to query_log
CurrentThread::finalizePerformanceCounters();
if (settings.log_queries && elem.type >= settings.log_queries_min_type && !settings.log_queries_min_query_duration_ms.totalMilliseconds())
if (auto query_log = context->getQueryLog())
query_log->add(elem);
if (auto opentelemetry_span_log = context->getOpenTelemetrySpanLog();
2021-05-04 22:42:14 +00:00
context->query_trace_context.trace_id != UUID()
2020-10-22 16:47:20 +00:00
&& opentelemetry_span_log)
2020-08-27 18:44:20 +00:00
{
OpenTelemetrySpanLogElement span;
span.trace_id = context->query_trace_context.trace_id;
span.span_id = context->query_trace_context.span_id;
span.parent_span_id = context->getClientInfo().client_trace_context.span_id;
2020-08-27 18:44:20 +00:00
span.operation_name = "query";
span.start_time_us = current_time_us;
span.finish_time_us = current_time_us;
2020-08-27 18:44:20 +00:00
2020-11-12 23:27:18 +00:00
/// Keep values synchronized to type enum in QueryLogElement::createBlock.
2020-09-08 13:19:27 +00:00
span.attribute_names.push_back("clickhouse.query_status");
2020-08-27 18:44:20 +00:00
span.attribute_values.push_back("ExceptionBeforeStart");
2020-09-08 13:19:27 +00:00
span.attribute_names.push_back("db.statement");
2020-08-27 18:44:20 +00:00
span.attribute_values.push_back(elem.query);
2020-09-08 13:19:27 +00:00
span.attribute_names.push_back("clickhouse.query_id");
2020-08-27 18:44:20 +00:00
span.attribute_values.push_back(elem.client_info.current_query_id);
if (!context->query_trace_context.tracestate.empty())
2020-08-28 01:21:08 +00:00
{
2020-09-08 13:19:27 +00:00
span.attribute_names.push_back("clickhouse.tracestate");
2020-08-28 01:21:08 +00:00
span.attribute_values.push_back(
context->query_trace_context.tracestate);
2020-08-28 01:21:08 +00:00
}
2020-10-22 16:47:20 +00:00
opentelemetry_span_log->add(span);
2020-08-27 18:44:20 +00:00
}
ProfileEvents::increment(ProfileEvents::FailedQuery);
if (ast)
{
if (ast->as<ASTSelectQuery>() || ast->as<ASTSelectWithUnionQuery>())
{
ProfileEvents::increment(ProfileEvents::FailedSelectQuery);
}
else if (ast->as<ASTInsertQuery>())
{
ProfileEvents::increment(ProfileEvents::FailedInsertQuery);
}
}
}
2021-05-31 14:49:02 +00:00
/// Changes settings of `context` depending on the query itself.
/// At the moment the only case is an INSERT query whose `watch` member is set:
/// `output_format_enable_streaming` is set to 1 for it.
static void setQuerySpecificSettings(ASTPtr & ast, ContextMutablePtr context)
{
    auto * insert_query = ast->as<ASTInsertQuery>();
    if (!insert_query || !insert_query->watch)
        return;

    context->setSetting("output_format_enable_streaming", 1);
}
static void applySettingsFromSelectWithUnion(const ASTSelectWithUnionQuery & select_with_union, ContextMutablePtr context)
{
const ASTs & children = select_with_union.list_of_selects->children;
if (children.empty())
return;
// We might have an arbitrarily complex UNION tree, so just give
// up if the last first-order child is not a plain SELECT.
// It is flattened later, when we process UNION ALL/DISTINCT.
const auto * last_select = children.back()->as<ASTSelectQuery>();
if (last_select && last_select->settings())
{
InterpreterSetQuery(last_select->settings(), context).executeForCurrentContext();
}
}
2015-06-18 02:11:05 +00:00
static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
const char * begin,
const char * end,
2021-05-31 14:49:02 +00:00
ContextMutablePtr context,
bool internal,
2019-02-08 13:24:24 +00:00
QueryProcessingStage::Enum stage,
ReadBuffer * istr)
{
const auto current_time = std::chrono::system_clock::now();
auto & client_info = context->getClientInfo();
2021-06-11 15:06:25 +00:00
// If it's not an internal query and we don't see an initial_query_start_time yet, initialize it
// to current time. Internal queries are those executed without an independent client context,
// thus should not set initial_query_start_time, because it might introduce data race. It's also
// possible to have unset initial_query_start_time for non-internal and non-initial queries. For
// example, the query is from an initiator that is running an old version of clickhouse.
if (!internal && client_info.initial_query_start_time == 0)
{
client_info.initial_query_start_time = time_in_seconds(current_time);
client_info.initial_query_start_time_microseconds = time_in_microseconds(current_time);
}
2021-01-29 15:11:44 +00:00
#if !defined(ARCADIA_BUILD)
assert(internal || CurrentThread::get().getQueryContext());
assert(internal || CurrentThread::get().getQueryContext()->getCurrentQueryId() == CurrentThread::getQueryId());
2021-01-29 15:11:44 +00:00
#endif
const Settings & settings = context->getSettingsRef();
ASTPtr ast;
2019-01-18 16:30:35 +00:00
const char * query_end;
/// Don't limit the size of internal queries.
size_t max_query_size = 0;
if (!internal) max_query_size = settings.max_query_size;
String query_database;
String query_table;
try
{
ParserQuery parser(end);
/// TODO: parser should fail early when max_query_size limit is reached.
ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth);
2020-11-02 19:23:26 +00:00
/// Interpret SETTINGS clauses as early as possible (before invoking the corresponding interpreter),
/// to allow settings to take effect.
if (const auto * select_query = ast->as<ASTSelectQuery>())
{
if (auto new_settings = select_query->settings())
InterpreterSetQuery(new_settings, context).executeForCurrentContext();
}
else if (const auto * select_with_union_query = ast->as<ASTSelectWithUnionQuery>())
{
applySettingsFromSelectWithUnion(*select_with_union_query, context);
2020-11-02 19:23:26 +00:00
}
else if (const auto * query_with_output = dynamic_cast<const ASTQueryWithOutput *>(ast.get()))
{
if (query_with_output->settings_ast)
InterpreterSetQuery(query_with_output->settings_ast, context).executeForCurrentContext();
}
if (const auto * query_with_table_output = dynamic_cast<const ASTQueryWithTableAndOutput *>(ast.get()))
{
query_database = query_with_table_output->database;
query_table = query_with_table_output->table;
}
if (auto * create_query = ast->as<ASTCreateQuery>())
{
if (create_query->select)
{
applySettingsFromSelectWithUnion(create_query->select->as<ASTSelectWithUnionQuery &>(), context);
}
}
2019-03-11 13:22:51 +00:00
auto * insert_query = ast->as<ASTInsertQuery>();
2019-03-29 14:50:48 +00:00
if (insert_query && insert_query->settings_ast)
InterpreterSetQuery(insert_query->settings_ast, context).executeForCurrentContext();
2021-02-19 15:18:28 +00:00
if (insert_query)
2019-02-08 13:24:24 +00:00
{
2021-02-19 15:18:28 +00:00
if (insert_query->data)
query_end = insert_query->data;
else
query_end = end;
2021-02-15 18:57:35 +00:00
insert_query->tail = istr;
2019-02-08 13:24:24 +00:00
}
2019-01-18 16:30:35 +00:00
else
2019-06-15 18:22:48 +00:00
{
2019-01-18 16:30:35 +00:00
query_end = end;
2019-06-15 18:22:48 +00:00
}
}
catch (...)
{
2019-03-04 18:28:42 +00:00
/// Anyway log the query.
String query = String(begin, begin + std::min(end - begin, static_cast<ptrdiff_t>(max_query_size)));
2019-06-20 07:17:21 +00:00
auto query_for_logging = prepareQueryForLogging(query, context);
logQuery(query_for_logging, context, internal);
2019-03-04 18:28:42 +00:00
if (!internal)
{
onExceptionBeforeStart(query_for_logging, context, time_in_microseconds(current_time), ast);
}
throw;
}
setQuerySpecificSettings(ast, context);
2021-08-30 09:52:35 +00:00
/// There is an option of probabilistic logging of queries.
/// If it is used - do the random sampling and "collapse" the settings.
/// It allows to consistently log queries with all the subqueries in distributed query processing
/// (subqueries on remote nodes will receive these "collapsed" settings)
if (!internal && settings.log_queries && settings.log_queries_probability < 1.0)
{
std::bernoulli_distribution should_write_log{settings.log_queries_probability};
context->setSetting("log_queries", should_write_log(thread_local_rng));
context->setSetting("log_queries_probability", 1.0);
}
2019-01-18 16:30:35 +00:00
/// Copy query into string. It will be written to log and presented in processlist. If an INSERT query, string will not include data to insertion.
String query(begin, query_end);
BlockIO res;
2020-03-09 00:28:05 +00:00
String query_for_logging;
2019-06-20 07:17:21 +00:00
try
{
2019-06-15 18:22:48 +00:00
/// Replace ASTQueryParameter with ASTLiteral for prepared statements.
if (context->hasQueryParameters())
2019-06-16 17:32:37 +00:00
{
ReplaceQueryParameterVisitor visitor(context->getQueryParameters());
2019-06-16 17:32:37 +00:00
visitor.visit(ast);
query = serializeAST(*ast);
2020-10-08 09:06:04 +00:00
}
2019-06-15 18:22:48 +00:00
2021-03-04 11:10:21 +00:00
/// MUST go before any modification (except for prepared statements,
/// since it substitute parameters and w/o them query does not contain
/// parameters), to keep query as-is in query_log and server log.
query_for_logging = prepareQueryForLogging(query, context);
logQuery(query_for_logging, context, internal);
2020-10-08 09:06:04 +00:00
/// Propagate WITH statement to children ASTSelect.
if (settings.enable_global_with_statement)
{
ApplyWithGlobalVisitor().visit(ast);
}
2021-08-12 11:42:51 +00:00
{
2021-08-13 09:57:15 +00:00
SelectIntersectExceptQueryVisitor::Data data;
SelectIntersectExceptQueryVisitor{data}.visit(ast);
2021-08-12 11:42:51 +00:00
}
{
2021-08-13 09:57:15 +00:00
/// Normalize SelectWithUnionQuery
NormalizeSelectWithUnionQueryVisitor::Data data{context->getSettingsRef().union_default_mode};
NormalizeSelectWithUnionQueryVisitor{data}.visit(ast);
2021-08-12 11:42:51 +00:00
}
2021-02-25 07:47:08 +00:00
/// Check the limits.
checkASTSizeLimits(*ast, settings);
/// Put query to process list. But don't put SHOW PROCESSLIST query itself.
ProcessList::EntryPtr process_list_entry;
2019-03-11 13:22:51 +00:00
if (!internal && !ast->as<ASTShowProcesslistQuery>())
{
2019-06-20 07:17:21 +00:00
/// processlist also has query masked now, to avoid secrets leaks though SHOW PROCESSLIST by other users.
process_list_entry = context->getProcessList().insert(query_for_logging, ast.get(), context);
context->setProcessListElement(&process_list_entry->get());
}
/// Load external tables if they were provided
context->initializeExternalTablesIfSet();
2019-05-28 18:30:10 +00:00
auto * insert_query = ast->as<ASTInsertQuery>();
2021-03-17 14:11:47 +00:00
2021-04-15 10:53:26 +00:00
if (insert_query && insert_query->table_id)
2021-03-17 14:11:47 +00:00
/// Resolve database before trying to use async insert feature - to properly hash the query.
insert_query->table_id = context->resolveStorageID(insert_query->table_id);
2021-03-17 14:11:47 +00:00
if (insert_query && insert_query->select)
2019-05-28 18:30:10 +00:00
{
/// Prepare Input storage before executing interpreter if we already got a buffer with data.
2019-05-28 18:30:10 +00:00
if (istr)
{
ASTPtr input_function;
2019-05-30 21:33:06 +00:00
insert_query->tryFindInputFunction(input_function);
if (input_function)
{
StoragePtr storage = context->executeTableFunction(input_function);
auto & input_storage = dynamic_cast<StorageInput &>(*storage);
auto input_metadata_snapshot = input_storage.getInMemoryMetadataPtr();
auto pipe = getSourceFromASTInsertQuery(
ast, true, input_metadata_snapshot->getSampleBlock(), context, input_function);
2021-07-20 18:18:43 +00:00
input_storage.setPipe(std::move(pipe));
}
2019-05-28 18:30:10 +00:00
}
}
else
/// reset Input callbacks if query is not INSERT SELECT
context->resetInputCallbacks();
2019-05-28 18:30:10 +00:00
auto * queue = context->getAsynchronousInsertQueue();
const bool async_insert = queue
&& insert_query && !insert_query->select
2021-09-16 17:18:34 +00:00
&& insert_query->hasInlinedData() && settings.async_insert;
if (async_insert)
{
2021-09-01 23:18:09 +00:00
queue->push(ast, context);
2021-08-27 21:29:10 +00:00
BlockIO io;
if (settings.wait_for_async_insert)
{
auto timeout = settings.wait_for_async_insert_timeout.totalMilliseconds();
2021-09-01 23:18:09 +00:00
auto query_id = context->getCurrentQueryId();
auto source = std::make_shared<WaitForAsyncInsertSource>(query_id, timeout, *queue);
2021-09-17 17:52:26 +00:00
io.pipeline = QueryPipeline(Pipe(std::move(source)));
2021-08-27 21:29:10 +00:00
}
return std::make_tuple(ast, std::move(io));
}
2020-12-18 06:54:38 +00:00
auto interpreter = InterpreterFactory::get(ast, context, SelectQueryOptions(stage).setInternal(internal));
2019-03-26 18:28:37 +00:00
std::shared_ptr<const EnabledQuota> quota;
if (!interpreter->ignoreQuota())
{
quota = context->getQuota();
if (quota)
{
if (ast->as<ASTSelectQuery>() || ast->as<ASTSelectWithUnionQuery>())
{
quota->used(Quota::QUERY_SELECTS, 1);
}
else if (ast->as<ASTInsertQuery>())
{
quota->used(Quota::QUERY_INSERTS, 1);
}
quota->used(Quota::QUERIES, 1);
quota->checkExceeded(Quota::ERRORS);
}
}
2020-09-15 10:40:39 +00:00
StreamLocalLimits limits;
if (!interpreter->ignoreLimits())
{
limits.mode = LimitsMode::LIMITS_CURRENT; //-V1048
limits.size_limits = SizeLimits(settings.max_result_rows, settings.max_result_bytes, settings.result_overflow_mode);
}
{
2020-11-19 15:52:11 +00:00
OpenTelemetrySpanHolder span("IInterpreter::execute()");
res = interpreter->execute();
}
2020-05-28 08:24:59 +00:00
QueryPipeline & pipeline = res.pipeline;
2019-03-26 18:28:37 +00:00
2020-04-22 06:01:33 +00:00
if (const auto * insert_interpreter = typeid_cast<const InterpreterInsertQuery *>(&*interpreter))
{
/// Save insertion table (not table function). TODO: support remote() table function.
2020-03-02 20:23:58 +00:00
auto table_id = insert_interpreter->getDatabaseTable();
if (!table_id.empty())
context->setInsertionTable(std::move(table_id));
}
if (process_list_entry)
{
/// Query was killed before execution
if ((*process_list_entry)->isKilled())
throw Exception("Query '" + (*process_list_entry)->getInfo().client_info.current_query_id + "' is killed in pending state",
ErrorCodes::QUERY_WAS_CANCELLED);
}
/// Hold element of process list till end of query execution.
res.process_list_entry = process_list_entry;
2021-09-21 06:57:55 +00:00
if (pipeline.pulling() || pipeline.completed())
{
/// Limits on the result, the quota on the result, and also callback for progress.
/// Limits apply only to the final result.
pipeline.setProgressCallback(context->getProgressCallback());
pipeline.setProcessListElement(context->getProcessListElement());
2021-09-21 06:57:55 +00:00
if (stage == QueryProcessingStage::Complete && pipeline.pulling())
2021-09-15 19:35:48 +00:00
pipeline.setLimitsAndQuota(limits, quota);
}
2021-09-15 19:35:48 +00:00
else if (pipeline.pushing())
{
2021-09-15 19:35:48 +00:00
pipeline.setProcessListElement(context->getProcessListElement());
}
/// Everything related to query log.
{
QueryLogElement elem;
elem.type = QueryLogElementType::QUERY_START; //-V1048
elem.event_time = time_in_seconds(current_time);
elem.event_time_microseconds = time_in_microseconds(current_time);
elem.query_start_time = time_in_seconds(current_time);
elem.query_start_time_microseconds = time_in_microseconds(current_time);
elem.current_database = context->getCurrentDatabase();
2019-06-20 07:17:21 +00:00
elem.query = query_for_logging;
if (settings.log_formatted_queries)
elem.formatted_query = queryToString(ast);
elem.normalized_query_hash = normalizedQueryHash<false>(query_for_logging);
elem.client_info = client_info;
bool log_queries = settings.log_queries && !internal;
/// Log into system table start of query execution, if need.
if (log_queries)
{
2021-09-19 18:53:36 +00:00
/// This check is not obvious, but without it 01220_scalar_optimization_in_alter fails.
if (pipeline.initialized())
2020-12-18 06:54:38 +00:00
{
const auto & info = context->getQueryAccessInfo();
2020-12-18 06:54:38 +00:00
elem.query_databases = info.databases;
elem.query_tables = info.tables;
elem.query_columns = info.columns;
elem.query_projections = info.projections;
elem.query_views = info.views;
2020-12-18 06:54:38 +00:00
}
interpreter->extendQueryLogElem(elem, ast, context, query_database, query_table);
if (settings.log_query_settings)
elem.query_settings = std::make_shared<Settings>(context->getSettingsRef());
2021-01-25 19:18:23 +00:00
elem.log_comment = settings.log_comment;
if (elem.log_comment.size() > settings.max_query_size)
elem.log_comment.resize(settings.max_query_size);
2020-12-28 02:38:16 +00:00
if (elem.type >= settings.log_queries_min_type && !settings.log_queries_min_query_duration_ms.totalMilliseconds())
{
if (auto query_log = context->getQueryLog())
query_log->add(elem);
}
}
/// Common code for finish and exception callbacks
auto status_info_to_query_log = [](QueryLogElement & element, const QueryStatusInfo & info, const ASTPtr query_ast, const ContextPtr context_ptr) mutable
{
DB::UInt64 query_time = info.elapsed_seconds * 1000000;
ProfileEvents::increment(ProfileEvents::QueryTimeMicroseconds, query_time);
if (query_ast->as<ASTSelectQuery>() || query_ast->as<ASTSelectWithUnionQuery>())
{
ProfileEvents::increment(ProfileEvents::SelectQueryTimeMicroseconds, query_time);
}
else if (query_ast->as<ASTInsertQuery>())
{
ProfileEvents::increment(ProfileEvents::InsertQueryTimeMicroseconds, query_time);
}
element.query_duration_ms = info.elapsed_seconds * 1000;
element.read_rows = info.read_rows;
element.read_bytes = info.read_bytes;
element.written_rows = info.written_rows;
element.written_bytes = info.written_bytes;
element.memory_usage = info.peak_memory_usage > 0 ? info.peak_memory_usage : 0;
element.thread_ids = std::move(info.thread_ids);
element.profile_counters = std::move(info.profile_counters);
/// We need to refresh the access info since dependent views might have added extra information, either during
/// creation of the view (PushingToViewsBlockOutputStream) or while executing its internal SELECT
const auto & access_info = context_ptr->getQueryAccessInfo();
2021-06-28 09:05:31 +00:00
element.query_databases.insert(access_info.databases.begin(), access_info.databases.end());
element.query_tables.insert(access_info.tables.begin(), access_info.tables.end());
element.query_columns.insert(access_info.columns.begin(), access_info.columns.end());
element.query_projections.insert(access_info.projections.begin(), access_info.projections.end());
element.query_views.insert(access_info.views.begin(), access_info.views.end());
const auto & factories_info = context_ptr->getQueryFactoriesInfo();
element.used_aggregate_functions = factories_info.aggregate_functions;
element.used_aggregate_function_combinators = factories_info.aggregate_function_combinators;
element.used_database_engines = factories_info.database_engines;
element.used_data_type_families = factories_info.data_type_families;
element.used_dictionaries = factories_info.dictionaries;
element.used_formats = factories_info.formats;
element.used_functions = factories_info.functions;
element.used_storages = factories_info.storages;
element.used_table_functions = factories_info.table_functions;
};
/// Also make possible for caller to log successful query finish and exception during execution.
auto finish_callback = [elem, context, ast,
log_queries,
log_queries_min_type = settings.log_queries_min_type,
log_queries_min_query_duration_ms = settings.log_queries_min_query_duration_ms.totalMilliseconds(),
2021-09-20 09:05:34 +00:00
status_info_to_query_log,
pulling_pipeline = pipeline.pulling()
]
2021-09-15 19:35:48 +00:00
(QueryPipeline & query_pipeline) mutable
{
QueryStatus * process_list_elem = context->getProcessListElement();
if (!process_list_elem)
return;
/// Update performance counters before logging to query_log
CurrentThread::finalizePerformanceCounters();
QueryStatusInfo info = process_list_elem->getInfo(true, context->getSettingsRef().log_profile_events);
double elapsed_seconds = info.elapsed_seconds;
elem.type = QueryLogElementType::QUERY_FINISH;
// construct event_time and event_time_microseconds using the same time point
// so that the two times will always be equal up to a precision of a second.
2020-09-30 17:36:02 +00:00
const auto finish_time = std::chrono::system_clock::now();
elem.event_time = time_in_seconds(finish_time);
elem.event_time_microseconds = time_in_microseconds(finish_time);
status_info_to_query_log(elem, info, ast, context);
auto progress_callback = context->getProgressCallback();
2019-05-06 06:57:48 +00:00
if (progress_callback)
progress_callback(Progress(WriteProgress(info.written_rows, info.written_bytes)));
2021-09-20 09:05:34 +00:00
if (pulling_pipeline)
{
2021-09-15 19:35:48 +00:00
query_pipeline.tryGetResultRowsAndBytes(elem.result_rows, elem.result_bytes);
}
else /// will be used only for ordinary INSERT queries
{
auto progress_out = process_list_elem->getProgressOut();
elem.result_rows = progress_out.read_rows;
elem.result_bytes = progress_out.read_bytes;
}
if (elem.read_rows != 0)
{
2020-05-30 21:57:37 +00:00
LOG_INFO(&Poco::Logger::get("executeQuery"), "Read {} rows, {} in {} sec., {} rows/sec., {}/sec.",
2020-05-30 21:35:52 +00:00
elem.read_rows, ReadableSize(elem.read_bytes), elapsed_seconds,
2020-05-23 21:41:35 +00:00
static_cast<size_t>(elem.read_rows / elapsed_seconds),
2020-05-30 21:35:52 +00:00
ReadableSize(elem.read_bytes / elapsed_seconds));
}
if (log_queries && elem.type >= log_queries_min_type && Int64(elem.query_duration_ms) >= log_queries_min_query_duration_ms)
{
if (auto query_log = context->getQueryLog())
query_log->add(elem);
}
2020-08-20 20:59:40 +00:00
if (auto opentelemetry_span_log = context->getOpenTelemetrySpanLog();
2021-05-04 22:42:14 +00:00
context->query_trace_context.trace_id != UUID()
2020-10-22 16:47:20 +00:00
&& opentelemetry_span_log)
2020-08-20 20:59:40 +00:00
{
2020-08-27 18:44:20 +00:00
OpenTelemetrySpanLogElement span;
span.trace_id = context->query_trace_context.trace_id;
span.span_id = context->query_trace_context.span_id;
span.parent_span_id = context->getClientInfo().client_trace_context.span_id;
2020-08-27 18:44:20 +00:00
span.operation_name = "query";
span.start_time_us = elem.query_start_time_microseconds;
2020-09-30 17:36:02 +00:00
span.finish_time_us = time_in_microseconds(finish_time);
2020-08-27 18:44:20 +00:00
2020-11-12 23:27:18 +00:00
/// Keep values synchronized to type enum in QueryLogElement::createBlock.
2020-09-08 13:19:27 +00:00
span.attribute_names.push_back("clickhouse.query_status");
2020-08-27 18:44:20 +00:00
span.attribute_values.push_back("QueryFinish");
2020-09-08 13:19:27 +00:00
span.attribute_names.push_back("db.statement");
2020-08-27 18:44:20 +00:00
span.attribute_values.push_back(elem.query);
2020-09-08 13:19:27 +00:00
span.attribute_names.push_back("clickhouse.query_id");
2020-08-27 18:44:20 +00:00
span.attribute_values.push_back(elem.client_info.current_query_id);
if (!context->query_trace_context.tracestate.empty())
2020-08-28 01:21:08 +00:00
{
2020-09-08 13:19:27 +00:00
span.attribute_names.push_back("clickhouse.tracestate");
2020-08-28 01:21:08 +00:00
span.attribute_values.push_back(
context->query_trace_context.tracestate);
2020-08-28 01:21:08 +00:00
}
2020-08-27 18:44:20 +00:00
2020-10-22 16:47:20 +00:00
opentelemetry_span_log->add(span);
2020-08-20 20:59:40 +00:00
}
};
auto exception_callback = [elem, context, ast,
log_queries,
log_queries_min_type = settings.log_queries_min_type,
log_queries_min_query_duration_ms = settings.log_queries_min_query_duration_ms.totalMilliseconds(),
quota(quota), status_info_to_query_log] () mutable
{
if (quota)
quota->used(Quota::ERRORS, 1, /* check_exceeded = */ false);
elem.type = QueryLogElementType::EXCEPTION_WHILE_PROCESSING;
// event_time and event_time_microseconds are being constructed from the same time point
2020-10-27 11:04:03 +00:00
// to ensure that both the times will be equal up to the precision of a second.
const auto time_now = std::chrono::system_clock::now();
elem.event_time = time_in_seconds(time_now);
elem.event_time_microseconds = time_in_microseconds(time_now);
elem.query_duration_ms = 1000 * (elem.event_time - elem.query_start_time);
2020-01-22 12:29:30 +00:00
elem.exception_code = getCurrentExceptionCode();
elem.exception = getCurrentExceptionMessage(false);
QueryStatus * process_list_elem = context->getProcessListElement();
const Settings & current_settings = context->getSettingsRef();
/// Update performance counters before logging to query_log
CurrentThread::finalizePerformanceCounters();
if (process_list_elem)
{
2018-08-27 18:16:32 +00:00
QueryStatusInfo info = process_list_elem->getInfo(true, current_settings.log_profile_events, false);
status_info_to_query_log(elem, info, ast, context);
}
2018-08-27 18:16:32 +00:00
if (current_settings.calculate_text_stack_trace)
2018-08-23 01:31:28 +00:00
setExceptionStackTrace(elem);
logException(context, elem);
/// In case of exception we log internal queries also
if (log_queries && elem.type >= log_queries_min_type && Int64(elem.query_duration_ms) >= log_queries_min_query_duration_ms)
{
if (auto query_log = context->getQueryLog())
query_log->add(elem);
}
ProfileEvents::increment(ProfileEvents::FailedQuery);
2020-05-22 23:37:14 +00:00
if (ast->as<ASTSelectQuery>() || ast->as<ASTSelectWithUnionQuery>())
{
ProfileEvents::increment(ProfileEvents::FailedSelectQuery);
}
2020-05-22 23:37:14 +00:00
else if (ast->as<ASTInsertQuery>())
{
ProfileEvents::increment(ProfileEvents::FailedInsertQuery);
}
};
2019-04-05 10:52:07 +00:00
res.finish_callback = std::move(finish_callback);
res.exception_callback = std::move(exception_callback);
}
2021-02-26 12:04:11 +00:00
}
catch (...)
{
if (!internal)
2019-06-20 07:17:21 +00:00
{
if (query_for_logging.empty())
query_for_logging = prepareQueryForLogging(query, context);
onExceptionBeforeStart(query_for_logging, context, time_in_microseconds(current_time), ast);
2019-06-20 07:17:21 +00:00
}
throw;
}
2020-02-27 15:40:11 +00:00
return std::make_tuple(ast, std::move(res));
2015-06-18 02:11:05 +00:00
}
/// Executes the query given as a string and returns the resulting BlockIO.
///
/// The whole text of `query` is passed to executeQueryImpl (no input buffer is supplied).
/// If the parsed query is an ASTQueryWithOutput, its output format is resolved: the FORMAT
/// from the query when present, otherwise the default format of `context`. When that
/// format is "Null", the returned BlockIO is flagged with `null_format`.
BlockIO executeQuery(
    const String & query,
    ContextMutablePtr context,
    bool internal,
    QueryProcessingStage::Enum stage)
{
    ASTPtr parsed_query;
    BlockIO result;
    std::tie(parsed_query, result) = executeQueryImpl(query.data(), query.data() + query.size(), context, internal, stage, nullptr);

    const auto * query_with_output = dynamic_cast<const ASTQueryWithOutput *>(parsed_query.get());
    if (!query_with_output)
        return result;

    /// An explicit FORMAT in the query takes precedence over the context default.
    String format_name;
    if (query_with_output->format)
        format_name = getIdentifierName(query_with_output->format);
    else
        format_name = context->getDefaultFormat();

    if (format_name == "Null")
        result.null_format = true;

    return result;
}
2020-05-19 14:06:33 +00:00
/// Overload that still accepts the `allow_processors` flag.
///
/// With the flag set, it simply forwards to the overload without the flag.
/// With the flag cleared, it throws NOT_IMPLEMENTED.
BlockIO executeQuery(
    bool allow_processors,
    const String & query,
    ContextMutablePtr context,
    bool internal,
    QueryProcessingStage::Enum stage)
{
    if (allow_processors)
        return executeQuery(query, context, internal, stage);

    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Flag allow_processors is deprecated for executeQuery");
}
2011-10-30 11:30:52 +00:00
void executeQuery(
ReadBuffer & istr,
WriteBuffer & ostr,
bool allow_into_outfile,
2021-05-31 14:49:02 +00:00
ContextMutablePtr context,
2021-08-01 22:12:15 +00:00
SetResultDetailsFunc set_result_details,
2021-08-19 11:07:47 +00:00
const std::optional<FormatSettings> & output_format_settings)
2011-10-30 11:30:52 +00:00
{
PODArray<char> parse_buf;
const char * begin;
const char * end;
2021-02-15 18:57:35 +00:00
istr.nextIfAtEnd();
size_t max_query_size = context->getSettingsRef().max_query_size;
2018-03-26 19:41:55 +00:00
if (istr.buffer().end() - istr.position() > static_cast<ssize_t>(max_query_size))
{
/// If remaining buffer space in 'istr' is enough to parse query up to 'max_query_size' bytes, then parse inplace.
begin = istr.position();
end = istr.buffer().end();
istr.position() += end - begin;
}
else
{
/// FIXME: this is an extra copy not required for async insertion.
/// If not - copy enough data into 'parse_buf'.
WriteBufferFromVector<PODArray<char>> out(parse_buf);
LimitReadBuffer limit(istr, max_query_size + 1, false);
copyData(limit, out);
2020-01-10 21:42:26 +00:00
out.finalize();
begin = parse_buf.data();
end = begin + parse_buf.size();
}
ASTPtr ast;
BlockIO streams;
2021-02-15 18:57:35 +00:00
std::tie(ast, streams) = executeQueryImpl(begin, end, context, false, QueryProcessingStage::Complete, &istr);
2019-03-26 18:28:37 +00:00
auto & pipeline = streams.pipeline;
2021-09-20 09:48:53 +00:00
std::unique_ptr<WriteBuffer> compressed_buffer;
try
{
2021-09-15 19:35:48 +00:00
if (pipeline.pushing())
{
2021-09-17 17:52:26 +00:00
auto pipe = getSourceFromASTInsertQuery(ast, true, pipeline.getHeader(), context, nullptr);
2021-09-15 19:35:48 +00:00
pipeline.complete(std::move(pipe));
}
2021-09-17 11:40:03 +00:00
else if (pipeline.pulling())
{
2021-09-15 19:35:48 +00:00
const ASTQueryWithOutput * ast_query_with_output = dynamic_cast<const ASTQueryWithOutput *>(ast.get());
WriteBuffer * out_buf = &ostr;
if (ast_query_with_output && ast_query_with_output->out_file)
{
if (!allow_into_outfile)
throw Exception("INTO OUTFILE is not allowed", ErrorCodes::INTO_OUTFILE_NOT_ALLOWED);
2021-09-15 19:35:48 +00:00
const auto & out_file = typeid_cast<const ASTLiteral &>(*ast_query_with_output->out_file).value.safeGet<std::string>();
std::string compression_method;
if (ast_query_with_output->compression)
{
const auto & compression_method_node = ast_query_with_output->compression->as<ASTLiteral &>();
compression_method = compression_method_node.value.safeGet<std::string>();
}
2021-08-03 11:54:37 +00:00
compressed_buffer = wrapWriteBufferWithCompressionMethod(
std::make_unique<WriteBufferFromFile>(out_file, DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_EXCL | O_CREAT),
chooseCompressionMethod(out_file, compression_method),
2021-08-03 11:54:37 +00:00
/* compression level = */ 3
);
}
String format_name = ast_query_with_output && (ast_query_with_output->format != nullptr)
2021-09-15 19:35:48 +00:00
? getIdentifierName(ast_query_with_output->format)
: context->getDefaultFormat();
2021-09-15 19:35:48 +00:00
auto out = FormatFactory::instance().getOutputFormatParallelIfPossible(
2021-08-03 11:54:37 +00:00
format_name,
compressed_buffer ? *compressed_buffer : *out_buf,
2021-09-19 20:15:10 +00:00
materializeBlock(pipeline.getHeader()),
2021-08-03 11:54:37 +00:00
context,
{},
output_format_settings);
2021-09-15 19:35:48 +00:00
out->setAutoFlush();
/// Save previous progress callback if any. TODO Do it more conveniently.
auto previous_progress_callback = context->getProgressCallback();
/// NOTE Progress callback takes shared ownership of 'out'.
2021-09-15 19:35:48 +00:00
pipeline.setProgressCallback([out, previous_progress_callback] (const Progress & progress)
{
if (previous_progress_callback)
previous_progress_callback(progress);
out->onProgress(progress);
});
if (set_result_details)
set_result_details(
context->getClientInfo().current_query_id, out->getContentType(), format_name, DateLUT::instance().getTimeZone());
2019-02-02 12:24:26 +00:00
2021-09-15 19:35:48 +00:00
pipeline.complete(std::move(out));
}
2021-09-15 19:35:48 +00:00
else
2019-03-26 18:28:37 +00:00
{
2021-09-15 19:35:48 +00:00
pipeline.setProgressCallback(context->getProgressCallback());
2019-03-26 18:28:37 +00:00
}
2021-09-17 11:40:03 +00:00
if (pipeline.initialized())
{
CompletedPipelineExecutor executor(pipeline);
executor.execute();
2019-03-26 18:28:37 +00:00
}
else
{
/// It's possible to have queries without input and output.
}
}
catch (...)
{
streams.onException();
throw;
}
streams.onFinish();
2015-06-18 02:11:05 +00:00
}
2012-03-11 08:52:56 +00:00
2021-07-01 13:21:38 +00:00
/// Executes the pipeline of `streams` when it needs neither an input nor an output to be attached.
///
/// - If the pipeline is not initialized, returns immediately (onFinish() is not called on this path).
/// - If the pipeline is initialized but not completed, throws LOGICAL_ERROR.
/// - Otherwise installs the progress callback from `context` and runs the pipeline to the end.
///
/// Any exception is reported through BlockIO::onException() and rethrown;
/// after a successful run BlockIO::onFinish() is called.
void executeTrivialBlockIO(BlockIO & streams, ContextPtr context)
{
    try
    {
        auto & pipeline = streams.pipeline;

        if (!pipeline.initialized())
            return;

        const bool is_completed = pipeline.completed();
        if (!is_completed)
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Query pipeline requires output, but no output buffer provided, it's a bug");

        pipeline.setProgressCallback(context->getProgressCallback());

        CompletedPipelineExecutor executor(pipeline);
        executor.execute();
    }
    catch (...)
    {
        streams.onException();
        throw;
    }

    streams.onFinish();
}
2011-10-30 11:30:52 +00:00
}