ClickHouse/src/Interpreters/InterpreterInsertQuery.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

684 lines
28 KiB
C++
Raw Normal View History

#include <Interpreters/InterpreterInsertQuery.h>
2017-07-13 20:58:19 +00:00
#include <Access/Common/AccessFlags.h>
2022-03-29 17:25:34 +00:00
#include <Access/EnabledQuota.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <Columns/ColumnNullable.h>
2021-10-08 14:03:54 +00:00
#include <Processors/Transforms/buildPushingToViewsChain.h>
#include <DataTypes/DataTypeNullable.h>
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
#include <Interpreters/InterpreterWatchQuery.h>
#include <Interpreters/QueryLog.h>
#include <Interpreters/TranslateQualifiedNamesVisitor.h>
#include <Interpreters/addMissingDefaults.h>
#include <Interpreters/getTableExpressions.h>
#include <Interpreters/processColumnTransformers.h>
2022-12-15 12:03:09 +00:00
#include <Interpreters/InterpreterSelectQueryAnalyzer.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTInsertQuery.h>
#include <Parsers/ASTSelectQuery.h>
2018-02-25 06:34:20 +00:00
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
2021-09-26 14:54:59 +00:00
#include <Processors/Sinks/EmptySink.h>
#include <Processors/Transforms/CheckConstraintsTransform.h>
#include <Processors/Transforms/CountingTransform.h>
2020-11-17 17:16:55 +00:00
#include <Processors/Transforms/ExpressionTransform.h>
2021-04-15 14:34:55 +00:00
#include <Processors/Transforms/MaterializingTransform.h>
#include <Processors/Transforms/SquashingChunksTransform.h>
#include <Processors/Transforms/getSourceFromASTInsertQuery.h>
2022-05-20 19:49:31 +00:00
#include <Processors/QueryPlan/QueryPlan.h>
#include <QueryPipeline/QueryPipelineBuilder.h>
#include <Storages/StorageDistributed.h>
2021-01-05 03:22:06 +00:00
#include <Storages/StorageMaterializedView.h>
2022-05-11 08:47:08 +00:00
#include <Storages/WindowView/StorageWindowView.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <Common/ThreadStatus.h>
#include <Common/checkStackSize.h>
#include <Common/ProfileEvents.h>
/// Profile event counters used by this file. They are only declared here (extern);
/// InterpreterInsertQuery::buildChain() increments both of them once per call.
namespace ProfileEvents
{
extern const Event InsertQueriesWithSubqueries;
extern const Event QueriesWithSubqueries;
}
2011-10-30 11:30:52 +00:00
namespace DB
{
/// Error codes thrown by the code in this file. They are only declared here (extern).
///  - NOT_IMPLEMENTED: PARTITION BY requested for a storage that does not support it.
///  - NO_SUCH_COLUMN_IN_TABLE: an INSERT column name is absent from the table.
///  - ILLEGAL_COLUMN: an INSERT names a MATERIALIZED column while that is not allowed.
///  - DUPLICATE_COLUMN: an INSERT column name is listed more than once.
namespace ErrorCodes
{
2021-07-15 08:23:59 +00:00
extern const int NOT_IMPLEMENTED;
extern const int NO_SUCH_COLUMN_IN_TABLE;
extern const int ILLEGAL_COLUMN;
extern const int DUPLICATE_COLUMN;
}
2018-01-12 13:03:19 +00:00
/// Stores the query and the behaviour flags, then performs two up-front checks:
/// the stack-size guard and, when the context has a quota, the WRITTEN_BYTES quota.
InterpreterInsertQuery::InterpreterInsertQuery(
    const ASTPtr & query_ptr_,
    ContextPtr context_,
    bool allow_materialized_,
    bool no_squash_,
    bool no_destination_,
    bool async_insert_)
    : WithContext(context_)
    , query_ptr(query_ptr_)
    , allow_materialized(allow_materialized_)
    , no_squash(no_squash_)
    , no_destination(no_destination_)
    , async_insert(async_insert_)
{
    checkStackSize();

    /// Refuse to start an INSERT when the written-bytes quota is already exhausted.
    auto write_quota = getContext()->getQuota();
    if (write_quota)
        write_quota->checkExceeded(QuotaType::WRITTEN_BYTES);
}
2021-09-17 12:59:40 +00:00
2020-01-24 16:20:36 +00:00
/// Returns the storage the INSERT writes into.
///  - INSERT INTO FUNCTION ...: the table function is executed (with `is_insert_query = true`);
///    if it asks for a structure hint and the query has a SELECT, the SELECT header is passed as the hint.
///  - Otherwise `query.table_id` is resolved (and updated in place) and looked up in DatabaseCatalog.
StoragePtr InterpreterInsertQuery::getTable(ASTInsertQuery & query)
{
    auto current_context = getContext();

    if (query.table_function)
    {
        const auto & table_function_factory = TableFunctionFactory::instance();
        TableFunctionPtr table_function = table_function_factory.get(query.table_function, current_context);

        /// If the table function needs a structure hint from the select query,
        /// derive the header from a temporary interpreter over that select.
        if (query.select && table_function->needStructureHint())
        {
            auto select_options = SelectQueryOptions(QueryProcessingStage::Complete, 1);
            Block select_header;

            if (!current_context->getSettingsRef().allow_experimental_analyzer)
            {
                InterpreterSelectWithUnionQuery select_interpreter{query.select, current_context, select_options};
                auto throwaway_pipeline = select_interpreter.buildQueryPipeline();
                select_header = throwaway_pipeline.getHeader();
            }
            else
            {
                InterpreterSelectQueryAnalyzer select_interpreter(query.select, current_context, select_options);
                select_header = select_interpreter.getSampleBlock();
            }

            ColumnsDescription hint_columns{select_header.getNamesAndTypesList()};
            table_function->setStructureHint(hint_columns);
        }

        return table_function->execute(
            query.table_function,
            current_context,
            table_function->getName(),
            /* cached_columns */ {},
            /* use_global_context */ false,
            /* is_insert_query */ true);
    }

    if (!query.table_id)
    {
        /// Insert query parser does not fill table_id because table and
        /// database can be parameters and be filled after parsing.
        StorageID id_from_names(query.getDatabase(), query.getTable());
        query.table_id = current_context->resolveStorageID(id_from_names);
    }
    else
    {
        query.table_id = current_context->resolveStorageID(query.table_id);
    }

    return DatabaseCatalog::instance().getTable(query.table_id, current_context);
}
/// Builds the header of the data the INSERT expects.
/// With an explicit column list the names are expanded via processColumnTransformers()
/// and validated by the Names-based overload; without one the header comes from the
/// window view, from the metadata with virtuals (no_destination), or from the
/// non-materialized columns of the metadata snapshot.
Block InterpreterInsertQuery::getSampleBlock(
    const ASTInsertQuery & query,
    const StoragePtr & table,
    const StorageMetadataPtr & metadata_snapshot) const
{
    if (query.columns)
    {
        /// Form the block based on the column names from the query.
        const auto expanded_columns
            = processColumnTransformers(getContext()->getCurrentDatabase(), table, metadata_snapshot, query.columns);

        Names column_names;
        column_names.reserve(expanded_columns->children.size());
        for (const auto & column_ast : expanded_columns->children)
            column_names.emplace_back(column_ast->getColumnName());

        return getSampleBlock(column_names, table, metadata_snapshot);
    }

    /// The query does not include information about columns.
    if (auto * window_view = dynamic_cast<StorageWindowView *>(table.get()))
        return window_view->getInputHeader();

    if (no_destination)
        return metadata_snapshot->getSampleBlockWithVirtuals(table->getVirtuals());

    return metadata_snapshot->getSampleBlockNonMaterialized();
}
std::optional<Names> InterpreterInsertQuery::getInsertColumnNames() const
{
auto const * insert_query = query_ptr->as<ASTInsertQuery>();
if (!insert_query || !insert_query->columns)
return std::nullopt;
auto table = DatabaseCatalog::instance().getTable(getDatabaseTable(), getContext());
const auto columns_ast = processColumnTransformers(getContext()->getCurrentDatabase(), table, table->getInMemoryMetadataPtr(), insert_query->columns);
2023-09-15 09:32:41 +00:00
Names names;
names.reserve(columns_ast->children.size());
for (const auto & identifier : columns_ast->children)
{
std::string current_name = identifier->getColumnName();
names.emplace_back(std::move(current_name));
}
return names;
}
2021-09-15 19:35:48 +00:00
/// Builds a header block (type + name per column) for the given column names.
/// Throws DUPLICATE_COLUMN for a repeated name, NO_SUCH_COLUMN_IN_TABLE for an unknown
/// name, and ILLEGAL_COLUMN for a MATERIALIZED column unless `allow_materialized` is set.
Block InterpreterInsertQuery::getSampleBlock(
    const Names & names,
    const StoragePtr & table,
    const StorageMetadataPtr & metadata_snapshot) const
{
    const Block physical_columns = metadata_snapshot->getSampleBlock();
    const Block insertable_columns = metadata_snapshot->getSampleBlockInsertable();

    Block result;
    for (const auto & column_name : names)
    {
        if (result.has(column_name))
            throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Column {} specified more than once", column_name);

        /// Ordinary or ephemeral column: take it as is.
        if (insertable_columns.has(column_name))
        {
            result.insert(ColumnWithTypeAndName(insertable_columns.getByName(column_name).type, column_name));
            continue;
        }

        /// Not insertable and not physical: the table does not have a column with that name.
        if (!physical_columns.has(column_name))
            throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "No such column {} in table {}",
                column_name, table->getStorageID().getNameForLogs());

        /// Physical but not insertable: the column is materialized.
        if (!allow_materialized)
            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.",
                column_name);

        result.insert(ColumnWithTypeAndName(physical_columns.getByName(column_name).type, column_name));
    }

    return result;
}
2012-03-19 12:57:56 +00:00
static bool hasAggregateFunctions(const IAST * ast)
{
if (const auto * func = typeid_cast<const ASTFunction *>(ast))
2022-06-16 15:41:04 +00:00
if (AggregateUtils::isAggregateFunction(*func))
return true;
for (const auto & child : ast->children)
if (hasAggregateFunctions(child.get()))
return true;
2011-10-30 11:30:52 +00:00
return false;
}
2020-08-24 14:29:31 +00:00
/** A query that just reads all data without any complex computations or filtering.
* If we just pipe the result to INSERT, we don't have to use too many threads for read.
*/
2020-11-02 05:28:37 +00:00
static bool isTrivialSelect(const ASTPtr & select)
2020-08-24 14:29:31 +00:00
{
2020-11-02 08:02:35 +00:00
if (auto * select_query = select->as<ASTSelectQuery>())
2020-11-02 05:28:37 +00:00
{
const auto & tables = select_query->tables();
2020-08-24 14:29:31 +00:00
2020-11-02 05:28:37 +00:00
if (!tables)
return false;
2020-08-24 14:29:31 +00:00
2020-11-02 05:28:37 +00:00
const auto & tables_in_select_query = tables->as<ASTTablesInSelectQuery &>();
2020-08-24 14:29:31 +00:00
2020-11-02 05:28:37 +00:00
if (tables_in_select_query.children.size() != 1)
return false;
2020-08-24 14:29:31 +00:00
2020-11-02 05:28:37 +00:00
const auto & child = tables_in_select_query.children.front();
const auto & table_element = child->as<ASTTablesInSelectQueryElement &>();
const auto & table_expr = table_element.table_expression->as<ASTTableExpression &>();
2020-08-24 14:29:31 +00:00
2020-11-02 05:28:37 +00:00
if (table_expr.subquery)
return false;
2020-08-24 14:29:31 +00:00
2020-11-02 05:28:37 +00:00
/// Note: how to write it in more generic way?
return (!select_query->distinct
&& !select_query->limit_with_ties
&& !select_query->prewhere()
&& !select_query->where()
&& !select_query->groupBy()
&& !select_query->having()
&& !select_query->orderBy()
&& !select_query->limitBy()
&& !hasAggregateFunctions(select_query));
2020-11-02 05:28:37 +00:00
}
/// This query is ASTSelectWithUnionQuery subquery
return false;
2022-05-16 18:59:27 +00:00
}
2020-08-24 14:29:31 +00:00
2021-09-15 19:35:48 +00:00
/// Builds the complete insertion chain for `table`: the pre-sink transforms followed by the sink.
/// Increments the InsertQueriesWithSubqueries and QueriesWithSubqueries profile events.
Chain InterpreterInsertQuery::buildChain(
    const StoragePtr & table,
    const StorageMetadataPtr & metadata_snapshot,
    const Names & columns,
    ThreadStatusesHolderPtr thread_status_holder,
    std::atomic_uint64_t * elapsed_counter_ms)
{
    ProfileEvents::increment(ProfileEvents::InsertQueriesWithSubqueries);
    ProfileEvents::increment(ProfileEvents::QueriesWithSubqueries);

    /// Use the thread group of the current thread when there is one, otherwise create a new one.
    ThreadGroupPtr running_group = current_thread ? current_thread->getThreadGroup() : ThreadGroupPtr{};
    if (!running_group)
        running_group = std::make_shared<ThreadGroup>(getContext());

    auto sample = getSampleBlock(columns, table, metadata_snapshot);

    Chain sink = buildSink(table, metadata_snapshot, thread_status_holder, running_group, elapsed_counter_ms);
    Chain result = buildPreSinkChain(sink.getInputHeader(), table, metadata_snapshot, sample, thread_status_holder);
    result.appendChain(std::move(sink));

    return result;
}
/// Builds the final part of the insertion chain: either the storage's own sink,
/// or the chain produced by buildPushingToViewsChain().
Chain InterpreterInsertQuery::buildSink(
    const StoragePtr & table,
    const StorageMetadataPtr & metadata_snapshot,
    ThreadStatusesHolderPtr thread_status_holder,
    ThreadGroupPtr running_group,
    std::atomic_uint64_t * elapsed_counter_ms)
{
    /// The current thread status is forwarded only when a holder was supplied.
    ThreadStatus * thread_status = thread_status_holder ? current_thread : nullptr;

    auto context_ptr = getContext();

    if (!table->noPushingToViews() || no_destination)
    {
        return buildPushingToViewsChain(table, metadata_snapshot, context_ptr,
            query_ptr, no_destination,
            thread_status_holder, running_group, elapsed_counter_ms, async_insert);
    }

    /// NOTE: we explicitly ignore bound materialized views when inserting into Kafka Storage.
    ///       Otherwise we'll get duplicates when MV reads same rows again from Kafka.
    Chain out;

    /// Keep a reference to the context to make sure it stays alive until the chain is executed and destroyed
    out.addInterpreterContext(context_ptr);

    auto storage_sink = table->write(query_ptr, metadata_snapshot, context_ptr, async_insert);
    storage_sink->setRuntimeData(thread_status, elapsed_counter_ms);
    out.addSource(std::move(storage_sink));

    return out;
}
Chain InterpreterInsertQuery::buildPreSinkChain(
const Block & subsequent_header,
const StoragePtr & table,
const StorageMetadataPtr & metadata_snapshot,
const Block & query_sample_block,
ThreadStatusesHolderPtr thread_status_holder)
{
ThreadStatus * thread_status = current_thread;
if (!thread_status_holder)
thread_status = nullptr;
auto context_ptr = getContext();
const ASTInsertQuery * query = nullptr;
if (query_ptr)
query = query_ptr->as<ASTInsertQuery>();
const Settings & settings = context_ptr->getSettingsRef();
bool null_as_default = query && query->select && context_ptr->getSettingsRef().insert_null_as_default;
/// We create a pipeline of several streams, into which we will write data.
Chain out;
auto input_header = [&]() -> const Block &
{
return out.empty() ? subsequent_header : out.getInputHeader();
};
2021-09-15 19:35:48 +00:00
/// Note that we wrap transforms one on top of another, so we write them in reverse of data processing order.
/// Checking constraints. It must be done after calculation of all defaults, so we can check them on calculated columns.
if (const auto & constraints = metadata_snapshot->getConstraints(); !constraints.empty())
out.addSource(std::make_shared<CheckConstraintsTransform>(
table->getStorageID(), input_header(), metadata_snapshot->getConstraints(), context_ptr));
2021-09-15 19:35:48 +00:00
auto adding_missing_defaults_dag = addMissingDefaults(
query_sample_block,
input_header().getNamesAndTypesList(),
2021-09-15 19:35:48 +00:00
metadata_snapshot->getColumns(),
2021-09-17 12:05:54 +00:00
context_ptr,
2021-09-15 19:35:48 +00:00
null_as_default);
auto adding_missing_defaults_actions = std::make_shared<ExpressionActions>(adding_missing_defaults_dag);
/// Actually we don't know structure of input blocks from query/table,
/// because some clients break insertion protocol (columns != header)
out.addSource(std::make_shared<ConvertingTransform>(query_sample_block, adding_missing_defaults_actions));
/// It's important to squash blocks as early as possible (before other transforms),
/// because other transforms may work inefficient if block size is small.
/// Do not squash blocks if it is a sync INSERT into Distributed, since it lead to double bufferization on client and server side.
/// Client-side bufferization might cause excessive timeouts (especially in case of big blocks).
Rename directory monitor concept into background INSERT (#55978) * Limit log frequence for "Skipping send data over distributed table" message After SYSTEM STOP DISTRIBUTED SENDS it will constantly print this message. Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com> * Rename directory monitor concept into async INSERT Rename the following query settings (with preserving backward compatiblity, by keeping old name as an alias): - distributed_directory_monitor_sleep_time_ms -> distributed_async_insert_sleep_time_ms - distributed_directory_monitor_max_sleep_time_ms -> distributed_async_insert_max_sleep_time_ms - distributed_directory_monitor_batch -> distributed_async_insert_batch_inserts - distributed_directory_monitor_split_batch_on_failure -> distributed_async_insert_split_batch_on_failure Rename the following table settings (with preserving backward compatiblity, by keeping old name as an alias): - monitor_batch_inserts -> async_insert_batch - monitor_split_batch_on_failure -> async_insert_split_batch_on_failure - directory_monitor_sleep_time_ms -> async_insert_sleep_time_ms - directory_monitor_max_sleep_time_ms -> async_insert_max_sleep_time_ms And also update all the references: $ gg -e directory_monitor_ -e monitor_ tests docs | cut -d: -f1 | sort -u | xargs sed -e 's/distributed_directory_monitor_sleep_time_ms/distributed_async_insert_sleep_time_ms/g' -e 's/distributed_directory_monitor_max_sleep_time_ms/distributed_async_insert_max_sleep_time_ms/g' -e 's/distributed_directory_monitor_batch_inserts/distributed_async_insert_batch/g' -e 's/distributed_directory_monitor_split_batch_on_failure/distributed_async_insert_split_batch_on_failure/g' -e 's/monitor_batch_inserts/async_insert_batch/g' -e 's/monitor_split_batch_on_failure/async_insert_split_batch_on_failure/g' -e 's/monitor_sleep_time_ms/async_insert_sleep_time_ms/g' -e 's/monitor_max_sleep_time_ms/async_insert_max_sleep_time_ms/g' -i Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com> * Rename 
async_insert for Distributed into background_insert This will avoid amigibuity between general async INSERT's and INSERT into Distributed, which are indeed background, so new term express it even better. Mostly done with: $ git di HEAD^ --name-only | xargs sed -i -e 's/distributed_async_insert/distributed_background_insert/g' -e 's/async_insert_batch/background_insert_batch/g' -e 's/async_insert_split_batch_on_failure/background_insert_split_batch_on_failure/g' -e 's/async_insert_sleep_time_ms/background_insert_sleep_time_ms/g' -e 's/async_insert_max_sleep_time_ms/background_insert_max_sleep_time_ms/g' Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com> * Mark 02417_opentelemetry_insert_on_distributed_table as long CI: https://s3.amazonaws.com/clickhouse-test-reports/55978/7a6abb03a0b507e29e999cb7e04f246a119c6f28/stateless_tests_flaky_check__asan_.html Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com> --------- Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2023-11-01 14:09:39 +00:00
if (!(settings.distributed_foreground_insert && table->isRemote()) && !async_insert && !no_squash && !(query && query->watch))
2021-09-15 19:35:48 +00:00
{
bool table_prefers_large_blocks = table->prefersLargeBlocks();
out.addSource(std::make_shared<SquashingChunksTransform>(
input_header(),
2021-09-15 19:35:48 +00:00
table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size,
table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL));
2021-09-15 19:35:48 +00:00
}
auto counting = std::make_shared<CountingTransform>(input_header(), thread_status, getContext()->getQuota());
2021-09-17 12:05:54 +00:00
counting->setProcessListElement(context_ptr->getProcessListElement());
counting->setProgressCallback(context_ptr->getProgressCallback());
2021-09-15 19:35:48 +00:00
out.addSource(std::move(counting));
return out;
}
2020-08-24 14:29:31 +00:00
2021-09-17 12:59:40 +00:00
BlockIO InterpreterInsertQuery::execute()
2012-03-11 08:52:56 +00:00
{
2021-09-17 12:59:40 +00:00
const Settings & settings = getContext()->getSettingsRef();
auto & query = query_ptr->as<ASTInsertQuery &>();
QueryPipelineBuilder pipeline;
2022-05-20 19:49:31 +00:00
std::optional<QueryPipeline> distributed_pipeline;
QueryPlanResourceHolder resources;
2012-03-11 08:52:56 +00:00
2021-09-17 12:59:40 +00:00
StoragePtr table = getTable(query);
checkStorageSupportsTransactionsIfNeeded(table, getContext());
Fix possible use-after-free for INSERT into MV with concurrent DROP ASan founds [1]: ==553== ERROR: AddressSanitizer: heap-use-after-free on address 0x61e004694080 at pc 0x000029150af2 bp 0x7f70b3f8ef10 sp 0x7f70b3f8ef08 READ of size 8 at 0x61e004694080 thread T477 (QueryPipelineEx) 0 0x29150af1 in DB::MergeTreeDataWriter::writeTempPart() > 1 0x293b8e43 in DB::MergeTreeSink::consume(DB::Chunk) obj-x86_64-linux-gnu/../src/Storages/MergeTree/MergeTreeSink.cpp:27:65 2 0x29dac73b in DB::SinkToStorage::onConsume(DB::Chunk) obj-x86_64-linux-gnu/../src/Processors/Sinks/SinkToStorage.cpp:18:5 3 0x29c72dd2 in DB::ExceptionKeepingTransform::work()::$_1::operator()() const obj-x86_64-linux-gnu/../src/Processors/Transforms/ExceptionKeepingTransform.cpp:151:51 0x61e004694080 is located 2048 bytes inside of 2480-byte region [0x61e004693880,0x61e004694230) freed by thread T199 (BgSchPool) here: ... 4 0x26220f20 in DB::DatabaseCatalog::TableMarkedAsDropped::~TableMarkedAsDropped() obj-x86_64-linux-gnu/../src/Interpreters/DatabaseCatalog.h:248:12 5 0x26220f20 in DB::DatabaseCatalog::dropTableDataTask() obj-x86_64-linux-gnu/../src/Interpreters/DatabaseCatalog.cpp:908:1 [1]: https://s3.amazonaws.com/clickhouse-test-reports/33201/4f04d6af61eabf4899eb8188150dc862aaab80fc/stress_test__address__actions_.html There was a fix in #32572, but it was not complete (yes it reduced the race window a lot, but not completely), since the inner table still can go away after the INSERT chain was built, to fix this obtain the reference earlier. Follow-up for: #32572 (cc @tavplubix)
2022-01-04 10:27:53 +00:00
StoragePtr inner_table;
if (const auto * mv = dynamic_cast<const StorageMaterializedView *>(table.get()))
inner_table = mv->getTargetTable();
2021-07-14 08:49:05 +00:00
if (query.partition_by && !table->supportsPartitionBy())
2021-07-15 08:23:59 +00:00
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "PARTITION BY clause is not supported by storage");
2021-07-14 08:49:05 +00:00
2021-09-17 12:59:40 +00:00
auto table_lock = table->lockForShare(getContext()->getInitialQueryId(), settings.lock_acquire_timeout);
auto metadata_snapshot = table->getInMemoryMetadataPtr();
auto query_sample_block = getSampleBlock(query, table, metadata_snapshot);
2022-02-28 14:09:46 +00:00
/// For table functions we check access while executing
/// getTable() -> ITableFunction::execute().
2021-09-17 12:59:40 +00:00
if (!query.table_function)
getContext()->checkAccess(AccessType::INSERT, query.table_id, query_sample_block.getNames());
2020-01-24 16:20:36 +00:00
if (query.select && settings.parallel_distributed_insert_select)
// Distributed INSERT SELECT
2022-05-20 19:49:31 +00:00
distributed_pipeline = table->distributedWrite(query, getContext());
2019-12-12 10:49:15 +00:00
std::vector<Chain> presink_chains;
std::vector<Chain> sink_chains;
2022-05-20 19:49:31 +00:00
if (!distributed_pipeline || query.watch)
2019-12-12 10:49:15 +00:00
{
/// Number of streams works like this:
/// * For the SELECT, use `max_threads`, or `max_insert_threads`, or whatever
/// InterpreterSelectQuery ends up with.
/// * Use `max_insert_threads` streams for various insert-preparation steps, e.g.
/// materializing and squashing (too slow to do in one thread). That's `presink_chains`.
/// * If the table supports parallel inserts, use the same streams for writing to IStorage.
/// Otherwise ResizeProcessor them down to 1 stream.
/// * If it's not an INSERT SELECT, forget all that and use one stream.
size_t pre_streams_size = 1;
size_t sink_streams_size = 1;
2021-09-15 19:35:48 +00:00
if (query.select)
{
2020-08-24 14:29:31 +00:00
bool is_trivial_insert_select = false;
if (settings.optimize_trivial_insert_select)
2020-08-02 05:35:58 +00:00
{
2020-11-10 06:42:38 +00:00
const auto & select_query = query.select->as<ASTSelectWithUnionQuery &>();
const auto & selects = select_query.list_of_selects->children;
const auto & union_modes = select_query.list_of_modes;
/// ASTSelectWithUnionQuery is not normalized now, so it may pass some queries which can be Trivial select queries
2022-08-30 10:09:01 +00:00
const auto mode_is_all = [](const auto & mode) { return mode == SelectUnionMode::UNION_ALL; };
SYSTEM RESTORE REPLICA replica [ON CLUSTER cluster] (#13652) * initial commit: add setting and stub * typo * added test stub * fix * wip merging new integration test and code proto * adding steps interpreters * adding firstly proposed solution (moving parts etc) * added checking zookeeper path existence * fixing the include * fixing and sorting includes * fixing outdated struct * fix the name * added ast ptr as level of indirection * fix ref * updating the changes * working on test stub * fix iterator -> reference * revert rocksdb submodule update * fixed show privileges test * updated the test stub * replaced rand() with thread_local_rng(), updated the tests updated the test fixed test config path test fix removed error messages fixed the test updated the test fixed string literal fixed literal typo: = * fixed the empty replica error message * updated the test and the code with logs * updated the possible test cases, updated * added the code/test milestone comments * updated the test (added more testcases) * replaced native assert with CH one * individual replicas recursive delete fix * updated the AS db.name AST * two small logging fixes * manually generated AST fixes * Updated the test, added the possible algo change * Some thoughts about optimizing the solution: ALTER MOVE PARTITION .. TO TABLE -> move to detached/ + ALTER ... ATTACH * fix * Removed the replica sync in test as it's invalid * Some test tweaks * tmp * Rewrote the algo by using the executeQuery instead of hand-crafting the ASTPtr. Two questions still active. * tr: logging active parts * Extracted the parts moving algo into a separate helper function * Fixed the test data and the queries slightly * Replaced query to system.parts to direct invocation, started building the test that breaks on various parts. 
* Added the case for tables when at least one replica is alive * Updated the test to test replicas restoration by detaching/attaching * Altered the test to check restoration without replica restart * Added the tables swap in the start if the server failed last time * Hotfix when only /replicas/replica... path was deleted * Restore ZK paths while creating a replicated MergeTree table * Updated the docs, fixed the algo for individual replicas restoration case * Initial parts table storage fix, tests sync fix * Reverted individual replica restoration to general algo * Slightly optimised getDataParts * Trying another solution with parts detaching * Rewrote algo without any steps, added ON CLUSTER support * Attaching parts from other replica on restoration * Getting part checksums from ZK * Removed ON CLUSTER, finished working solution * Multiple small changes after review * Fixing parallel test * Supporting rewritten form on cluster * Test fix * Moar logging * Using source replica as checksum provider * improve test, remove some code from parser * Trying solution with move to detached + forget * Moving all parts (not only Committed) to detached * Edited docs for RESTORE REPLICA * Re-merging * minor fixes Co-authored-by: Alexander Tokmakov <avtokmakov@yandex-team.ru>
2021-06-20 08:24:43 +00:00
is_trivial_insert_select =
std::all_of(union_modes.begin(), union_modes.end(), std::move(mode_is_all))
&& std::all_of(selects.begin(), selects.end(), isTrivialSelect);
2020-08-24 14:29:31 +00:00
}
if (is_trivial_insert_select)
{
/** When doing trivial INSERT INTO ... SELECT ... FROM table,
* don't need to process SELECT with more than max_insert_threads
* and it's reasonable to set block size for SELECT to the desired block size for INSERT
* to avoid unnecessary squashing.
*/
Settings new_settings = getContext()->getSettings();
2020-08-24 14:29:31 +00:00
new_settings.max_threads = std::max<UInt64>(1, settings.max_insert_threads);
if (table->prefersLargeBlocks())
{
if (settings.min_insert_block_size_rows)
new_settings.max_block_size = settings.min_insert_block_size_rows;
if (settings.min_insert_block_size_bytes)
new_settings.preferred_block_size_bytes = settings.min_insert_block_size_bytes;
}
2020-08-24 14:29:31 +00:00
auto new_context = Context::createCopy(context);
new_context->setSettings(new_settings);
2023-09-14 20:10:07 +00:00
new_context->setInsertionTable(getContext()->getInsertionTable(), getContext()->getInsertionTableColumnNames());
2020-08-24 14:29:31 +00:00
2022-12-15 12:03:09 +00:00
auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1);
if (settings.allow_experimental_analyzer)
{
2023-01-18 09:44:40 +00:00
InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, new_context, select_query_options);
2023-01-20 11:19:16 +00:00
pipeline = interpreter_select_analyzer.buildQueryPipeline();
2022-12-15 12:03:09 +00:00
}
else
{
2023-01-21 11:19:09 +00:00
InterpreterSelectWithUnionQuery interpreter_select(query.select, new_context, select_query_options);
2022-12-15 12:03:09 +00:00
pipeline = interpreter_select.buildQueryPipeline();
}
}
else
{
2023-01-21 11:19:09 +00:00
/// Passing 1 as subquery_depth will disable limiting size of intermediate result.
2022-12-15 12:03:09 +00:00
auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1);
if (settings.allow_experimental_analyzer)
{
2023-01-18 09:44:40 +00:00
InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, getContext(), select_query_options);
2023-01-20 11:19:16 +00:00
pipeline = interpreter_select_analyzer.buildQueryPipeline();
2022-12-15 12:03:09 +00:00
}
else
{
2023-01-21 11:19:09 +00:00
InterpreterSelectWithUnionQuery interpreter_select(query.select, getContext(), select_query_options);
2022-12-15 12:03:09 +00:00
pipeline = interpreter_select.buildQueryPipeline();
}
}
2019-12-12 10:49:15 +00:00
2021-09-15 19:35:48 +00:00
pipeline.dropTotalsAndExtremes();
if (settings.max_insert_threads > 1)
{
pre_streams_size = std::min(static_cast<size_t>(settings.max_insert_threads), pipeline.getNumStreams());
if (table->supportsParallelInsert())
sink_streams_size = pre_streams_size;
}
2020-05-27 18:20:26 +00:00
pipeline.resize(pre_streams_size);
2021-04-22 21:30:31 +00:00
/// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values.
2021-04-22 22:13:07 +00:00
if (getContext()->getSettingsRef().insert_null_as_default)
2021-04-22 21:30:31 +00:00
{
2021-09-15 19:35:48 +00:00
const auto & input_columns = pipeline.getHeader().getColumnsWithTypeAndName();
2021-09-17 12:59:40 +00:00
const auto & query_columns = query_sample_block.getColumnsWithTypeAndName();
const auto & output_columns = metadata_snapshot->getColumns();
2021-04-23 19:08:13 +00:00
if (input_columns.size() == query_columns.size())
{
2021-04-23 19:08:13 +00:00
for (size_t col_idx = 0; col_idx < query_columns.size(); ++col_idx)
{
/// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with
2022-05-09 19:13:02 +00:00
/// default column values (in AddingDefaultsTransform), so all values will be cast correctly.
2023-02-08 19:14:28 +00:00
if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type) && !isNullableOrLowCardinalityNullable(query_columns[col_idx].type) && output_columns.has(query_columns[col_idx].name))
query_sample_block.setColumn(col_idx, ColumnWithTypeAndName(makeNullableOrLowCardinalityNullable(query_columns[col_idx].column), makeNullableOrLowCardinalityNullable(query_columns[col_idx].type), query_columns[col_idx].name));
2021-04-23 19:08:13 +00:00
}
}
}
}
else if (query.watch)
{
InterpreterWatchQuery interpreter_watch{ query.watch, getContext() };
2021-09-16 17:40:42 +00:00
pipeline = interpreter_watch.buildQueryPipeline();
}
2018-09-20 11:40:04 +00:00
ThreadGroupPtr running_group;
if (current_thread)
running_group = current_thread->getThreadGroup();
if (!running_group)
running_group = std::make_shared<ThreadGroup>(getContext());
for (size_t i = 0; i < sink_streams_size; ++i)
2019-12-12 10:49:15 +00:00
{
auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr,
running_group, /* elapsed_counter_ms= */ nullptr);
sink_chains.emplace_back(std::move(out));
}
for (size_t i = 0; i < pre_streams_size; ++i)
{
auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot,
query_sample_block, /* thread_status_holder= */ nullptr);
presink_chains.emplace_back(std::move(out));
}
2019-12-12 10:49:15 +00:00
}
2021-09-19 18:53:36 +00:00
BlockIO res;
/// What type of query: INSERT or INSERT SELECT or INSERT WATCH?
2022-05-20 19:49:31 +00:00
if (distributed_pipeline)
2020-05-27 18:20:26 +00:00
{
2022-05-20 19:49:31 +00:00
res.pipeline = std::move(*distributed_pipeline);
2020-05-27 18:20:26 +00:00
}
else if (query.select || query.watch)
2012-03-11 08:52:56 +00:00
{
const auto & header = presink_chains.at(0).getInputHeader();
2020-11-17 17:16:55 +00:00
auto actions_dag = ActionsDAG::makeConvertingActions(
2021-09-15 19:35:48 +00:00
pipeline.getHeader().getColumnsWithTypeAndName(),
2020-11-17 17:16:55 +00:00
header.getColumnsWithTypeAndName(),
ActionsDAG::MatchColumnsMode::Position);
2021-05-19 14:32:07 +00:00
auto actions = std::make_shared<ExpressionActions>(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes));
2020-05-27 18:20:26 +00:00
2021-09-15 19:35:48 +00:00
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
2019-12-12 10:49:15 +00:00
{
2020-11-17 17:16:55 +00:00
return std::make_shared<ExpressionTransform>(in_header, actions);
2020-05-27 18:20:26 +00:00
});
2019-12-12 10:49:15 +00:00
2021-12-08 15:29:00 +00:00
/// We need to convert Sparse columns to full, because the destination storage
/// may not support them or may have different settings for applying Sparse serialization.
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
2021-04-15 14:34:55 +00:00
{
return std::make_shared<MaterializingTransform>(in_header);
});
size_t num_select_threads = pipeline.getNumThreads();
2022-05-20 19:49:31 +00:00
for (auto & chain : presink_chains)
resources = chain.detachResources();
for (auto & chain : sink_chains)
2022-05-20 19:49:31 +00:00
resources = chain.detachResources();
pipeline.addChains(std::move(presink_chains));
pipeline.resize(sink_chains.size());
pipeline.addChains(std::move(sink_chains));
if (!settings.parallel_view_processing)
{
/// Don't use more threads for INSERT than for SELECT to reduce memory consumption.
if (pipeline.getNumThreads() > num_select_threads)
pipeline.setMaxThreads(num_select_threads);
}
else if (pipeline.getNumThreads() < settings.max_threads)
{
/// It is possible for query to have max_threads=1, due to optimize_trivial_insert_select,
/// however in case of parallel_view_processing and multiple views, views can still be processed in parallel.
///
/// Note, number of threads will be limited by buildPushingToViewsChain() to max_threads.
pipeline.setMaxThreads(settings.max_threads);
}
2021-09-15 19:35:48 +00:00
pipeline.setSinks([&](const Block & cur_header, QueryPipelineBuilder::StreamType) -> ProcessorPtr
{
2021-09-26 14:54:59 +00:00
return std::make_shared<EmptySink>(cur_header);
2020-05-27 18:20:26 +00:00
});
if (!allow_materialized)
{
for (const auto & column : metadata_snapshot->getColumns())
2020-05-27 18:20:26 +00:00
if (column.default_desc.kind == ColumnDefaultKind::Materialized && header.has(column.name))
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", column.name);
}
2021-09-15 19:35:48 +00:00
2022-05-24 20:06:08 +00:00
res.pipeline = QueryPipelineBuilder::getPipeline(std::move(pipeline));
}
2019-12-16 13:52:32 +00:00
else
2021-09-10 12:38:30 +00:00
{
presink_chains.at(0).appendChain(std::move(sink_chains.at(0)));
res.pipeline = QueryPipeline(std::move(presink_chains[0]));
2021-09-15 19:35:48 +00:00
res.pipeline.setNumThreads(std::min<size_t>(res.pipeline.getNumThreads(), settings.max_threads));
2023-05-07 04:29:04 +00:00
res.pipeline.setConcurrencyControl(settings.use_concurrency_control);
2020-05-27 18:20:26 +00:00
2021-09-19 20:15:10 +00:00
if (query.hasInlinedData() && !async_insert)
2021-09-15 19:35:48 +00:00
{
/// can execute without additional data
auto format = getInputFormatFromASTInsertQuery(query_ptr, true, query_sample_block, getContext(), nullptr);
for (auto && buffer : owned_buffers)
format->addBuffer(std::move(buffer));
auto pipe = getSourceFromInputFormat(query_ptr, std::move(format), getContext(), nullptr);
2021-09-15 19:35:48 +00:00
res.pipeline.complete(std::move(pipe));
}
}
2022-05-20 19:49:31 +00:00
res.pipeline.addResources(std::move(resources));
Fix table lifetime in case of parallel DROP TABLE and INSERT Stress tests founds [1]: ==527==WARNING: MemorySanitizer: use-of-uninitialized-value 0 0x37078ffd in unsigned long std::__1::__cxx_atomic_fetch_add<unsigned long>(std::__1::__cxx_atomic_base_impl<unsigned long>*, unsigned long, std::__1::memory_order) obj-x86_64-linux-gnu/../contrib/libcxx/include/atomic:1050:12 1 0x37078ffd in std::__1::__atomic_base<unsigned long, true>::fetch_add(unsigned long, std::__1::memory_order) obj-x86_64-linux-gnu/../contrib/libcxx/include/atomic:1719:17 2 0x37078ffd in std::__1::__atomic_base<unsigned long, true>::operator++() obj-x86_64-linux-gnu/../contrib/libcxx/include/atomic:1756:57 3 0x37078ffd in SimpleIncrement::get() obj-x86_64-linux-gnu/../src/Common/SimpleIncrement.h:20:16 4 0x37078ffd in DB::MergeTreeDataWriter::writeTempPart(DB::BlockWithPartition&, std::__1::shared_ptr<DB::StorageInMemoryMetadata const> const&, std::__1::shared_ptr<DB::Context const>) obj-x86_64-linux-gnu/../src/Storages/MergeTree/MergeTreeDataWriter.cpp:276:46 5 0x373c446c in DB::MergeTreeSink::consume(DB::Chunk) obj-x86_64-linux-gnu/../src/Storages/MergeTree/MergeTreeSink.cpp:27:65 Uninitialized value was created by a heap deallocation 6 0x32d481e8 in DB::DatabaseCatalog::TableMarkedAsDropped::~TableMarkedAsDropped() obj-x86_64-linux-gnu/../src/Interpreters/DatabaseCatalog.h:248:12 7 0x32d3c134 in DB::DatabaseCatalog::dropTableDataTask() obj-x86_64-linux-gnu/../src/Interpreters/DatabaseCatalog.cpp:908:1 [1]: https://s3.amazonaws.com/clickhouse-test-reports/32534/fa6090f588dbf4cbb5f28bd2210847b070bb8218/stress_test__memory__actions_.html The query was CREATE MATERIALIZED VIEW ... POPULATE AS SELECT ... from 00040_aggregating_materialized_view test.
2021-12-11 07:57:23 +00:00
res.pipeline.addStorageHolder(table);
Fix possible use-after-free for INSERT into MV with concurrent DROP ASan founds [1]: ==553== ERROR: AddressSanitizer: heap-use-after-free on address 0x61e004694080 at pc 0x000029150af2 bp 0x7f70b3f8ef10 sp 0x7f70b3f8ef08 READ of size 8 at 0x61e004694080 thread T477 (QueryPipelineEx) 0 0x29150af1 in DB::MergeTreeDataWriter::writeTempPart() > 1 0x293b8e43 in DB::MergeTreeSink::consume(DB::Chunk) obj-x86_64-linux-gnu/../src/Storages/MergeTree/MergeTreeSink.cpp:27:65 2 0x29dac73b in DB::SinkToStorage::onConsume(DB::Chunk) obj-x86_64-linux-gnu/../src/Processors/Sinks/SinkToStorage.cpp:18:5 3 0x29c72dd2 in DB::ExceptionKeepingTransform::work()::$_1::operator()() const obj-x86_64-linux-gnu/../src/Processors/Transforms/ExceptionKeepingTransform.cpp:151:51 0x61e004694080 is located 2048 bytes inside of 2480-byte region [0x61e004693880,0x61e004694230) freed by thread T199 (BgSchPool) here: ... 4 0x26220f20 in DB::DatabaseCatalog::TableMarkedAsDropped::~TableMarkedAsDropped() obj-x86_64-linux-gnu/../src/Interpreters/DatabaseCatalog.h:248:12 5 0x26220f20 in DB::DatabaseCatalog::dropTableDataTask() obj-x86_64-linux-gnu/../src/Interpreters/DatabaseCatalog.cpp:908:1 [1]: https://s3.amazonaws.com/clickhouse-test-reports/33201/4f04d6af61eabf4899eb8188150dc862aaab80fc/stress_test__address__actions_.html There was a fix in #32572, but it was not complete (yes it reduced the race window a lot, but not completely), since the inner table still can go away after the INSERT chain was built, to fix this obtain the reference earlier. Follow-up for: #32572 (cc @tavplubix)
2022-01-04 10:27:53 +00:00
if (inner_table)
res.pipeline.addStorageHolder(inner_table);
Fix table lifetime in case of parallel DROP TABLE and INSERT Stress tests founds [1]: ==527==WARNING: MemorySanitizer: use-of-uninitialized-value 0 0x37078ffd in unsigned long std::__1::__cxx_atomic_fetch_add<unsigned long>(std::__1::__cxx_atomic_base_impl<unsigned long>*, unsigned long, std::__1::memory_order) obj-x86_64-linux-gnu/../contrib/libcxx/include/atomic:1050:12 1 0x37078ffd in std::__1::__atomic_base<unsigned long, true>::fetch_add(unsigned long, std::__1::memory_order) obj-x86_64-linux-gnu/../contrib/libcxx/include/atomic:1719:17 2 0x37078ffd in std::__1::__atomic_base<unsigned long, true>::operator++() obj-x86_64-linux-gnu/../contrib/libcxx/include/atomic:1756:57 3 0x37078ffd in SimpleIncrement::get() obj-x86_64-linux-gnu/../src/Common/SimpleIncrement.h:20:16 4 0x37078ffd in DB::MergeTreeDataWriter::writeTempPart(DB::BlockWithPartition&, std::__1::shared_ptr<DB::StorageInMemoryMetadata const> const&, std::__1::shared_ptr<DB::Context const>) obj-x86_64-linux-gnu/../src/Storages/MergeTree/MergeTreeDataWriter.cpp:276:46 5 0x373c446c in DB::MergeTreeSink::consume(DB::Chunk) obj-x86_64-linux-gnu/../src/Storages/MergeTree/MergeTreeSink.cpp:27:65 Uninitialized value was created by a heap deallocation 6 0x32d481e8 in DB::DatabaseCatalog::TableMarkedAsDropped::~TableMarkedAsDropped() obj-x86_64-linux-gnu/../src/Interpreters/DatabaseCatalog.h:248:12 7 0x32d3c134 in DB::DatabaseCatalog::dropTableDataTask() obj-x86_64-linux-gnu/../src/Interpreters/DatabaseCatalog.cpp:908:1 [1]: https://s3.amazonaws.com/clickhouse-test-reports/32534/fa6090f588dbf4cbb5f28bd2210847b070bb8218/stress_test__memory__actions_.html The query was CREATE MATERIALIZED VIEW ... POPULATE AS SELECT ... from 00040_aggregating_materialized_view test.
2021-12-11 07:57:23 +00:00
return res;
2012-03-11 08:52:56 +00:00
}
2020-03-02 20:23:58 +00:00
/// Returns the `table_id` stored in the INSERT query AST held by this interpreter.
StorageID InterpreterInsertQuery::getDatabaseTable() const
{
    const auto & insert_query = query_ptr->as<ASTInsertQuery &>();
    return insert_query.table_id;
}
/// Adds the insertion target of the query to the query log element.
/// The insertion table is read from `context_`; when it is not empty, its database name
/// is added to `elem.query_databases` and its full unquoted name to `elem.query_tables`.
void InterpreterInsertQuery::extendQueryLogElemImpl(QueryLogElement & elem, ContextPtr context_)
{
    const auto & target_table = context_->getInsertionTable();

    /// Nothing to record when the context carries no insertion table.
    if (target_table.empty())
        return;

    elem.query_databases.insert(target_table.getDatabaseName());
    elem.query_tables.insert(target_table.getFullNameNotQuoted());
}
/// Overload taking the query AST. The AST argument is not used here:
/// the call is forwarded to the (elem, context_) overload.
void InterpreterInsertQuery::extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr & /* ast */, ContextPtr context_) const
{
    InterpreterInsertQuery::extendQueryLogElemImpl(elem, context_);
}
2011-10-30 11:30:52 +00:00
}