Merge pull request #68120 from ClickHouse/backport/24.3/68052

Backport #68052 to 24.3: Fix skip of parts in mutation with analyzer
This commit is contained in:
Anton Popov 2024-08-10 00:48:35 +02:00 committed by GitHub
commit faaac31bd3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 31 additions and 8 deletions

View File

@ -143,7 +143,6 @@ ColumnDependencies getAllColumnDependencies(
bool isStorageTouchedByMutations(
MergeTreeData & storage,
MergeTreeData::DataPartPtr source_part,
const StorageMetadataPtr & metadata_snapshot,
const std::vector<MutationCommand> & commands,
@ -152,7 +151,9 @@ bool isStorageTouchedByMutations(
if (commands.empty())
return false;
auto storage_from_part = std::make_shared<StorageFromMergeTreeDataPart>(source_part);
bool all_commands_can_be_skipped = true;
for (const auto & command : commands)
{
if (command.type == MutationCommand::APPLY_DELETED_MASK)
@ -167,7 +168,7 @@ bool isStorageTouchedByMutations(
if (command.partition)
{
const String partition_id = storage.getPartitionIDFromQuery(command.partition, context);
const String partition_id = storage_from_part->getPartitionIDFromQuery(command.partition, context);
if (partition_id == source_part->info.partition_id)
all_commands_can_be_skipped = false;
}
@ -181,20 +182,18 @@ bool isStorageTouchedByMutations(
if (all_commands_can_be_skipped)
return false;
auto storage_from_part = std::make_shared<StorageFromMergeTreeDataPart>(source_part);
std::optional<InterpreterSelectQuery> interpreter_select_query;
BlockIO io;
if (context->getSettingsRef().allow_experimental_analyzer)
{
auto select_query_tree = prepareQueryAffectedQueryTree(commands, storage.shared_from_this(), context);
auto select_query_tree = prepareQueryAffectedQueryTree(commands, storage_from_part, context);
InterpreterSelectQueryAnalyzer interpreter(select_query_tree, context, SelectQueryOptions().ignoreLimits());
io = interpreter.execute();
}
else
{
ASTPtr select_query = prepareQueryAffectedAST(commands, storage.shared_from_this(), context);
ASTPtr select_query = prepareQueryAffectedAST(commands, storage_from_part, context);
/// Interpreter must be alive, when we use result of execute() method.
/// For some reason it may copy context and give it into ExpressionTransform
/// after that we will use context from destroyed stack frame in our stream.

View File

@ -19,7 +19,6 @@ using QueryPipelineBuilderPtr = std::unique_ptr<QueryPipelineBuilder>;
/// Return false if the data isn't going to be changed by mutations.
bool isStorageTouchedByMutations(
MergeTreeData & storage,
MergeTreeData::DataPartPtr source_part,
const StorageMetadataPtr & metadata_snapshot,
const std::vector<MutationCommand> & commands,

View File

@ -2110,7 +2110,7 @@ bool MutateTask::prepare()
ctx->commands_for_part.emplace_back(command);
if (ctx->source_part->isStoredOnDisk() && !isStorageTouchedByMutations(
*ctx->data, ctx->source_part, ctx->metadata_snapshot, ctx->commands_for_part, context_for_reading))
ctx->source_part, ctx->metadata_snapshot, ctx->commands_for_part, context_for_reading))
{
NameSet files_to_copy_instead_of_hardlinks;
auto settings_ptr = ctx->data->getSettings();

View File

@ -0,0 +1,4 @@
1_1_1_0_3 10000
1_1_1_0_4 0
2_2_2_0_3 0
2_2_2_0_4 10000

View File

@ -0,0 +1,21 @@
DROP TABLE IF EXISTS t_mutate_skip_part;
CREATE TABLE t_mutate_skip_part (key UInt64, id UInt64, v1 UInt64, v2 UInt64) ENGINE = MergeTree ORDER BY id PARTITION BY key;
INSERT INTO t_mutate_skip_part SELECT 1, number, number, number FROM numbers(10000);
INSERT INTO t_mutate_skip_part SELECT 2, number, number, number FROM numbers(10000);
SET mutations_sync = 2;
ALTER TABLE t_mutate_skip_part UPDATE v1 = 1000 WHERE key = 1;
ALTER TABLE t_mutate_skip_part DELETE WHERE key = 2 AND v2 % 10 = 0;
SYSTEM FLUSH LOGS;
-- If part is skipped in mutation and hardlinked then read_rows must be 0.
SELECT part_name, read_rows
FROM system.part_log
WHERE database = currentDatabase() AND table = 't_mutate_skip_part' AND event_type = 'MutatePart'
ORDER BY part_name;
DROP TABLE IF EXISTS t_mutate_skip_part;