Merge pull request #62067 from nickitat/stronger_optimize_final_with_rmt

Wait for currently active merges when executing `OPTIMIZE FINAL` on RMT
This commit is contained in:
Nikita Taranov 2024-04-22 19:54:48 +00:00 committed by GitHub
commit aaaff1f2ea
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 44 additions and 5 deletions

View File

@ -5841,11 +5841,24 @@ bool StorageReplicatedMergeTree::optimize(
if (select_decision != SelectPartsDecision::SELECTED)
{
constexpr const char * message_fmt = "Cannot select parts for optimization: {}";
assert(disable_reason.text != unknown_disable_reason);
if (!partition_id.empty())
disable_reason.text += fmt::format(" (in partition {})", partition_id);
return handle_noop(message_fmt, disable_reason.text);
if (try_no + 1 < max_retries)
{
/// Here we are trying to mimic the behaviour of ordinary MergeTree: if some merges are already in progress, let's wait for them to finish.
/// This way `optimize final` won't just silently be a noop (if also `optimize_throw_if_noop=false`), but will wait for the active merges and then retry scheduling the final merge.
/// These guarantees are enough for tests, because there we have full control over insertions.
const auto wait_timeout = query_context->getSettingsRef().receive_timeout.totalMilliseconds() / max_retries;
/// DEFAULT (and not LIGHTWEIGHT) because merges are not considered lightweight; empty `source_replicas` means "all replicas"
waitForProcessingQueue(wait_timeout, SyncReplicaMode::DEFAULT, {});
continue;
}
else
{
constexpr const char * message_fmt = "Cannot select parts for optimization: {}";
assert(disable_reason.text != unknown_disable_reason);
if (!partition_id.empty())
disable_reason.text += fmt::format(" (in partition {})", partition_id);
return handle_noop(message_fmt, disable_reason.text);
}
}
ReplicatedMergeTreeLogEntryData merge_entry;

View File

@ -0,0 +1 @@
1

View File

@ -0,0 +1,25 @@
#!/usr/bin/env bash
# Tags: long, no-random-settings, no-random-merge-tree-settings

# Scenario: two `OPTIMIZE ... FINAL` queries are issued against the same ReplicatedMergeTree
# table, one in the background and one in the foreground. The foreground query is expected
# to wait for merges that are already running rather than return right away, so the table
# should be down to a single active part once it completes.

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
source "${CURDIR}/../shell_config.sh"

${CLICKHOUSE_CLIENT} --query "CREATE TABLE 03015_optimize_final_rmt(a UInt64) ENGINE=ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/03015_optimize_final_rmt', 'r1') ORDER BY a SETTINGS min_age_to_force_merge_seconds=1, merge_selecting_sleep_ms=100"

# Eleven separate inserts with deduplication turned off.
for ((insert_no = 0; insert_no <= 10; insert_no++)); do
    ${CLICKHOUSE_CLIENT} --insert_deduplicate 0 --query "INSERT INTO 03015_optimize_final_rmt select * from numbers_mt(1e6)"
done

# Background query: kicks off a merge unless one is already in flight.
${CLICKHOUSE_CLIENT} --query "OPTIMIZE TABLE 03015_optimize_final_rmt FINAL" &

# Foreground query: must block until the running merges are done instead of returning at once.
${CLICKHOUSE_CLIENT} --query "OPTIMIZE TABLE 03015_optimize_final_rmt FINAL"

# By now exactly one active part is expected.
${CLICKHOUSE_CLIENT} --query "SELECT COUNT() FROM system.parts WHERE database = currentDatabase() AND table = '03015_optimize_final_rmt' AND active"

# Let the background OPTIMIZE finish before the table is dropped.
wait

${CLICKHOUSE_CLIENT} --query "DROP TABLE 03015_optimize_final_rmt"