Merge pull request #42423 from ClickHouse/optimize-partitions-in-background

Optimize partitions in background
This commit is contained in:
Alexey Milovidov 2022-10-24 19:41:50 +02:00 committed by GitHub
commit 9ee7131f67
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 106 additions and 0 deletions

View File

@ -333,6 +333,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge(
SimpleMergeSelector::Settings merge_settings;
/// Override value from table settings
merge_settings.max_parts_to_merge_at_once = data_settings->max_parts_to_merge_at_once;
merge_settings.min_age_to_force_merge = data_settings->min_age_to_force_merge_seconds;
if (aggressive)
merge_settings.base = 1;

View File

@ -62,6 +62,7 @@ struct Settings;
M(UInt64, merge_tree_clear_old_temporary_directories_interval_seconds, 60, "The period of executing the clear old temporary directories operation in background.", 0) \
M(UInt64, merge_tree_clear_old_parts_interval_seconds, 1, "The period of executing the clear old parts operation in background.", 0) \
M(UInt64, merge_tree_clear_old_broken_detached_parts_ttl_timeout_seconds, 1ULL * 3600 * 24 * 30, "Remove old broken detached parts in the background if they remained intouched for a specified by this setting period of time.", 0) \
M(UInt64, min_age_to_force_merge_seconds, 0, "If all parts in a certain range are older than this value, range will be always eligible for merging. Set to 0 to disable.", 0) \
M(UInt64, merge_tree_enable_clear_old_broken_detached, false, "Enable clearing old broken detached parts operation in background.", 0) \
M(Bool, remove_rolled_back_parts_immediately, 1, "Setting for an incomplete experimental feature.", 0) \
\

View File

@ -102,6 +102,9 @@ bool allow(
double max_size_to_lower_base_log,
const SimpleMergeSelector::Settings & settings)
{
if (settings.min_age_to_force_merge && min_age >= settings.min_age_to_force_merge)
return true;
// std::cerr << "sum_size: " << sum_size << "\n";
/// Map size to 0..1 using logarithmic scale

View File

@ -141,6 +141,11 @@ public:
double heuristic_to_align_parts_max_absolute_difference_in_powers_of_two = 0.5;
double heuristic_to_align_parts_max_score_adjustment = 0.75;
/** If it's not 0, all part ranges whose min_age is greater than or equal to min_age_to_force_merge
* will always be considered eligible for merging.
*/
size_t min_age_to_force_merge = 0;
/** Heuristic:
* From the right side of the range, remove all parts whose size is less than the specified ratio of sum_size.
*/

View File

@ -0,0 +1,8 @@
<clickhouse>
<!-- ZooKeeper connection settings: a single node at host zoo1, port 2181. -->
<zookeeper>
<node index="1">
<host>zoo1</host>
<port>2181</port>
</node>
</zookeeper>
</clickhouse>

View File

@ -0,0 +1,88 @@
import pytest
import time
from helpers.client import QueryRuntimeException
from helpers.cluster import ClickHouseCluster
from helpers.test_tools import TSV
# Module-wide test cluster: one instance named "node" with ZooKeeper enabled and
# configs/zookeeper_config.xml loaded as a main config. ZooKeeper is needed by the
# ReplicatedMergeTree test in this module.
cluster = ClickHouseCluster(__file__)
node = cluster.add_instance(
    "node",
    with_zookeeper=True,
    main_configs=["configs/zookeeper_config.xml"],
)
@pytest.fixture(scope="module")
def start_cluster():
    """Module-scoped fixture: start the cluster, hand it to the tests, shut it down afterwards."""
    try:
        # start() is inside the try block so that shutdown() also runs when startup fails.
        cluster.start()
        yield cluster
    finally:
        cluster.shutdown()
def get_part_number(table_name):
    """Query system.parts for the number of active parts of `table_name`.

    The raw query output is wrapped in TSV so callers can compare it with an
    expected TSV value.
    """
    raw_count = node.query(
        f"SELECT count(*) FROM system.parts where table='{table_name}' and active=1"
    )
    return TSV(raw_count)
def check_expected_part_number(seconds, table_name, expected):
    """Poll the active part count of `table_name` until it equals `expected`.

    The count is checked `int(seconds) * 2` times with a one-second pause after
    every unsuccessful check, so the total wait budget is roughly twice `seconds`.

    Args:
        seconds: Base value for the polling budget (converted with int()).
        table_name: Table whose active parts are counted.
        expected: Value that get_part_number(table_name) has to equal.

    Raises:
        AssertionError: If the expected count was never observed; the message
            contains the table name, the expected value and the last observed value.
    """
    attempts = int(seconds) * 2
    observed = None
    for _ in range(attempts):
        observed = get_part_number(table_name)
        if observed == expected:
            return
        time.sleep(1)
    # Report what was actually seen instead of failing with a bare assertion.
    raise AssertionError(
        f"Table {table_name}: expected part count {expected!r}, "
        f"last observed {observed!r} after {attempts} checks"
    )
def test_without_force_merge_old_parts(start_cluster):
node.query(
"CREATE TABLE test_without_merge (i Int64) ENGINE = MergeTree ORDER BY i;"
)
node.query("INSERT INTO test_without_merge SELECT 1")
node.query("INSERT INTO test_without_merge SELECT 2")
node.query("INSERT INTO test_without_merge SELECT 3")
expected = TSV("""3\n""")
# verify that the parts don't get merged
for i in range(10):
if get_part_number("test_without_merge") != expected:
assert False
time.sleep(1)
node.query("DROP TABLE test_without_merge;")
def test_force_merge_old_parts(start_cluster):
    """With min_age_to_force_merge_seconds=5, three parts of a MergeTree table must merge into one.

    Creates the table, writes three rows in three separate INSERTs, waits via
    check_expected_part_number until one active part is left, then drops the table.
    """
    node.query(
        "CREATE TABLE test_with_merge (i Int64) ENGINE = MergeTree ORDER BY i SETTINGS min_age_to_force_merge_seconds=5;"
    )
    # Each INSERT is issued as its own query.
    for insert_statement in (
        "INSERT INTO test_with_merge SELECT 1",
        "INSERT INTO test_with_merge SELECT 2",
        "INSERT INTO test_with_merge SELECT 3",
    ):
        node.query(insert_statement)

    single_part = TSV("""1\n""")
    check_expected_part_number(10, "test_with_merge", single_part)

    node.query("DROP TABLE test_with_merge;")
def test_force_merge_old_parts_replicated_merge_tree(start_cluster):
    """Same scenario as the plain MergeTree test, but on a ReplicatedMergeTree table.

    Creates the table with min_age_to_force_merge_seconds=5, writes three rows in
    three separate INSERTs, waits via check_expected_part_number until one active
    part is left, then drops the table.
    """
    node.query(
        "CREATE TABLE test_replicated (i Int64) ENGINE = ReplicatedMergeTree('/clickhouse/testing/test', 'node') ORDER BY i SETTINGS min_age_to_force_merge_seconds=5;"
    )
    # Each INSERT is issued as its own query.
    for insert_statement in (
        "INSERT INTO test_replicated SELECT 1",
        "INSERT INTO test_replicated SELECT 2",
        "INSERT INTO test_replicated SELECT 3",
    ):
        node.query(insert_statement)

    single_part = TSV("""1\n""")
    check_expected_part_number(10, "test_replicated", single_part)

    node.query("DROP TABLE test_replicated;")