Merge pull request #44461 from ClickHouse/parts-removal-limit
Add part removal limit for one CleanupThread iteration
Commit: c646048af9
@ -127,6 +127,13 @@ Default value: 100000.
A large number of parts in a table reduces the performance of ClickHouse queries and increases ClickHouse boot time. Most often this is a consequence of an incorrect design (mistakes when choosing a partitioning strategy, such as partitions that are too small).

## simultaneous_parts_removal_limit {#simultaneous-parts-removal-limit}
If there are a lot of outdated parts, the cleanup thread will try to delete up to `simultaneous_parts_removal_limit` parts during one iteration.
Setting `simultaneous_parts_removal_limit` to `0` means there is no limit.

Default value: 0.
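
As a rough sketch of these semantics (illustrative only; the authoritative logic is the `MergeTreeData::grabOldParts` change shown further down), a limit of `0` is mapped to "no cap", and the cleanup loop stops collecting outdated parts once the cap is reached:

```cpp
#include <cstddef>
#include <limits>
#include <string>
#include <vector>

/// Hypothetical helper (not part of ClickHouse): pick at most `limit` outdated
/// parts for a single cleanup iteration; `limit == 0` is treated as unlimited.
std::vector<std::string> selectPartsForRemoval(const std::vector<std::string> & outdated_parts, size_t limit)
{
    const size_t cap = (limit == 0) ? std::numeric_limits<size_t>::max() : limit;

    std::vector<std::string> selected;
    for (const auto & part : outdated_parts)
    {
        if (selected.size() == cap)
            break;  /// leave the remaining parts for the next iteration
        selected.push_back(part);
    }
    return selected;
}
```
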
## replicated_deduplication_window {#replicated-deduplication-window}
The number of most recently inserted blocks for which ClickHouse Keeper stores hash sums to check for duplicates.
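
Conceptually, the deduplication window is a bounded set of recent block hash sums: a new insert is rejected as a duplicate if its hash is already in the window, and the oldest hash is evicted once the window is full. A minimal standalone sketch of that idea (purely illustrative; this is not how ClickHouse Keeper actually stores the hashes, and `DeduplicationWindow` is a made-up name):

```cpp
#include <cstddef>
#include <cstdint>
#include <deque>
#include <unordered_set>

/// Illustrative only: keep hash sums of the last `window_size` inserted blocks
/// and reject a block whose hash is already present.
class DeduplicationWindow
{
public:
    explicit DeduplicationWindow(size_t window_size_) : window_size(window_size_) {}

    /// Returns true if the block is new and was recorded, false if it is a duplicate.
    bool tryInsert(uint64_t block_hash)
    {
        if (seen.contains(block_hash))
            return false;

        order.push_back(block_hash);
        seen.insert(block_hash);

        if (order.size() > window_size)  /// evict the oldest hash
        {
            seen.erase(order.front());
            order.pop_front();
        }
        return true;
    }

private:
    size_t window_size;
    std::deque<uint64_t> order;         /// insertion order, oldest first
    std::unordered_set<uint64_t> seen;  /// fast duplicate lookup
};
```
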
@ -69,6 +69,8 @@
```cpp
#include <algorithm>
#include <atomic>
#include <iomanip>
#include <limits>
#include <optional>
#include <set>
#include <thread>
```

@ -1761,11 +1763,20 @@ MergeTreeData::DataPartsVector MergeTreeData::grabOldParts(bool force)
```cpp
    auto time_now = time(nullptr);

    {
        /// simultaneous_parts_removal_limit == 0 means there is no per-iteration limit.
        auto removal_limit = getSettings()->simultaneous_parts_removal_limit;
        size_t current_removal_limit = removal_limit == 0 ? std::numeric_limits<size_t>::max() : static_cast<size_t>(removal_limit);

        auto parts_lock = lockParts();

        auto outdated_parts_range = getDataPartsStateRange(DataPartState::Outdated);
        for (auto it = outdated_parts_range.begin(); it != outdated_parts_range.end(); ++it)
        {
            /// Stop collecting parts once the per-iteration limit is reached;
            /// the remaining outdated parts will be handled by a later iteration.
            if (parts_to_delete.size() == current_removal_limit)
            {
                LOG_TRACE(log, "Found {} parts to remove and reached the limit for one removal iteration", current_removal_limit);
                break;
            }

            const DataPartPtr & part = *it;

            part->last_removal_attemp_time.store(time_now, std::memory_order_relaxed);
```

@ -76,6 +76,9 @@ struct Settings;
```cpp
    M(UInt64, max_delay_to_insert, 1, "Max delay of inserting data into MergeTree table in seconds, if there are a lot of unmerged parts in single partition.", 0) \
    M(UInt64, max_parts_in_total, 100000, "If more than this number active parts in all partitions in total, throw 'Too many parts ...' exception.", 0) \
    \
    /* Part removal settings. */ \
    M(UInt64, simultaneous_parts_removal_limit, 0, "Maximum number of parts to remove during one CleanupThread iteration (0 means unlimited).", 0) \
    \
    /** Replication settings. */ \
    M(UInt64, replicated_deduplication_window, 100, "How many last blocks of hashes should be kept in ZooKeeper (old blocks will be deleted).", 0) \
    M(UInt64, replicated_deduplication_window_seconds, 7 * 24 * 60 * 60 /* one week */, "Similar to \"replicated_deduplication_window\", but determines old blocks by their lifetime. Hash of an inserted block will be deleted (and the block will not be deduplicated after) if it outside of one \"window\". You can set very big replicated_deduplication_window to avoid duplicating INSERTs during that period of time.", 0) \
```
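
The `M(...)` entries above follow the X-macro pattern: the same list is expanded with different definitions of `M` to generate struct members, documentation, and so on. A simplified, self-contained sketch of the pattern (the macro names here are invented for illustration and are not the actual ClickHouse macros):

```cpp
#include <cstdint>
#include <iostream>

/// Simplified X-macro list in the spirit of MergeTreeSettings: each entry is
/// (type, name, default value, description).
#define EXAMPLE_SETTINGS(M) \
    M(uint64_t, simultaneous_parts_removal_limit, 0, "Max parts removed per cleanup iteration (0 = unlimited)") \
    M(uint64_t, replicated_deduplication_window, 100, "How many recent block hashes to keep for deduplication")

/// Expansion 1: generate struct members with their default values.
struct ExampleSettings
{
#define DECLARE_MEMBER(TYPE, NAME, DEFAULT, DESCRIPTION) TYPE NAME = DEFAULT;
    EXAMPLE_SETTINGS(DECLARE_MEMBER)
#undef DECLARE_MEMBER
};

/// Expansion 2: generate human-readable documentation from the same list.
void printSettingsDoc()
{
#define PRINT_DOC(TYPE, NAME, DEFAULT, DESCRIPTION) \
    std::cout << #NAME << " (default " << DEFAULT << "): " << DESCRIPTION << '\n';
    EXAMPLE_SETTINGS(PRINT_DOC)
#undef PRINT_DOC
}

int main()
{
    ExampleSettings settings;
    std::cout << "limit = " << settings.simultaneous_parts_removal_limit << '\n';
    printSettingsDoc();
    return 0;
}
```

The benefit of this layout is that a setting such as `simultaneous_parts_removal_limit` only has to be declared once, in one list, to appear everywhere the list is expanded.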