mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-27 01:51:59 +00:00
Merge pull request #61250 from ClickHouse/less-contentaion-in-cache-part4
Less contention in cache, part 4
This commit is contained in:
commit
33a0e8035f
@ -489,6 +489,8 @@ The server successfully detected this situation and will download merged part fr
|
||||
M(FilesystemCacheFailToReserveSpaceBecauseOfLockContention, "Number of times space reservation was skipped due to a high contention on the cache lock") \
|
||||
M(FilesystemCacheHoldFileSegments, "Filesystem cache file segments count, which were hold") \
|
||||
M(FilesystemCacheUnusedHoldFileSegments, "Filesystem cache file segments count, which were hold, but not used (because of seek or LIMIT n, etc)") \
|
||||
M(FilesystemCacheFreeSpaceKeepingThreadRun, "Number of times background thread executed free space keeping job") \
|
||||
M(FilesystemCacheFreeSpaceKeepingThreadWorkMilliseconds, "Time for which background thread executed free space keeping job") \
|
||||
\
|
||||
M(RemoteFSSeeks, "Total number of seeks for async buffer") \
|
||||
M(RemoteFSPrefetches, "Number of prefetches made with asynchronous reading from remote filesystem") \
|
||||
|
@ -28,6 +28,8 @@ namespace ProfileEvents
|
||||
extern const Event FilesystemCacheGetOrSetMicroseconds;
|
||||
extern const Event FilesystemCacheGetMicroseconds;
|
||||
extern const Event FilesystemCacheFailToReserveSpaceBecauseOfLockContention;
|
||||
extern const Event FilesystemCacheFreeSpaceKeepingThreadRun;
|
||||
extern const Event FilesystemCacheFreeSpaceKeepingThreadWorkMilliseconds;
|
||||
}
|
||||
|
||||
namespace DB
|
||||
@ -86,6 +88,9 @@ FileCache::FileCache(const std::string & cache_name, const FileCacheSettings & s
|
||||
, boundary_alignment(settings.boundary_alignment)
|
||||
, load_metadata_threads(settings.load_metadata_threads)
|
||||
, write_cache_per_user_directory(settings.write_cache_per_user_id_directory)
|
||||
, keep_current_size_to_max_ratio(1 - settings.keep_free_space_size_ratio)
|
||||
, keep_current_elements_to_max_ratio(1 - settings.keep_free_space_elements_ratio)
|
||||
, keep_up_free_space_remove_batch(settings.keep_free_space_remove_batch)
|
||||
, log(getLogger("FileCache(" + cache_name + ")"))
|
||||
, metadata(settings.base_path, settings.background_download_queue_size_limit, settings.background_download_threads, write_cache_per_user_directory)
|
||||
{
|
||||
@ -192,6 +197,12 @@ void FileCache::initialize()
|
||||
|
||||
metadata.startup();
|
||||
|
||||
if (keep_current_size_to_max_ratio != 1 || keep_current_elements_to_max_ratio != 1)
|
||||
{
|
||||
keep_up_free_space_ratio_task = Context::getGlobalContextInstance()->getSchedulePool().createTask(log->name(), [this] { freeSpaceRatioKeepingThreadFunc(); });
|
||||
keep_up_free_space_ratio_task->schedule();
|
||||
}
|
||||
|
||||
is_initialized = true;
|
||||
}
|
||||
|
||||
@ -946,6 +957,121 @@ bool FileCache::tryReserve(
|
||||
return true;
|
||||
}
|
||||
|
||||
void FileCache::freeSpaceRatioKeepingThreadFunc()
|
||||
{
|
||||
static constexpr auto lock_failed_reschedule_ms = 1000;
|
||||
static constexpr auto space_ratio_satisfied_reschedule_ms = 5000;
|
||||
static constexpr auto general_reschedule_ms = 5000;
|
||||
|
||||
if (shutdown)
|
||||
return;
|
||||
|
||||
Stopwatch watch;
|
||||
|
||||
auto lock = tryLockCache();
|
||||
|
||||
/// To avoid deteriorating contention on cache,
|
||||
/// proceed only if cache is not heavily used.
|
||||
if (!lock)
|
||||
{
|
||||
keep_up_free_space_ratio_task->scheduleAfter(lock_failed_reschedule_ms);
|
||||
return;
|
||||
}
|
||||
|
||||
const size_t size_limit = main_priority->getSizeLimit(lock);
|
||||
const size_t elements_limit = main_priority->getElementsLimit(lock);
|
||||
|
||||
const size_t desired_size = std::lround(keep_current_size_to_max_ratio * size_limit);
|
||||
const size_t desired_elements_num = std::lround(keep_current_elements_to_max_ratio * elements_limit);
|
||||
|
||||
if ((size_limit == 0 || main_priority->getSize(lock) <= desired_size)
|
||||
&& (elements_limit == 0 || main_priority->getElementsCount(lock) <= desired_elements_num))
|
||||
{
|
||||
/// Nothing to free - all limits are satisfied.
|
||||
keep_up_free_space_ratio_task->scheduleAfter(space_ratio_satisfied_reschedule_ms);
|
||||
return;
|
||||
}
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::FilesystemCacheFreeSpaceKeepingThreadRun);
|
||||
|
||||
FileCacheReserveStat stat;
|
||||
EvictionCandidates eviction_candidates;
|
||||
|
||||
bool limits_satisfied = true;
|
||||
try
|
||||
{
|
||||
/// Collect at most `keep_up_free_space_remove_batch` elements to evict,
|
||||
/// (we use batches to make sure we do not block cache for too long,
|
||||
/// by default the batch size is quite small).
|
||||
limits_satisfied = main_priority->collectCandidatesForEviction(
|
||||
desired_size, desired_elements_num, keep_up_free_space_remove_batch, stat, eviction_candidates, lock);
|
||||
|
||||
#ifdef ABORT_ON_LOGICAL_ERROR
|
||||
/// Let's make sure that we correctly processed the limits.
|
||||
if (limits_satisfied && eviction_candidates.size() < keep_up_free_space_remove_batch)
|
||||
{
|
||||
const auto current_size = main_priority->getSize(lock);
|
||||
chassert(current_size >= stat.total_stat.releasable_size);
|
||||
chassert(!size_limit
|
||||
|| current_size - stat.total_stat.releasable_size <= desired_size);
|
||||
|
||||
const auto current_elements_count = main_priority->getElementsCount(lock);
|
||||
chassert(current_elements_count >= stat.total_stat.releasable_count);
|
||||
chassert(!elements_limit
|
||||
|| current_elements_count - stat.total_stat.releasable_count <= desired_elements_num);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (shutdown)
|
||||
return;
|
||||
|
||||
if (eviction_candidates.size() > 0)
|
||||
{
|
||||
LOG_TRACE(log, "Current usage {}/{} in size, {}/{} in elements count "
|
||||
"(trying to keep size ration at {} and elements ratio at {}). "
|
||||
"Collected {} eviction candidates, "
|
||||
"skipped {} candidates while iterating",
|
||||
main_priority->getSize(lock), size_limit,
|
||||
main_priority->getElementsCount(lock), elements_limit,
|
||||
desired_size, desired_elements_num,
|
||||
eviction_candidates.size(), stat.total_stat.non_releasable_count);
|
||||
|
||||
lock.unlock();
|
||||
|
||||
/// Remove files from filesystem.
|
||||
eviction_candidates.evict();
|
||||
|
||||
/// Take lock again to finalize eviction,
|
||||
/// e.g. to update the in-memory state.
|
||||
lock.lock();
|
||||
eviction_candidates.finalize(nullptr, lock);
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
|
||||
if (eviction_candidates.size() > 0)
|
||||
eviction_candidates.finalize(nullptr, lockCache());
|
||||
|
||||
/// Let's catch such cases in ci,
|
||||
/// in general there should not be exceptions.
|
||||
chassert(false);
|
||||
}
|
||||
|
||||
watch.stop();
|
||||
ProfileEvents::increment(ProfileEvents::FilesystemCacheFreeSpaceKeepingThreadWorkMilliseconds, watch.elapsedMilliseconds());
|
||||
|
||||
LOG_TRACE(log, "Free space ratio keeping thread finished in {} ms", watch.elapsedMilliseconds());
|
||||
|
||||
[[maybe_unused]] bool scheduled = false;
|
||||
if (limits_satisfied)
|
||||
scheduled = keep_up_free_space_ratio_task->scheduleAfter(general_reschedule_ms);
|
||||
else
|
||||
scheduled = keep_up_free_space_ratio_task->schedule();
|
||||
chassert(scheduled);
|
||||
}
|
||||
|
||||
void FileCache::iterate(IterateFunc && func, const UserID & user_id)
|
||||
{
|
||||
return metadata.iterate([&](const LockedKey & locked_key)
|
||||
@ -1275,6 +1401,8 @@ void FileCache::deactivateBackgroundOperations()
|
||||
{
|
||||
shutdown.store(true);
|
||||
metadata.shutdown();
|
||||
if (keep_up_free_space_ratio_task)
|
||||
keep_up_free_space_ratio_task->deactivate();
|
||||
}
|
||||
|
||||
std::vector<FileSegment::Info> FileCache::getFileSegmentInfos(const UserID & user_id)
|
||||
|
@ -189,6 +189,8 @@ public:
|
||||
|
||||
void applySettingsIfPossible(const FileCacheSettings & new_settings, FileCacheSettings & actual_settings);
|
||||
|
||||
void freeSpaceRatioKeepingThreadFunc();
|
||||
|
||||
private:
|
||||
using KeyAndOffset = FileCacheKeyAndOffset;
|
||||
|
||||
@ -198,6 +200,11 @@ private:
|
||||
size_t load_metadata_threads;
|
||||
const bool write_cache_per_user_directory;
|
||||
|
||||
BackgroundSchedulePool::TaskHolder keep_up_free_space_ratio_task;
|
||||
const double keep_current_size_to_max_ratio;
|
||||
const double keep_current_elements_to_max_ratio;
|
||||
const size_t keep_up_free_space_remove_batch;
|
||||
|
||||
LoggerPtr log;
|
||||
|
||||
std::exception_ptr init_exception;
|
||||
|
@ -79,6 +79,15 @@ void FileCacheSettings::loadImpl(FuncHas has, FuncGetUInt get_uint, FuncGetStrin
|
||||
|
||||
if (has("write_cache_per_user_id_directory"))
|
||||
slru_size_ratio = get_uint("write_cache_per_user_id_directory");
|
||||
|
||||
if (has("keep_free_space_size_ratio"))
|
||||
keep_free_space_size_ratio = get_double("keep_free_space_size_ratio");
|
||||
|
||||
if (has("keep_free_space_elements_ratio"))
|
||||
keep_free_space_elements_ratio = get_double("keep_free_space_elements_ratio");
|
||||
|
||||
if (has("keep_free_space_remove_batch"))
|
||||
keep_free_space_elements_ratio = get_uint("keep_free_space_remove_batch");
|
||||
}
|
||||
|
||||
void FileCacheSettings::loadFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix)
|
||||
|
@ -38,6 +38,10 @@ struct FileCacheSettings
|
||||
std::string cache_policy = "LRU";
|
||||
double slru_size_ratio = 0.5;
|
||||
|
||||
double keep_free_space_size_ratio = FILECACHE_DEFAULT_FREE_SPACE_SIZE_RATIO;
|
||||
double keep_free_space_elements_ratio = FILECACHE_DEFAULT_FREE_SPACE_ELEMENTS_RATIO;
|
||||
size_t keep_free_space_remove_batch = FILECACHE_DEFAULT_FREE_SPACE_REMOVE_BATCH;
|
||||
|
||||
void loadFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix);
|
||||
void loadFromCollection(const NamedCollection & collection);
|
||||
|
||||
|
@ -12,6 +12,9 @@ static constexpr int FILECACHE_DEFAULT_LOAD_METADATA_THREADS = 16;
|
||||
static constexpr int FILECACHE_DEFAULT_MAX_ELEMENTS = 10000000;
|
||||
static constexpr int FILECACHE_DEFAULT_HITS_THRESHOLD = 0;
|
||||
static constexpr size_t FILECACHE_BYPASS_THRESHOLD = 256 * 1024 * 1024;
|
||||
static constexpr double FILECACHE_DEFAULT_FREE_SPACE_SIZE_RATIO = 0; /// Disabled.
|
||||
static constexpr double FILECACHE_DEFAULT_FREE_SPACE_ELEMENTS_RATIO = 0; /// Disabled.
|
||||
static constexpr int FILECACHE_DEFAULT_FREE_SPACE_REMOVE_BATCH = 10;
|
||||
|
||||
class FileCache;
|
||||
using FileCachePtr = std::shared_ptr<FileCache>;
|
||||
|
@ -137,6 +137,8 @@ public:
|
||||
|
||||
virtual PriorityDumpPtr dump(const CachePriorityGuard::Lock &) = 0;
|
||||
|
||||
/// Collect eviction candidates sufficient to free `size` bytes
|
||||
/// and `elements` elements from cache.
|
||||
virtual bool collectCandidatesForEviction(
|
||||
size_t size,
|
||||
size_t elements,
|
||||
@ -146,7 +148,10 @@ public:
|
||||
const UserID & user_id,
|
||||
const CachePriorityGuard::Lock &) = 0;
|
||||
|
||||
/// Collect eviction `candidates_num` candidates for eviction.
|
||||
/// Collect eviction candidates sufficient to have `desired_size`
|
||||
/// and `desired_elements_num` as current cache state.
|
||||
/// Collect no more than `max_candidates_to_evict` elements.
|
||||
/// Return `true` if the first condition is satisfied.
|
||||
virtual bool collectCandidatesForEviction(
|
||||
size_t desired_size,
|
||||
size_t desired_elements_count,
|
||||
|
@ -284,6 +284,7 @@ bool LRUFileCachePriority::collectCandidatesForEviction(
|
||||
};
|
||||
|
||||
iterateForEviction(res, stat, can_fit, lock);
|
||||
|
||||
if (can_fit())
|
||||
{
|
||||
/// `res` contains eviction candidates. Do we have any?
|
||||
@ -330,14 +331,17 @@ bool LRUFileCachePriority::collectCandidatesForEviction(
|
||||
EvictionCandidates & res,
|
||||
const CachePriorityGuard::Lock & lock)
|
||||
{
|
||||
auto stop_condition = [&, this]()
|
||||
auto desired_limits_satisfied = [&]()
|
||||
{
|
||||
return canFit(0, 0, stat.total_stat.releasable_size, stat.total_stat.releasable_count,
|
||||
lock, &desired_size, &desired_elements_count)
|
||||
|| (max_candidates_to_evict && res.size() >= max_candidates_to_evict);
|
||||
lock, &desired_size, &desired_elements_count);
|
||||
};
|
||||
auto stop_condition = [&]()
|
||||
{
|
||||
return desired_limits_satisfied() || (max_candidates_to_evict && res.size() >= max_candidates_to_evict);
|
||||
};
|
||||
iterateForEviction(res, stat, stop_condition, lock);
|
||||
return stop_condition();
|
||||
return desired_limits_satisfied();
|
||||
}
|
||||
|
||||
void LRUFileCachePriority::iterateForEviction(
|
||||
|
@ -24,6 +24,8 @@
|
||||
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
|
||||
<cache_policy>LRU</cache_policy>
|
||||
<slru_size_ratio>0.3</slru_size_ratio>
|
||||
<keep_free_space_size_ratio>0.15</keep_free_space_size_ratio>
|
||||
<keep_free_space_elements_ratio>0.15</keep_free_space_elements_ratio>
|
||||
</s3_cache>
|
||||
<s3_cache_02933>
|
||||
<type>cache</type>
|
||||
|
@ -501,3 +501,60 @@ INSERT INTO test SELECT 1, 'test';
|
||||
node.query("SELECT * FROM test FORMAT Null")
|
||||
|
||||
assert key not in node.query("SYSTEM SYNC FILESYSTEM CACHE")
|
||||
|
||||
|
||||
def test_keep_up_size_ratio(cluster):
|
||||
node = cluster.instances["node"]
|
||||
max_elements = 20
|
||||
elements_ratio = 0.5
|
||||
cache_name = "keep_up_size_ratio"
|
||||
node.query(
|
||||
f"""
|
||||
DROP TABLE IF EXISTS test;
|
||||
|
||||
CREATE TABLE test (a String)
|
||||
ENGINE = MergeTree() ORDER BY tuple()
|
||||
SETTINGS disk = disk(type = cache,
|
||||
name = {cache_name},
|
||||
max_size = '100Ki',
|
||||
max_elements = {max_elements},
|
||||
max_file_segment_size = 10,
|
||||
boundary_alignment = 10,
|
||||
path = "test_keep_up_size_ratio",
|
||||
keep_free_space_size_ratio = 0.5,
|
||||
keep_free_space_elements_ratio = {elements_ratio},
|
||||
disk = hdd_blob),
|
||||
min_bytes_for_wide_part = 10485760;
|
||||
|
||||
INSERT INTO test SELECT randomString(200);
|
||||
"""
|
||||
)
|
||||
|
||||
query_id = "test_keep_up_size_ratio_1"
|
||||
node.query(
|
||||
"SELECT * FROM test FORMAT Null SETTINGS enable_filesystem_cache_log = 1",
|
||||
query_id=query_id,
|
||||
)
|
||||
count = int(
|
||||
node.query(
|
||||
f"""
|
||||
SYSTEM FLUSH LOGS;
|
||||
SELECT uniqExact(concat(key, toString(offset)))
|
||||
FROM system.filesystem_cache_log
|
||||
WHERE read_type = 'READ_FROM_FS_AND_DOWNLOADED_TO_CACHE';
|
||||
"""
|
||||
)
|
||||
)
|
||||
assert count > max_elements
|
||||
|
||||
expected = 10
|
||||
for _ in range(100):
|
||||
elements = int(
|
||||
node.query(
|
||||
f"SELECT count() FROM system.filesystem_cache WHERE cache_name = '{cache_name}'"
|
||||
)
|
||||
)
|
||||
if elements <= expected:
|
||||
break
|
||||
time.sleep(1)
|
||||
assert elements <= expected
|
||||
|
Loading…
Reference in New Issue
Block a user