ClickHouse/src/Common/SLRUCachePolicy.h

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

252 lines
8.2 KiB
C++
Raw Normal View History

#pragma once
#include <Common/ICachePolicy.h>
#include <list>
2022-04-28 07:48:15 +00:00
#include <unordered_map>
2022-04-30 16:55:26 +00:00
#include <Common/logger_useful.h>
2022-04-28 07:48:15 +00:00
namespace DB
{
/// Cache policy SLRU evicts entries which were used only once and are not used for a long time,
/// this policy protects entries which were used more than once from a sequential scan.
/// WeightFunction is a functor that takes Mapped as a parameter and returns "weight" (approximate size)
/// of that value.
/// Cache starts to evict entries when their total weight exceeds max_size.
/// Value weight should not change after insertion.
2022-04-28 07:48:15 +00:00
/// To work with the thread-safe implementation of this class use a class "CacheBase" with first parameter "SLRU"
/// and next parameters in the same order as in the constructor of the current class.
template <typename TKey, typename TMapped, typename HashFunction = std::hash<TKey>, typename WeightFunction = TrivialWeightFunction<TMapped>>
2022-04-28 07:48:15 +00:00
class SLRUCachePolicy : public ICachePolicy<TKey, TMapped, HashFunction, WeightFunction>
{
public:
using Key = TKey;
using Mapped = TMapped;
using MappedPtr = std::shared_ptr<Mapped>;
using Base = ICachePolicy<TKey, TMapped, HashFunction, WeightFunction>;
using typename Base::OnWeightLossFunction;
/** Initialize SLRUCachePolicy with max_size and max_protected_size.
* max_protected_size shows how many of the most frequently used entries will not be evicted after a sequential scan.
* max_protected_size == 0 means that the default protected size is equal to half of the total max size.
*/
2022-09-02 08:54:48 +00:00
/// TODO: construct from special struct with cache policy parameters (also with max_protected_size).
2022-08-07 17:16:07 +00:00
SLRUCachePolicy(size_t max_size_, size_t max_elements_size_ = 0, double size_ratio = 0.5, OnWeightLossFunction on_weight_loss_function_ = {})
2022-09-10 02:07:51 +00:00
: max_protected_size(static_cast<size_t>(max_size_ * std::min(1.0, size_ratio)))
2022-08-07 17:16:07 +00:00
, max_size(max_size_)
, max_elements_size(max_elements_size_)
2022-08-07 17:16:07 +00:00
{
Base::on_weight_loss_function = on_weight_loss_function_;
}
2022-08-07 17:16:07 +00:00
size_t weight(std::lock_guard<std::mutex> & /* cache_lock */) const override
{
return current_size;
}
2022-08-07 17:16:07 +00:00
size_t count(std::lock_guard<std::mutex> & /* cache_lock */) const override
{
return cells.size();
}
2022-05-15 03:38:44 +00:00
size_t maxSize() const override
{
return max_size;
}
2022-08-07 17:16:07 +00:00
void reset(std::lock_guard<std::mutex> & /* cache_lock */) override
{
cells.clear();
probationary_queue.clear();
protected_queue.clear();
current_size = 0;
current_protected_size = 0;
}
2022-08-07 17:16:07 +00:00
void remove(const Key & key, std::lock_guard<std::mutex> & /* cache_lock */) override
{
auto it = cells.find(key);
if (it == cells.end())
return;
auto & cell = it->second;
current_size -= cell.size;
if (cell.is_protected)
{
current_protected_size -= cell.size;
}
auto & queue = cell.is_protected ? protected_queue : probationary_queue;
queue.erase(cell.queue_iterator);
cells.erase(it);
}
2022-08-07 17:16:07 +00:00
MappedPtr get(const Key & key, std::lock_guard<std::mutex> & /* cache_lock */) override
{
auto it = cells.find(key);
if (it == cells.end())
{
return MappedPtr();
}
Cell & cell = it->second;
if (cell.is_protected)
{
protected_queue.splice(protected_queue.end(), protected_queue, cell.queue_iterator);
}
else
{
cell.is_protected = true;
current_protected_size += cell.size;
protected_queue.splice(protected_queue.end(), probationary_queue, cell.queue_iterator);
2022-08-07 17:16:07 +00:00
removeOverflow(protected_queue, max_protected_size, current_protected_size, /*is_protected=*/true);
}
return cell.value;
}
2022-08-07 17:16:07 +00:00
void set(const Key & key, const MappedPtr & mapped, std::lock_guard<std::mutex> & /* cache_lock */) override
{
auto [it, inserted] = cells.emplace(std::piecewise_construct,
std::forward_as_tuple(key),
std::forward_as_tuple());
Cell & cell = it->second;
if (inserted)
{
try
{
cell.queue_iterator = probationary_queue.insert(probationary_queue.end(), key);
}
catch (...)
{
cells.erase(it);
throw;
}
}
else
{
current_size -= cell.size;
if (cell.is_protected)
{
current_protected_size -= cell.size;
protected_queue.splice(protected_queue.end(), protected_queue, cell.queue_iterator);
}
else
{
cell.is_protected = true;
protected_queue.splice(protected_queue.end(), probationary_queue, cell.queue_iterator);
}
}
cell.value = mapped;
cell.size = cell.value ? weight_function(*cell.value) : 0;
current_size += cell.size;
current_protected_size += cell.is_protected ? cell.size : 0;
removeOverflow(protected_queue, max_protected_size, current_protected_size, /*is_protected=*/true);
removeOverflow(probationary_queue, max_size, current_size, /*is_protected=*/false);
}
protected:
using SLRUQueue = std::list<Key>;
using SLRUQueueIterator = typename SLRUQueue::iterator;
SLRUQueue probationary_queue;
SLRUQueue protected_queue;
struct Cell
{
2022-08-07 17:16:07 +00:00
bool is_protected = false;
MappedPtr value;
size_t size;
SLRUQueueIterator queue_iterator;
};
using Cells = std::unordered_map<Key, Cell, HashFunction>;
Cells cells;
size_t current_protected_size = 0;
size_t current_size = 0;
const size_t max_protected_size;
const size_t max_size;
const size_t max_elements_size;
2022-04-28 07:48:15 +00:00
WeightFunction weight_function;
void removeOverflow(SLRUQueue & queue, const size_t max_weight_size, size_t & current_weight_size, bool is_protected)
{
size_t current_weight_lost = 0;
size_t queue_size = queue.size();
2022-08-08 20:53:02 +00:00
std::function<bool()> need_remove;
if (is_protected)
{
2022-08-08 20:53:02 +00:00
/// Check if after remove all elements from probationary part there will be no more than max elements
/// in protected queue and weight of all protected elements will be less then max protected weight.
/// It's not possible to check only cells.size() > max_elements_size
/// because protected elements move to probationary part and still remain in cache.
need_remove = [&]()
{
2022-08-07 17:16:07 +00:00
return ((max_elements_size != 0 && cells.size() - probationary_queue.size() > max_elements_size)
2022-08-08 20:53:02 +00:00
|| (current_weight_size > max_weight_size)) && (queue_size > 0);
};
}
else
{
need_remove = [&]()
{
return ((max_elements_size != 0 && cells.size() > max_elements_size)
|| (current_weight_size > max_weight_size)) && (queue_size > 0);
};
}
while (need_remove())
{
const Key & key = queue.front();
auto it = cells.find(key);
if (it == cells.end())
{
LOG_ERROR(&Poco::Logger::get("SLRUCache"), "SLRUCache became inconsistent. There must be a bug in it.");
abort();
}
auto & cell = it->second;
current_weight_size -= cell.size;
if (cell.is_protected)
{
cell.is_protected = false;
probationary_queue.splice(probationary_queue.end(), queue, cell.queue_iterator);
}
else
{
current_weight_lost += cell.size;
cells.erase(it);
queue.pop_front();
}
--queue_size;
}
if (!is_protected)
2022-04-28 07:48:15 +00:00
{
Base::on_weight_loss_function(current_weight_lost);
}
if (current_size > (1ull << 63))
{
LOG_ERROR(&Poco::Logger::get("SLRUCache"), "SLRUCache became inconsistent. There must be a bug in it.");
abort();
}
}
};
}