2022-04-28 05:56:26 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <Common/ICachePolicy.h>

#include <algorithm>
#include <list>
#include <unordered_map>

#include <Common/logger_useful.h>
|
2022-04-28 05:56:26 +00:00
|
|
|
|
2022-04-28 07:48:15 +00:00
|
|
|
namespace DB
|
2022-04-28 05:56:26 +00:00
|
|
|
{
|
|
|
|
|
|
|
|
/// Cache policy SLRU evicts entries which were used only once and are not used for a long time,
|
|
|
|
/// this policy protects entries which were used more than once from a sequential scan.
|
|
|
|
/// WeightFunction is a functor that takes Mapped as a parameter and returns "weight" (approximate size)
|
|
|
|
/// of that value.
|
2023-03-12 13:48:16 +00:00
|
|
|
/// Cache starts to evict entries when their total weight exceeds max_size_in_bytes.
|
2022-04-28 05:56:26 +00:00
|
|
|
/// Value weight should not change after insertion.
|
2022-04-28 07:48:15 +00:00
|
|
|
/// To work with the thread-safe implementation of this class use a class "CacheBase" with first parameter "SLRU"
|
2022-04-28 05:56:26 +00:00
|
|
|
/// and next parameters in the same order as in the constructor of the current class.
|
2023-03-13 07:28:02 +00:00
|
|
|
template <typename Key, typename Mapped, typename HashFunction = std::hash<Key>, typename WeightFunction = EqualWeightFunction<Mapped>>
|
|
|
|
class SLRUCachePolicy : public ICachePolicy<Key, Mapped, HashFunction, WeightFunction>
|
2022-04-28 05:56:26 +00:00
|
|
|
{
|
|
|
|
public:
|
2023-03-13 07:28:02 +00:00
|
|
|
using Base = ICachePolicy<Key, Mapped, HashFunction, WeightFunction>;
|
2023-03-13 12:10:22 +00:00
|
|
|
using typename Base::MappedPtr;
|
2022-04-28 05:56:26 +00:00
|
|
|
using typename Base::OnWeightLossFunction;
|
|
|
|
|
2023-03-12 13:48:16 +00:00
|
|
|
/** Initialize SLRUCachePolicy with max_size_in_bytes and max_protected_size.
|
2022-04-28 05:56:26 +00:00
|
|
|
* max_protected_size shows how many of the most frequently used entries will not be evicted after a sequential scan.
|
|
|
|
* max_protected_size == 0 means that the default protected size is equal to half of the total max size.
|
|
|
|
*/
|
2022-09-02 08:54:48 +00:00
|
|
|
/// TODO: construct from special struct with cache policy parameters (also with max_protected_size).
|
2023-03-13 07:02:52 +00:00
|
|
|
SLRUCachePolicy(size_t max_size_in_bytes_, size_t max_entries_, double size_ratio, OnWeightLossFunction on_weight_loss_function_)
|
2023-03-12 13:48:16 +00:00
|
|
|
: max_protected_size(static_cast<size_t>(max_size_in_bytes_ * std::min(1.0, size_ratio)))
|
|
|
|
, max_size_in_bytes(max_size_in_bytes_)
|
2023-03-12 13:57:10 +00:00
|
|
|
, max_entries(max_entries_)
|
2023-03-13 07:33:41 +00:00
|
|
|
, on_weight_loss_function(on_weight_loss_function_)
|
|
|
|
{
|
|
|
|
}
|
2022-04-28 05:56:26 +00:00
|
|
|
|
2022-08-07 17:16:07 +00:00
|
|
|
size_t weight(std::lock_guard<std::mutex> & /* cache_lock */) const override
|
2022-04-28 05:56:26 +00:00
|
|
|
{
|
2023-03-12 13:48:16 +00:00
|
|
|
return current_size_in_bytes;
|
2022-04-28 05:56:26 +00:00
|
|
|
}
|
|
|
|
|
2022-08-07 17:16:07 +00:00
|
|
|
size_t count(std::lock_guard<std::mutex> & /* cache_lock */) const override
|
2022-04-28 05:56:26 +00:00
|
|
|
{
|
|
|
|
return cells.size();
|
|
|
|
}
|
|
|
|
|
2023-03-13 14:42:06 +00:00
|
|
|
size_t maxSize(std::lock_guard<std::mutex> & /* cache_lock */) const override
|
2022-04-28 05:56:26 +00:00
|
|
|
{
|
2023-03-12 13:48:16 +00:00
|
|
|
return max_size_in_bytes;
|
2022-04-28 05:56:26 +00:00
|
|
|
}
|
|
|
|
|
2022-08-07 17:16:07 +00:00
|
|
|
void reset(std::lock_guard<std::mutex> & /* cache_lock */) override
|
2022-04-28 05:56:26 +00:00
|
|
|
{
|
|
|
|
cells.clear();
|
|
|
|
probationary_queue.clear();
|
|
|
|
protected_queue.clear();
|
2023-03-12 13:48:16 +00:00
|
|
|
current_size_in_bytes = 0;
|
2022-04-28 05:56:26 +00:00
|
|
|
current_protected_size = 0;
|
|
|
|
}
|
|
|
|
|
2022-08-07 17:16:07 +00:00
|
|
|
void remove(const Key & key, std::lock_guard<std::mutex> & /* cache_lock */) override
|
2022-04-28 05:56:26 +00:00
|
|
|
{
|
|
|
|
auto it = cells.find(key);
|
|
|
|
if (it == cells.end())
|
|
|
|
return;
|
|
|
|
auto & cell = it->second;
|
2023-03-12 13:48:16 +00:00
|
|
|
current_size_in_bytes -= cell.size;
|
2022-05-17 08:11:02 +00:00
|
|
|
if (cell.is_protected)
|
|
|
|
{
|
|
|
|
current_protected_size -= cell.size;
|
|
|
|
}
|
2022-04-28 05:56:26 +00:00
|
|
|
auto & queue = cell.is_protected ? protected_queue : probationary_queue;
|
|
|
|
queue.erase(cell.queue_iterator);
|
|
|
|
cells.erase(it);
|
|
|
|
}
|
|
|
|
|
2022-08-07 17:16:07 +00:00
|
|
|
MappedPtr get(const Key & key, std::lock_guard<std::mutex> & /* cache_lock */) override
|
2022-04-28 05:56:26 +00:00
|
|
|
{
|
|
|
|
auto it = cells.find(key);
|
|
|
|
if (it == cells.end())
|
|
|
|
{
|
|
|
|
return MappedPtr();
|
|
|
|
}
|
|
|
|
|
|
|
|
Cell & cell = it->second;
|
|
|
|
|
|
|
|
if (cell.is_protected)
|
|
|
|
{
|
|
|
|
protected_queue.splice(protected_queue.end(), protected_queue, cell.queue_iterator);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
cell.is_protected = true;
|
|
|
|
current_protected_size += cell.size;
|
|
|
|
protected_queue.splice(protected_queue.end(), probationary_queue, cell.queue_iterator);
|
2022-08-07 17:16:07 +00:00
|
|
|
removeOverflow(protected_queue, max_protected_size, current_protected_size, /*is_protected=*/true);
|
2022-04-28 05:56:26 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return cell.value;
|
|
|
|
}
|
|
|
|
|
2022-08-07 17:16:07 +00:00
|
|
|
void set(const Key & key, const MappedPtr & mapped, std::lock_guard<std::mutex> & /* cache_lock */) override
|
2022-04-28 05:56:26 +00:00
|
|
|
{
|
|
|
|
auto [it, inserted] = cells.emplace(std::piecewise_construct,
|
|
|
|
std::forward_as_tuple(key),
|
|
|
|
std::forward_as_tuple());
|
|
|
|
|
|
|
|
Cell & cell = it->second;
|
|
|
|
|
|
|
|
if (inserted)
|
|
|
|
{
|
|
|
|
try
|
|
|
|
{
|
|
|
|
cell.queue_iterator = probationary_queue.insert(probationary_queue.end(), key);
|
|
|
|
}
|
|
|
|
catch (...)
|
|
|
|
{
|
|
|
|
cells.erase(it);
|
|
|
|
throw;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2023-03-12 13:48:16 +00:00
|
|
|
current_size_in_bytes -= cell.size;
|
2022-04-28 05:56:26 +00:00
|
|
|
if (cell.is_protected)
|
|
|
|
{
|
|
|
|
current_protected_size -= cell.size;
|
|
|
|
protected_queue.splice(protected_queue.end(), protected_queue, cell.queue_iterator);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
cell.is_protected = true;
|
|
|
|
protected_queue.splice(protected_queue.end(), probationary_queue, cell.queue_iterator);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
cell.value = mapped;
|
|
|
|
cell.size = cell.value ? weight_function(*cell.value) : 0;
|
2023-03-12 13:48:16 +00:00
|
|
|
current_size_in_bytes += cell.size;
|
2022-04-28 05:56:26 +00:00
|
|
|
current_protected_size += cell.is_protected ? cell.size : 0;
|
|
|
|
|
2022-04-30 11:53:59 +00:00
|
|
|
removeOverflow(protected_queue, max_protected_size, current_protected_size, /*is_protected=*/true);
|
2023-03-12 13:48:16 +00:00
|
|
|
removeOverflow(probationary_queue, max_size_in_bytes, current_size_in_bytes, /*is_protected=*/false);
|
2022-04-28 05:56:26 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
protected:
|
|
|
|
using SLRUQueue = std::list<Key>;
|
|
|
|
using SLRUQueueIterator = typename SLRUQueue::iterator;
|
|
|
|
|
|
|
|
SLRUQueue probationary_queue;
|
|
|
|
SLRUQueue protected_queue;
|
|
|
|
|
|
|
|
struct Cell
|
|
|
|
{
|
2022-08-07 17:16:07 +00:00
|
|
|
bool is_protected = false;
|
2022-04-28 05:56:26 +00:00
|
|
|
MappedPtr value;
|
|
|
|
size_t size;
|
|
|
|
SLRUQueueIterator queue_iterator;
|
|
|
|
};
|
|
|
|
|
|
|
|
using Cells = std::unordered_map<Key, Cell, HashFunction>;
|
|
|
|
|
|
|
|
Cells cells;
|
|
|
|
|
|
|
|
size_t current_protected_size = 0;
|
2023-03-12 13:48:16 +00:00
|
|
|
size_t current_size_in_bytes = 0;
|
2022-04-28 05:56:26 +00:00
|
|
|
const size_t max_protected_size;
|
2023-03-12 13:48:16 +00:00
|
|
|
const size_t max_size_in_bytes;
|
2023-03-12 13:57:10 +00:00
|
|
|
const size_t max_entries;
|
2022-04-28 07:48:15 +00:00
|
|
|
|
2022-04-28 05:56:26 +00:00
|
|
|
WeightFunction weight_function;
|
2023-03-13 07:33:41 +00:00
|
|
|
OnWeightLossFunction on_weight_loss_function;
|
2022-04-28 05:56:26 +00:00
|
|
|
|
2022-04-30 11:53:59 +00:00
|
|
|
void removeOverflow(SLRUQueue & queue, const size_t max_weight_size, size_t & current_weight_size, bool is_protected)
|
2022-04-28 05:56:26 +00:00
|
|
|
{
|
|
|
|
size_t current_weight_lost = 0;
|
|
|
|
size_t queue_size = queue.size();
|
|
|
|
|
2022-08-08 20:53:02 +00:00
|
|
|
std::function<bool()> need_remove;
|
|
|
|
if (is_protected)
|
2022-04-30 11:53:59 +00:00
|
|
|
{
|
2022-08-08 20:53:02 +00:00
|
|
|
/// Check if after remove all elements from probationary part there will be no more than max elements
|
|
|
|
/// in protected queue and weight of all protected elements will be less then max protected weight.
|
2023-03-12 13:57:10 +00:00
|
|
|
/// It's not possible to check only cells.size() > max_entries
|
2022-08-08 20:53:02 +00:00
|
|
|
/// because protected elements move to probationary part and still remain in cache.
|
|
|
|
need_remove = [&]()
|
2022-04-30 11:53:59 +00:00
|
|
|
{
|
2023-03-12 13:57:10 +00:00
|
|
|
return ((max_entries != 0 && cells.size() - probationary_queue.size() > max_entries)
|
2022-08-08 20:53:02 +00:00
|
|
|
|| (current_weight_size > max_weight_size)) && (queue_size > 0);
|
|
|
|
};
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
need_remove = [&]()
|
|
|
|
{
|
2023-03-12 13:57:10 +00:00
|
|
|
return ((max_entries != 0 && cells.size() > max_entries)
|
2022-08-08 20:53:02 +00:00
|
|
|
|| (current_weight_size > max_weight_size)) && (queue_size > 0);
|
|
|
|
};
|
|
|
|
}
|
2022-04-30 11:53:59 +00:00
|
|
|
|
|
|
|
while (need_remove())
|
2022-04-28 05:56:26 +00:00
|
|
|
{
|
|
|
|
const Key & key = queue.front();
|
|
|
|
|
|
|
|
auto it = cells.find(key);
|
|
|
|
if (it == cells.end())
|
|
|
|
{
|
|
|
|
LOG_ERROR(&Poco::Logger::get("SLRUCache"), "SLRUCache became inconsistent. There must be a bug in it.");
|
|
|
|
abort();
|
|
|
|
}
|
|
|
|
|
|
|
|
auto & cell = it->second;
|
|
|
|
|
|
|
|
current_weight_size -= cell.size;
|
|
|
|
|
|
|
|
if (cell.is_protected)
|
|
|
|
{
|
|
|
|
cell.is_protected = false;
|
|
|
|
probationary_queue.splice(probationary_queue.end(), queue, cell.queue_iterator);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
current_weight_lost += cell.size;
|
|
|
|
cells.erase(it);
|
|
|
|
queue.pop_front();
|
|
|
|
}
|
|
|
|
|
|
|
|
--queue_size;
|
|
|
|
}
|
|
|
|
|
2022-04-30 11:53:59 +00:00
|
|
|
if (!is_protected)
|
2023-03-13 07:33:41 +00:00
|
|
|
on_weight_loss_function(current_weight_lost);
|
2022-04-28 05:56:26 +00:00
|
|
|
|
2023-03-12 13:48:16 +00:00
|
|
|
if (current_size_in_bytes > (1ull << 63))
|
2022-04-28 05:56:26 +00:00
|
|
|
{
|
|
|
|
LOG_ERROR(&Poco::Logger::get("SLRUCache"), "SLRUCache became inconsistent. There must be a bug in it.");
|
|
|
|
abort();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
}
|