#pragma once

#include <atomic>
#include <chrono>
#include <list>
#include <memory>
#include <mutex>
#include <unordered_map>
#include <unordered_set>

#include <Common/logger_useful.h>

namespace DB
{
template <typename T>
struct TrivialLRUResourceCacheWeightFunction
{
    size_t operator()(const T &) const noexcept { return 1; }
};

template <typename T>
struct TrivialLRUResourceCacheReleaseFunction
{
    void operator()(std::shared_ptr<T>) noexcept { }
};
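
/* The trivial functors above are the default customization points: WeightFunction maps a cached
 * value to its weight, and ReleaseFunction is invoked on the value when its entry is finally
 * erased from the cache. A minimal sketch of non-trivial replacements, assuming a hypothetical
 * Buffer type with size() and close() methods (illustrative only, not part of this header):
 *
 *     struct BufferWeightFunction
 *     {
 *         size_t operator()(const Buffer & buf) const noexcept { return buf.size(); }
 *     };
 *
 *     struct BufferReleaseFunction
 *     {
 *         void operator()(std::shared_ptr<Buffer> buf) noexcept
 *         {
 *             if (buf)
 *                 buf->close();
 *         }
 *     };
 */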

/**
 * Similar to the implementation in LRUCachePolicy.h, but with the difference that keys can
 * only be evicted when they are releasable. The release state is controlled by this implementation.
 * get() and getOrSet() return a Holder to the actual value, which calls release() in its destructor
 * (see the usage sketch below).
 *
 * Warning (!): This implementation is in development, not to be used.
 */
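
/* A minimal usage sketch of the Holder-based API, assuming the cache stores hypothetical
 * FileHandle objects created by a hypothetical open_file() helper (illustrative only):
 *
 *     using FileCache = LRUResourceCache<std::string, FileHandle>;
 *     FileCache cache(16);  // with the trivial weight function this caps the cache at 16 entries
 *
 *     // The loader runs only on a miss; concurrent callers for the same key share one load.
 *     auto holder = cache.getOrSet("data.bin", [] { return std::make_shared<FileHandle>(open_file("data.bin")); });
 *     if (holder)
 *     {
 *         FileHandle & handle = holder->value();
 *         // ... use the handle; the entry cannot be evicted while the holder is alive ...
 *     }
 *     // ~MappedHolder() releases the reference; the entry becomes evictable again.
 */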

template <
    typename TKey,
    typename TMapped,
    typename WeightFunction = TrivialLRUResourceCacheWeightFunction<TMapped>,
    typename ReleaseFunction = TrivialLRUResourceCacheReleaseFunction<TMapped>,
    typename HashFunction = std::hash<TKey>>
class LRUResourceCache
{
public:
    using Key = TKey;
    using Mapped = TMapped;
    using MappedPtr = std::shared_ptr<Mapped>;

    /// RAII wrapper around a cached value: releases the reference in its destructor.
    class MappedHolder
    {
    public:
        MappedHolder(LRUResourceCache * cache_, const Key & key_, MappedPtr value_) : cache(cache_), key(key_), val(value_) { }

        ~MappedHolder() { cache->release(key); }

        Mapped & value() { return *val; }

    protected:
        LRUResourceCache * cache;
        Key key;
        MappedPtr val;
    };

    using MappedHolderPtr = std::unique_ptr<MappedHolder>;

    explicit LRUResourceCache(size_t max_weight_, size_t max_element_size_ = 0)
        : max_weight(max_weight_), max_element_size(max_element_size_)
    {
    }

    MappedHolderPtr get(const Key & key)
    {
        auto mapped_ptr = getImpl(key);
        if (!mapped_ptr)
            return nullptr;
        return std::make_unique<MappedHolder>(this, key, mapped_ptr);
    }

    template <typename LoadFunc>
    MappedHolderPtr getOrSet(const Key & key, LoadFunc && load_func)
    {
        auto mapped_ptr = getImpl(key, load_func);
        if (!mapped_ptr)
            return nullptr;
        return std::make_unique<MappedHolder>(this, key, mapped_ptr);
    }

    // If the key's reference_count is 0, delete it immediately.
    // Otherwise, mark it expired (invisible to get()) and delete it once its reference_count drops to 0.
    void tryRemove(const Key & key)
    {
        std::lock_guard lock(mutex);
        auto it = cells.find(key);
        if (it == cells.end())
            return;
        auto & cell = it->second;
        if (cell.reference_count == 0)
        {
            queue.erase(cell.queue_iterator);
            current_weight -= cell.weight;
            release_function(cell.value);
            cells.erase(it);
        }
        else
            cell.expired = true;
    }

    size_t weight()
    {
        std::lock_guard lock(mutex);
        return current_weight;
    }

    size_t size()
    {
        std::lock_guard lock(mutex);
        return cells.size();
    }

    void getStats(size_t & out_hits, size_t & out_misses, size_t & out_evict_count) const
    {
        out_hits = hits;
        out_misses = misses;
        out_evict_count = evict_count;
    }

private:
    mutable std::mutex mutex;

    using LRUQueue = std::list<Key>;
    using LRUQueueIterator = typename LRUQueue::iterator;

    struct Cell
    {
        MappedPtr value;
        size_t weight = 0;
        LRUQueueIterator queue_iterator;
        size_t reference_count = 0;
        bool expired = false;
    };

    using Cells = std::unordered_map<Key, Cell, HashFunction>;
    Cells cells;
    LRUQueue queue;
    size_t current_weight = 0;
    size_t max_weight = 0;
    size_t max_element_size = 0;

    /// Represents a pending insertion attempt.
    struct InsertToken
    {
        explicit InsertToken(LRUResourceCache & cache_) : cache(cache_) { }

        std::mutex mutex;
        bool cleaned_up = false; /// Protected by the token mutex
        MappedPtr value; /// Protected by the token mutex

        LRUResourceCache & cache;
        size_t refcount = 0; /// Protected by the cache mutex
    };

    using InsertTokenById = std::unordered_map<Key, std::shared_ptr<InsertToken>, HashFunction>;

    /// This class is responsible for removing used insert tokens from the insert_tokens map.
    /// Among several concurrent threads the first successful one is responsible for removal. But if they all
    /// fail, then the last one is responsible.
    struct InsertTokenHolder
    {
        const Key * key = nullptr;
        std::shared_ptr<InsertToken> token;
        bool cleaned_up = false;

        InsertTokenHolder() = default;

        void
        acquire(const Key * key_, const std::shared_ptr<InsertToken> & token_, [[maybe_unused]] std::lock_guard<std::mutex> & cache_lock)
        {
            key = key_;
            token = token_;
            ++token->refcount;
        }

        void cleanup([[maybe_unused]] std::lock_guard<std::mutex> & token_lock, [[maybe_unused]] std::lock_guard<std::mutex> & cache_lock)
        {
            token->cache.insert_tokens.erase(*key);
            token->cleaned_up = true;
            cleaned_up = true;
        }

        ~InsertTokenHolder()
        {
            if (!token)
                return;

            if (cleaned_up)
                return;

            std::lock_guard token_lock(token->mutex);

            if (token->cleaned_up)
                return;

            std::lock_guard cache_lock(token->cache.mutex);

            --token->refcount;
            if (token->refcount == 0)
                cleanup(token_lock, cache_lock);
        }
    };

    friend struct InsertTokenHolder;
    InsertTokenById insert_tokens;
    WeightFunction weight_function;
    ReleaseFunction release_function;
    std::atomic<size_t> hits{0};
    std::atomic<size_t> misses{0};
    std::atomic<size_t> evict_count{0};

    /// Returns nullptr when there is no more space for the new value or the old value is still in use.
    template <typename LoadFunc>
    MappedPtr getImpl(const Key & key, LoadFunc && load_func)
    {
        InsertTokenHolder token_holder;
        {
            std::lock_guard lock(mutex);
            auto it = cells.find(key);
            if (it != cells.end())
            {
                if (!it->second.expired)
                {
                    ++hits;
                    it->second.reference_count += 1;
                    queue.splice(queue.end(), queue, it->second.queue_iterator);
                    return it->second.value;
                }
                else if (it->second.reference_count > 0)
                {
                    /// Expired but still referenced: the old value cannot be replaced yet.
                    return nullptr;
                }
                else
                {
                    /// An expired cell with zero references should have been erased in release().
                    LOG_ERROR(&Poco::Logger::get("LRUResourceCache"), "Element is in an invalid state.");
                    abort();
                }
            }

            ++misses;
            auto & token = insert_tokens[key];
            if (!token)
                token = std::make_shared<InsertToken>(*this);
            token_holder.acquire(&key, token, lock);
        }

        auto * token = token_holder.token.get();
        std::lock_guard token_lock(token->mutex);
        token_holder.cleaned_up = token->cleaned_up;

        if (!token->value)
            token->value = load_func();

        std::lock_guard lock(mutex);
        auto token_it = insert_tokens.find(key);
        Cell * cell_ptr = nullptr;
        if (token_it != insert_tokens.end() && token_it->second.get() == token)
        {
            cell_ptr = set(key, token->value);
        }
        else
        {
            auto cell_it = cells.find(key);
            if (cell_it != cells.end() && !cell_it->second.expired)
            {
                cell_ptr = &cell_it->second;
            }
        }

        if (!token->cleaned_up)
            token_holder.cleanup(token_lock, lock);

        if (cell_ptr)
        {
            queue.splice(queue.end(), queue, cell_ptr->queue_iterator);
            cell_ptr->reference_count++;
            return cell_ptr->value;
        }
        return nullptr;
    }

    MappedPtr getImpl(const Key & key)
    {
        std::lock_guard lock(mutex);

        auto it = cells.find(key);
        if (it == cells.end() || it->second.expired)
        {
            ++misses;
            return nullptr;
        }

        ++hits;
        it->second.reference_count += 1;
        queue.splice(queue.end(), queue, it->second.queue_iterator);
        return it->second.value;
    }

    // Mark that one reference to the key has been released.
    void release(const Key & key)
    {
        std::lock_guard lock(mutex);

        auto it = cells.find(key);
        if (it == cells.end() || it->second.reference_count == 0)
        {
            LOG_ERROR(&Poco::Logger::get("LRUResourceCache"), "Attempt to release an invalid element");
            abort();
        }

        auto & cell = it->second;
        cell.reference_count -= 1;
        if (cell.expired && cell.reference_count == 0)
        {
            queue.erase(cell.queue_iterator);
            current_weight -= cell.weight;
            release_function(cell.value);
            cells.erase(it);
        }
    }

    InsertToken * acquireInsertToken(const Key & key)
    {
        auto & token = insert_tokens[key];
        if (!token)
            token = std::make_shared<InsertToken>(*this);
        token->refcount += 1;
        return token.get();
    }

    void releaseInsertToken(const Key & key)
    {
        auto it = insert_tokens.find(key);
        if (it != insert_tokens.end())
        {
            it->second->refcount -= 1;
            if (it->second->refcount == 0)
                insert_tokens.erase(it);
        }
    }

    // The key must not already be present in the cache.
    Cell * set(const Key & insert_key, MappedPtr value)
    {
        size_t weight = value ? weight_function(*value) : 0;
        size_t queue_size = cells.size() + 1;
        size_t loss_weight = 0;
        auto is_overflow = [&] {
            return current_weight + weight > max_weight + loss_weight || (max_element_size != 0 && queue_size > max_element_size);
        };

        auto key_it = queue.begin();
        std::unordered_set<Key, HashFunction> to_release_keys;

        // Scan from the least recently used end and collect unreferenced entries until the new value fits.
        while (is_overflow() && queue_size > 1 && key_it != queue.end())
        {
            const Key & key = *key_it;

            auto cell_it = cells.find(key);
            if (cell_it == cells.end())
            {
                LOG_ERROR(&Poco::Logger::get("LRUResourceCache"), "LRUResourceCache became inconsistent. There must be a bug in it.");
                abort();
            }

            auto & cell = cell_it->second;
            if (cell.reference_count == 0)
            {
                loss_weight += cell.weight;
                queue_size--;
                to_release_keys.insert(key);
            }

            ++key_it;
        }

        if (is_overflow())
            return nullptr;

        if (loss_weight > current_weight + weight)
        {
            LOG_ERROR(&Poco::Logger::get("LRUResourceCache"), "LRUResourceCache became inconsistent. There must be a bug in it.");
            abort();
        }

        for (auto & key : to_release_keys)
        {
            auto & cell = cells[key];
            queue.erase(cell.queue_iterator);
            release_function(cell.value);
            cells.erase(key);
            ++evict_count;
        }

        current_weight = current_weight + weight - loss_weight;

        auto & new_cell = cells[insert_key];
        new_cell.value = value;
        new_cell.weight = weight;
        new_cell.queue_iterator = queue.insert(queue.end(), insert_key);
        return &new_cell;
    }
};

}