2021-02-25 14:23:12 +00:00
|
|
|
#pragma once
|
2021-10-02 07:13:14 +00:00
|
|
|
#include <base/StringRef.h>
|
2021-12-30 16:21:49 +00:00
|
|
|
#include <Common/HashTable/HashMap.h>
|
|
|
|
#include <Common/ArenaWithFreeLists.h>
|
2022-02-10 20:06:23 +00:00
|
|
|
#include <Common/ArenaUtils.h>
|
2021-02-25 19:52:22 +00:00
|
|
|
#include <unordered_map>
|
|
|
|
#include <list>
|
2021-02-26 13:53:34 +00:00
|
|
|
#include <atomic>
|
2021-12-30 16:21:49 +00:00
|
|
|
#include <iostream>
|
2021-02-25 14:23:12 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2021-02-25 19:52:22 +00:00
|
|
|
template<typename V>
|
|
|
|
struct ListNode
|
2021-02-25 14:23:12 +00:00
|
|
|
{
|
2021-12-30 16:21:49 +00:00
|
|
|
StringRef key;
|
2021-02-25 19:52:22 +00:00
|
|
|
V value;
|
2022-01-17 14:54:09 +00:00
|
|
|
|
2022-02-10 09:37:49 +00:00
|
|
|
/// Monotonically increasing version info for snapshot
|
2022-02-10 09:13:46 +00:00
|
|
|
size_t version{0};
|
2022-01-17 14:54:09 +00:00
|
|
|
bool active_in_map{true};
|
2021-12-30 16:21:49 +00:00
|
|
|
bool free_key{false};
|
2021-02-25 14:23:12 +00:00
|
|
|
};
|
|
|
|
|
2021-02-25 19:52:22 +00:00
|
|
|
template <class V>
|
|
|
|
class SnapshotableHashTable
|
2021-02-25 14:23:12 +00:00
|
|
|
{
|
2021-02-25 19:52:22 +00:00
|
|
|
private:
|
2021-02-25 14:23:12 +00:00
|
|
|
|
2021-02-25 19:52:22 +00:00
|
|
|
using ListElem = ListNode<V>;
|
|
|
|
using List = std::list<ListElem>;
|
2021-12-30 16:21:49 +00:00
|
|
|
using Mapped = typename List::iterator;
|
|
|
|
using IndexMap = HashMap<StringRef, Mapped>;
|
2021-02-25 14:23:12 +00:00
|
|
|
|
2021-02-25 19:52:22 +00:00
|
|
|
List list;
|
|
|
|
IndexMap map;
|
2021-03-07 21:40:32 +00:00
|
|
|
bool snapshot_mode{false};
|
2022-01-24 10:17:40 +00:00
|
|
|
/// Allows to avoid additional copies in updateValue function
|
2022-02-10 09:13:46 +00:00
|
|
|
size_t current_version{0};
|
|
|
|
size_t snapshot_up_to_version{0};
|
2021-12-30 16:21:49 +00:00
|
|
|
ArenaWithFreeLists arena;
|
2022-02-10 09:56:41 +00:00
|
|
|
/// Collect invalid iterators to avoid traversing the whole list
|
|
|
|
std::vector<Mapped> snapshot_invalid_iters;
|
2021-02-25 14:23:12 +00:00
|
|
|
|
2021-11-18 20:17:22 +00:00
|
|
|
uint64_t approximate_data_size{0};
|
2021-11-01 05:20:42 +00:00
|
|
|
|
|
|
|
enum OperationType
|
|
|
|
{
|
|
|
|
INSERT = 0,
|
|
|
|
INSERT_OR_REPLACE = 1,
|
|
|
|
ERASE = 2,
|
|
|
|
UPDATE_VALUE = 3,
|
|
|
|
GET_VALUE = 4,
|
|
|
|
FIND = 5,
|
|
|
|
CONTAINS = 6,
|
|
|
|
CLEAR = 7,
|
|
|
|
CLEAR_OUTDATED_NODES = 8
|
|
|
|
};
|
|
|
|
|
|
|
|
/// Update hash table approximate data size
|
|
|
|
/// op_type: operation type
|
|
|
|
/// key_size: key size
|
|
|
|
/// value_size: size of value to add
|
|
|
|
/// old_value_size: size of value to minus
|
|
|
|
/// old_value_size=0 means there is no old value with the same key.
|
2021-11-19 09:30:58 +00:00
|
|
|
void updateDataSize(OperationType op_type, uint64_t key_size, uint64_t value_size, uint64_t old_value_size)
|
2021-11-01 05:20:42 +00:00
|
|
|
{
|
|
|
|
switch (op_type)
|
|
|
|
{
|
|
|
|
case INSERT:
|
|
|
|
approximate_data_size += key_size;
|
|
|
|
approximate_data_size += value_size;
|
|
|
|
break;
|
|
|
|
case INSERT_OR_REPLACE:
|
|
|
|
/// replace
|
2021-11-19 09:30:58 +00:00
|
|
|
if (old_value_size != 0)
|
2021-11-01 05:20:42 +00:00
|
|
|
{
|
|
|
|
approximate_data_size += key_size;
|
|
|
|
approximate_data_size += value_size;
|
2021-11-01 05:26:25 +00:00
|
|
|
if (!snapshot_mode)
|
2021-11-01 05:20:42 +00:00
|
|
|
{
|
2022-04-05 06:27:03 +00:00
|
|
|
approximate_data_size -= key_size;
|
2021-11-01 05:20:42 +00:00
|
|
|
approximate_data_size -= old_value_size;
|
|
|
|
}
|
|
|
|
}
|
2021-11-12 12:48:42 +00:00
|
|
|
/// insert
|
2021-11-01 05:20:42 +00:00
|
|
|
else
|
|
|
|
{
|
|
|
|
approximate_data_size += key_size;
|
|
|
|
approximate_data_size += value_size;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case UPDATE_VALUE:
|
|
|
|
approximate_data_size += key_size;
|
|
|
|
approximate_data_size += value_size;
|
2021-11-01 05:26:25 +00:00
|
|
|
if (!snapshot_mode)
|
2021-11-01 05:20:42 +00:00
|
|
|
{
|
|
|
|
approximate_data_size -= key_size;
|
|
|
|
approximate_data_size -= old_value_size;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case ERASE:
|
2021-11-01 05:26:25 +00:00
|
|
|
if (!snapshot_mode)
|
2021-11-01 05:20:42 +00:00
|
|
|
{
|
|
|
|
approximate_data_size -= key_size;
|
|
|
|
approximate_data_size -= old_value_size;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case CLEAR:
|
|
|
|
approximate_data_size = 0;
|
|
|
|
break;
|
|
|
|
case CLEAR_OUTDATED_NODES:
|
|
|
|
approximate_data_size -= key_size;
|
|
|
|
approximate_data_size -= value_size;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-02-25 19:52:22 +00:00
|
|
|
public:
|
2021-02-25 14:23:12 +00:00
|
|
|
|
|
|
|
using iterator = typename List::iterator;
|
|
|
|
using const_iterator = typename List::const_iterator;
|
2021-02-25 19:52:22 +00:00
|
|
|
using ValueUpdater = std::function<void(V & value)>;
|
2021-02-25 14:23:12 +00:00
|
|
|
|
2022-01-21 13:35:28 +00:00
|
|
|
std::pair<typename IndexMap::LookupResult, bool> insert(const std::string & key, const V & value)
|
2021-02-25 14:23:12 +00:00
|
|
|
{
|
2021-12-30 16:21:49 +00:00
|
|
|
size_t hash_value = map.hash(key);
|
|
|
|
auto it = map.find(key, hash_value);
|
|
|
|
|
|
|
|
if (!it)
|
2021-02-25 14:23:12 +00:00
|
|
|
{
|
2022-02-15 01:22:54 +00:00
|
|
|
ListElem elem{copyStringInArena(arena, key), value, current_version};
|
2022-02-10 09:13:46 +00:00
|
|
|
auto itr = list.insert(list.end(), std::move(elem));
|
2021-12-30 16:21:49 +00:00
|
|
|
bool inserted;
|
|
|
|
map.emplace(itr->key, it, inserted, hash_value);
|
|
|
|
assert(inserted);
|
|
|
|
|
|
|
|
it->getMapped() = itr;
|
2021-11-18 03:39:09 +00:00
|
|
|
updateDataSize(INSERT, key.size(), value.sizeInBytes(), 0);
|
2022-01-21 13:35:28 +00:00
|
|
|
return std::make_pair(it, true);
|
2021-02-25 14:23:12 +00:00
|
|
|
}
|
2021-02-26 13:53:34 +00:00
|
|
|
|
2022-01-21 13:35:28 +00:00
|
|
|
return std::make_pair(it, false);
|
2021-02-26 13:53:34 +00:00
|
|
|
}
|
|
|
|
|
2021-03-01 13:33:34 +00:00
|
|
|
void insertOrReplace(const std::string & key, const V & value)
|
|
|
|
{
|
2021-12-30 16:21:49 +00:00
|
|
|
size_t hash_value = map.hash(key);
|
|
|
|
auto it = map.find(key, hash_value);
|
|
|
|
uint64_t old_value_size = it == map.end() ? 0 : it->getMapped()->value.sizeInBytes();
|
2021-11-01 05:20:42 +00:00
|
|
|
|
2021-03-01 13:33:34 +00:00
|
|
|
if (it == map.end())
|
|
|
|
{
|
2022-02-15 01:22:54 +00:00
|
|
|
ListElem elem{copyStringInArena(arena, key), value, current_version};
|
2022-02-10 09:13:46 +00:00
|
|
|
auto itr = list.insert(list.end(), std::move(elem));
|
2021-12-30 16:21:49 +00:00
|
|
|
bool inserted;
|
|
|
|
map.emplace(itr->key, it, inserted, hash_value);
|
|
|
|
assert(inserted);
|
|
|
|
it->getMapped() = itr;
|
2021-03-01 13:33:34 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2021-12-30 16:21:49 +00:00
|
|
|
auto list_itr = it->getMapped();
|
2021-03-01 13:33:34 +00:00
|
|
|
if (snapshot_mode)
|
|
|
|
{
|
2022-02-10 09:13:46 +00:00
|
|
|
ListElem elem{list_itr->key, value, current_version};
|
2021-03-01 13:33:34 +00:00
|
|
|
list_itr->active_in_map = false;
|
2022-02-10 09:13:46 +00:00
|
|
|
auto new_list_itr = list.insert(list.end(), std::move(elem));
|
2021-12-30 16:21:49 +00:00
|
|
|
it->getMapped() = new_list_itr;
|
2022-02-10 07:43:39 +00:00
|
|
|
snapshot_invalid_iters.push_back(list_itr);
|
2021-03-01 13:33:34 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
list_itr->value = value;
|
|
|
|
}
|
|
|
|
}
|
2021-11-18 03:39:09 +00:00
|
|
|
updateDataSize(INSERT_OR_REPLACE, key.size(), value.sizeInBytes(), old_value_size);
|
2021-03-01 13:33:34 +00:00
|
|
|
}
|
|
|
|
|
2021-02-26 13:53:34 +00:00
|
|
|
bool erase(const std::string & key)
|
|
|
|
{
|
|
|
|
auto it = map.find(key);
|
|
|
|
if (it == map.end())
|
|
|
|
return false;
|
|
|
|
|
2021-12-30 16:21:49 +00:00
|
|
|
auto list_itr = it->getMapped();
|
2021-11-18 20:17:22 +00:00
|
|
|
uint64_t old_data_size = list_itr->value.sizeInBytes();
|
2021-02-26 13:53:34 +00:00
|
|
|
if (snapshot_mode)
|
|
|
|
{
|
|
|
|
list_itr->active_in_map = false;
|
2022-02-10 09:56:41 +00:00
|
|
|
snapshot_invalid_iters.push_back(list_itr);
|
2021-12-30 16:21:49 +00:00
|
|
|
list_itr->free_key = true;
|
|
|
|
map.erase(it->getKey());
|
2021-02-26 13:53:34 +00:00
|
|
|
}
|
2021-02-25 14:23:12 +00:00
|
|
|
else
|
|
|
|
{
|
2021-12-30 16:21:49 +00:00
|
|
|
map.erase(it->getKey());
|
|
|
|
arena.free(const_cast<char *>(list_itr->key.data), list_itr->key.size);
|
2022-01-17 14:54:09 +00:00
|
|
|
list.erase(list_itr);
|
2021-02-25 14:23:12 +00:00
|
|
|
}
|
2021-02-26 13:53:34 +00:00
|
|
|
|
2021-11-12 12:48:42 +00:00
|
|
|
updateDataSize(ERASE, key.size(), 0, old_data_size);
|
2021-02-26 13:53:34 +00:00
|
|
|
return true;
|
2021-02-25 14:23:12 +00:00
|
|
|
}
|
|
|
|
|
2021-02-25 19:52:22 +00:00
|
|
|
bool contains(const std::string & key) const
|
2021-02-25 14:23:12 +00:00
|
|
|
{
|
2021-02-25 19:52:22 +00:00
|
|
|
return map.find(key) != map.end();
|
2021-02-25 14:23:12 +00:00
|
|
|
}
|
|
|
|
|
2022-01-19 11:46:29 +00:00
|
|
|
const_iterator updateValue(StringRef key, ValueUpdater updater)
|
2021-02-25 14:23:12 +00:00
|
|
|
{
|
2021-12-30 16:21:49 +00:00
|
|
|
size_t hash_value = map.hash(key);
|
|
|
|
auto it = map.find(key, hash_value);
|
2021-02-25 19:52:22 +00:00
|
|
|
assert(it != map.end());
|
2021-11-01 05:20:42 +00:00
|
|
|
|
2021-12-30 16:21:49 +00:00
|
|
|
auto list_itr = it->getMapped();
|
2021-11-18 20:17:22 +00:00
|
|
|
uint64_t old_value_size = list_itr->value.sizeInBytes();
|
2021-11-01 05:20:42 +00:00
|
|
|
|
|
|
|
const_iterator ret;
|
|
|
|
|
2021-02-25 19:52:22 +00:00
|
|
|
if (snapshot_mode)
|
|
|
|
{
|
2022-01-24 10:17:40 +00:00
|
|
|
/// We in snapshot mode but updating some node which is already more
|
|
|
|
/// fresh than snapshot distance. So it will not participate in
|
|
|
|
/// snapshot and we don't need to copy it.
|
2022-04-05 06:27:03 +00:00
|
|
|
if (list_itr->version <= snapshot_up_to_version)
|
2022-01-19 13:38:11 +00:00
|
|
|
{
|
|
|
|
auto elem_copy = *(list_itr);
|
|
|
|
list_itr->active_in_map = false;
|
2022-02-10 09:56:41 +00:00
|
|
|
snapshot_invalid_iters.push_back(list_itr);
|
2022-01-19 13:38:11 +00:00
|
|
|
updater(elem_copy.value);
|
2022-02-10 09:13:46 +00:00
|
|
|
elem_copy.version = current_version;
|
|
|
|
auto itr = list.insert(list.end(), std::move(elem_copy));
|
2022-01-19 13:38:11 +00:00
|
|
|
it->getMapped() = itr;
|
|
|
|
ret = itr;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
updater(list_itr->value);
|
|
|
|
ret = list_itr;
|
|
|
|
}
|
2021-02-25 19:52:22 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
updater(list_itr->value);
|
2021-11-01 05:20:42 +00:00
|
|
|
ret = list_itr;
|
2021-02-25 19:52:22 +00:00
|
|
|
}
|
2021-12-30 16:21:49 +00:00
|
|
|
|
2022-01-19 11:46:29 +00:00
|
|
|
updateDataSize(UPDATE_VALUE, key.size, ret->value.sizeInBytes(), old_value_size);
|
2021-11-01 05:20:42 +00:00
|
|
|
return ret;
|
2021-02-25 14:23:12 +00:00
|
|
|
}
|
|
|
|
|
2022-01-19 11:46:29 +00:00
|
|
|
const_iterator find(StringRef key) const
|
2021-02-26 13:53:34 +00:00
|
|
|
{
|
|
|
|
auto map_it = map.find(key);
|
|
|
|
if (map_it != map.end())
|
2021-12-30 16:21:49 +00:00
|
|
|
return map_it->getMapped();
|
2021-02-26 13:53:34 +00:00
|
|
|
return list.end();
|
|
|
|
}
|
|
|
|
|
2021-12-30 16:21:49 +00:00
|
|
|
|
2022-01-19 11:46:29 +00:00
|
|
|
const V & getValue(StringRef key) const
|
2021-02-25 14:23:12 +00:00
|
|
|
{
|
2021-02-25 19:52:22 +00:00
|
|
|
auto it = map.find(key);
|
2021-12-30 16:21:49 +00:00
|
|
|
assert(it);
|
|
|
|
return it->getMapped()->value;
|
2021-02-25 14:23:12 +00:00
|
|
|
}
|
|
|
|
|
2022-01-24 10:17:40 +00:00
|
|
|
void clearOutdatedNodes()
|
2021-02-26 13:53:34 +00:00
|
|
|
{
|
2022-02-10 07:43:39 +00:00
|
|
|
for (auto & itr: snapshot_invalid_iters)
|
2021-02-26 13:53:34 +00:00
|
|
|
{
|
2022-02-10 07:43:39 +00:00
|
|
|
assert(!itr->active_in_map);
|
|
|
|
updateDataSize(CLEAR_OUTDATED_NODES, itr->key.size, itr->value.sizeInBytes(), 0);
|
|
|
|
if (itr->free_key)
|
|
|
|
arena.free(const_cast<char *>(itr->key.data), itr->key.size);
|
|
|
|
list.erase(itr);
|
2021-02-26 13:53:34 +00:00
|
|
|
}
|
2022-02-10 07:43:39 +00:00
|
|
|
snapshot_invalid_iters.clear();
|
2021-02-26 13:53:34 +00:00
|
|
|
}
|
|
|
|
|
2021-02-25 19:52:22 +00:00
|
|
|
void clear()
|
2021-02-25 14:23:12 +00:00
|
|
|
{
|
2021-02-25 19:52:22 +00:00
|
|
|
map.clear();
|
2022-01-17 14:54:09 +00:00
|
|
|
for (auto itr = list.begin(); itr != list.end(); ++itr)
|
|
|
|
arena.free(const_cast<char *>(itr->key.data), itr->key.size);
|
|
|
|
list.clear();
|
2021-11-01 05:20:42 +00:00
|
|
|
updateDataSize(CLEAR, 0, 0, 0);
|
2021-02-25 14:23:12 +00:00
|
|
|
}
|
|
|
|
|
2022-02-10 09:13:46 +00:00
|
|
|
void enableSnapshotMode(size_t version)
|
2021-02-25 14:23:12 +00:00
|
|
|
{
|
2021-02-25 19:52:22 +00:00
|
|
|
snapshot_mode = true;
|
2022-02-10 09:13:46 +00:00
|
|
|
snapshot_up_to_version = version;
|
|
|
|
++current_version;
|
2021-02-25 14:23:12 +00:00
|
|
|
}
|
|
|
|
|
2021-02-26 13:53:34 +00:00
|
|
|
void disableSnapshotMode()
|
2021-02-25 14:23:12 +00:00
|
|
|
{
|
2021-02-25 19:52:22 +00:00
|
|
|
snapshot_mode = false;
|
2021-02-25 14:23:12 +00:00
|
|
|
}
|
|
|
|
|
2021-02-26 13:53:34 +00:00
|
|
|
size_t size() const
|
|
|
|
{
|
|
|
|
return map.size();
|
|
|
|
}
|
|
|
|
|
2022-02-10 09:13:46 +00:00
|
|
|
std::pair<size_t, size_t> snapshotSizeWithVersion() const
|
2021-02-26 13:53:34 +00:00
|
|
|
{
|
2022-02-10 09:13:46 +00:00
|
|
|
return std::make_pair(list.size(), current_version);
|
2021-02-26 13:53:34 +00:00
|
|
|
}
|
|
|
|
|
2021-11-19 07:52:35 +00:00
|
|
|
uint64_t getApproximateDataSize() const
|
2021-11-01 05:20:42 +00:00
|
|
|
{
|
|
|
|
return approximate_data_size;
|
|
|
|
}
|
2021-02-26 13:53:34 +00:00
|
|
|
|
2022-01-19 11:46:29 +00:00
|
|
|
uint64_t keyArenaSize() const
|
|
|
|
{
|
|
|
|
return arena.size();
|
|
|
|
}
|
|
|
|
|
2021-02-25 14:23:12 +00:00
|
|
|
iterator begin() { return list.begin(); }
|
|
|
|
const_iterator begin() const { return list.cbegin(); }
|
|
|
|
iterator end() { return list.end(); }
|
|
|
|
const_iterator end() const { return list.cend(); }
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
}
|