ClickHouse/src/Coordination/SnapshotableHashTable.h

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

335 lines
9.4 KiB
C++
Raw Normal View History

2021-02-25 14:23:12 +00:00
#pragma once
2021-10-02 07:13:14 +00:00
#include <base/StringRef.h>
#include <Common/HashTable/HashMap.h>
#include <Common/ArenaWithFreeLists.h>
#include <Common/ArenaUtils.h>
2021-02-25 19:52:22 +00:00
#include <unordered_map>
#include <list>
2021-02-26 13:53:34 +00:00
#include <atomic>
#include <iostream>
2021-02-25 14:23:12 +00:00
namespace DB
{
2021-02-25 19:52:22 +00:00
template<typename V>
struct ListNode
2021-02-25 14:23:12 +00:00
{
StringRef key;
2021-02-25 19:52:22 +00:00
V value;
2022-01-17 14:54:09 +00:00
2022-02-10 09:37:49 +00:00
/// Monotonically increasing version info for snapshot
size_t version{0};
2022-01-17 14:54:09 +00:00
bool active_in_map{true};
bool free_key{false};
2021-02-25 14:23:12 +00:00
};
2021-02-25 19:52:22 +00:00
template <class V>
class SnapshotableHashTable
2021-02-25 14:23:12 +00:00
{
2021-02-25 19:52:22 +00:00
private:
2021-02-25 14:23:12 +00:00
2021-02-25 19:52:22 +00:00
using ListElem = ListNode<V>;
using List = std::list<ListElem>;
using Mapped = typename List::iterator;
using IndexMap = HashMap<StringRef, Mapped>;
2021-02-25 14:23:12 +00:00
2021-02-25 19:52:22 +00:00
List list;
IndexMap map;
2021-03-07 21:40:32 +00:00
bool snapshot_mode{false};
/// Allows to avoid additional copies in updateValue function
size_t current_version{0};
size_t snapshot_up_to_version{0};
ArenaWithFreeLists arena;
2022-02-10 09:56:41 +00:00
/// Collect invalid iterators to avoid traversing the whole list
std::vector<Mapped> snapshot_invalid_iters;
2021-02-25 14:23:12 +00:00
2021-11-18 20:17:22 +00:00
uint64_t approximate_data_size{0};
enum OperationType
{
INSERT = 0,
INSERT_OR_REPLACE = 1,
ERASE = 2,
UPDATE_VALUE = 3,
GET_VALUE = 4,
FIND = 5,
CONTAINS = 6,
CLEAR = 7,
CLEAR_OUTDATED_NODES = 8
};
/// Update hash table approximate data size
/// op_type: operation type
/// key_size: key size
/// value_size: size of value to add
/// old_value_size: size of value to minus
/// old_value_size=0 means there is no old value with the same key.
2021-11-19 09:30:58 +00:00
void updateDataSize(OperationType op_type, uint64_t key_size, uint64_t value_size, uint64_t old_value_size)
{
switch (op_type)
{
case INSERT:
approximate_data_size += key_size;
approximate_data_size += value_size;
break;
case INSERT_OR_REPLACE:
/// replace
2021-11-19 09:30:58 +00:00
if (old_value_size != 0)
{
approximate_data_size += key_size;
approximate_data_size += value_size;
2021-11-01 05:26:25 +00:00
if (!snapshot_mode)
{
2022-04-05 06:27:03 +00:00
approximate_data_size -= key_size;
approximate_data_size -= old_value_size;
}
}
2021-11-12 12:48:42 +00:00
/// insert
else
{
approximate_data_size += key_size;
approximate_data_size += value_size;
}
break;
case UPDATE_VALUE:
approximate_data_size += key_size;
approximate_data_size += value_size;
2021-11-01 05:26:25 +00:00
if (!snapshot_mode)
{
approximate_data_size -= key_size;
approximate_data_size -= old_value_size;
}
break;
case ERASE:
2021-11-01 05:26:25 +00:00
if (!snapshot_mode)
{
approximate_data_size -= key_size;
approximate_data_size -= old_value_size;
}
break;
case CLEAR:
approximate_data_size = 0;
break;
case CLEAR_OUTDATED_NODES:
approximate_data_size -= key_size;
approximate_data_size -= value_size;
break;
default:
break;
}
}
2021-02-25 19:52:22 +00:00
public:
2021-02-25 14:23:12 +00:00
using iterator = typename List::iterator;
using const_iterator = typename List::const_iterator;
2021-02-25 19:52:22 +00:00
using ValueUpdater = std::function<void(V & value)>;
2021-02-25 14:23:12 +00:00
2022-01-21 13:35:28 +00:00
std::pair<typename IndexMap::LookupResult, bool> insert(const std::string & key, const V & value)
2021-02-25 14:23:12 +00:00
{
size_t hash_value = map.hash(key);
auto it = map.find(key, hash_value);
if (!it)
2021-02-25 14:23:12 +00:00
{
ListElem elem{copyStringInArena(arena, key), value, current_version};
auto itr = list.insert(list.end(), std::move(elem));
bool inserted;
map.emplace(itr->key, it, inserted, hash_value);
assert(inserted);
it->getMapped() = itr;
updateDataSize(INSERT, key.size(), value.sizeInBytes(), 0);
2022-01-21 13:35:28 +00:00
return std::make_pair(it, true);
2021-02-25 14:23:12 +00:00
}
2021-02-26 13:53:34 +00:00
2022-01-21 13:35:28 +00:00
return std::make_pair(it, false);
2021-02-26 13:53:34 +00:00
}
2021-03-01 13:33:34 +00:00
void insertOrReplace(const std::string & key, const V & value)
{
size_t hash_value = map.hash(key);
auto it = map.find(key, hash_value);
uint64_t old_value_size = it == map.end() ? 0 : it->getMapped()->value.sizeInBytes();
2021-03-01 13:33:34 +00:00
if (it == map.end())
{
ListElem elem{copyStringInArena(arena, key), value, current_version};
auto itr = list.insert(list.end(), std::move(elem));
bool inserted;
map.emplace(itr->key, it, inserted, hash_value);
assert(inserted);
it->getMapped() = itr;
2021-03-01 13:33:34 +00:00
}
else
{
auto list_itr = it->getMapped();
2021-03-01 13:33:34 +00:00
if (snapshot_mode)
{
ListElem elem{list_itr->key, value, current_version};
2021-03-01 13:33:34 +00:00
list_itr->active_in_map = false;
auto new_list_itr = list.insert(list.end(), std::move(elem));
it->getMapped() = new_list_itr;
snapshot_invalid_iters.push_back(list_itr);
2021-03-01 13:33:34 +00:00
}
else
{
list_itr->value = value;
}
}
updateDataSize(INSERT_OR_REPLACE, key.size(), value.sizeInBytes(), old_value_size);
2021-03-01 13:33:34 +00:00
}
2021-02-26 13:53:34 +00:00
bool erase(const std::string & key)
{
auto it = map.find(key);
if (it == map.end())
return false;
auto list_itr = it->getMapped();
2021-11-18 20:17:22 +00:00
uint64_t old_data_size = list_itr->value.sizeInBytes();
2021-02-26 13:53:34 +00:00
if (snapshot_mode)
{
list_itr->active_in_map = false;
2022-02-10 09:56:41 +00:00
snapshot_invalid_iters.push_back(list_itr);
list_itr->free_key = true;
map.erase(it->getKey());
2021-02-26 13:53:34 +00:00
}
2021-02-25 14:23:12 +00:00
else
{
map.erase(it->getKey());
arena.free(const_cast<char *>(list_itr->key.data), list_itr->key.size);
2022-01-17 14:54:09 +00:00
list.erase(list_itr);
2021-02-25 14:23:12 +00:00
}
2021-02-26 13:53:34 +00:00
2021-11-12 12:48:42 +00:00
updateDataSize(ERASE, key.size(), 0, old_data_size);
2021-02-26 13:53:34 +00:00
return true;
2021-02-25 14:23:12 +00:00
}
2021-02-25 19:52:22 +00:00
bool contains(const std::string & key) const
2021-02-25 14:23:12 +00:00
{
2021-02-25 19:52:22 +00:00
return map.find(key) != map.end();
2021-02-25 14:23:12 +00:00
}
2022-01-19 11:46:29 +00:00
const_iterator updateValue(StringRef key, ValueUpdater updater)
2021-02-25 14:23:12 +00:00
{
size_t hash_value = map.hash(key);
auto it = map.find(key, hash_value);
2021-02-25 19:52:22 +00:00
assert(it != map.end());
auto list_itr = it->getMapped();
2021-11-18 20:17:22 +00:00
uint64_t old_value_size = list_itr->value.sizeInBytes();
const_iterator ret;
2021-02-25 19:52:22 +00:00
if (snapshot_mode)
{
/// We in snapshot mode but updating some node which is already more
/// fresh than snapshot distance. So it will not participate in
/// snapshot and we don't need to copy it.
2022-04-05 06:27:03 +00:00
if (list_itr->version <= snapshot_up_to_version)
2022-01-19 13:38:11 +00:00
{
auto elem_copy = *(list_itr);
list_itr->active_in_map = false;
2022-02-10 09:56:41 +00:00
snapshot_invalid_iters.push_back(list_itr);
2022-01-19 13:38:11 +00:00
updater(elem_copy.value);
elem_copy.version = current_version;
auto itr = list.insert(list.end(), std::move(elem_copy));
2022-01-19 13:38:11 +00:00
it->getMapped() = itr;
ret = itr;
}
else
{
updater(list_itr->value);
ret = list_itr;
}
2021-02-25 19:52:22 +00:00
}
else
{
updater(list_itr->value);
ret = list_itr;
2021-02-25 19:52:22 +00:00
}
2022-01-19 11:46:29 +00:00
updateDataSize(UPDATE_VALUE, key.size, ret->value.sizeInBytes(), old_value_size);
return ret;
2021-02-25 14:23:12 +00:00
}
2022-01-19 11:46:29 +00:00
const_iterator find(StringRef key) const
2021-02-26 13:53:34 +00:00
{
auto map_it = map.find(key);
if (map_it != map.end())
return map_it->getMapped();
2021-02-26 13:53:34 +00:00
return list.end();
}
2022-01-19 11:46:29 +00:00
const V & getValue(StringRef key) const
2021-02-25 14:23:12 +00:00
{
2021-02-25 19:52:22 +00:00
auto it = map.find(key);
assert(it);
return it->getMapped()->value;
2021-02-25 14:23:12 +00:00
}
void clearOutdatedNodes()
2021-02-26 13:53:34 +00:00
{
for (auto & itr: snapshot_invalid_iters)
2021-02-26 13:53:34 +00:00
{
assert(!itr->active_in_map);
updateDataSize(CLEAR_OUTDATED_NODES, itr->key.size, itr->value.sizeInBytes(), 0);
if (itr->free_key)
arena.free(const_cast<char *>(itr->key.data), itr->key.size);
list.erase(itr);
2021-02-26 13:53:34 +00:00
}
snapshot_invalid_iters.clear();
2021-02-26 13:53:34 +00:00
}
2021-02-25 19:52:22 +00:00
void clear()
2021-02-25 14:23:12 +00:00
{
2021-02-25 19:52:22 +00:00
map.clear();
2022-01-17 14:54:09 +00:00
for (auto itr = list.begin(); itr != list.end(); ++itr)
arena.free(const_cast<char *>(itr->key.data), itr->key.size);
list.clear();
updateDataSize(CLEAR, 0, 0, 0);
2021-02-25 14:23:12 +00:00
}
void enableSnapshotMode(size_t version)
2021-02-25 14:23:12 +00:00
{
2021-02-25 19:52:22 +00:00
snapshot_mode = true;
snapshot_up_to_version = version;
++current_version;
2021-02-25 14:23:12 +00:00
}
2021-02-26 13:53:34 +00:00
void disableSnapshotMode()
2021-02-25 14:23:12 +00:00
{
2021-02-25 19:52:22 +00:00
snapshot_mode = false;
2021-02-25 14:23:12 +00:00
}
2021-02-26 13:53:34 +00:00
size_t size() const
{
return map.size();
}
std::pair<size_t, size_t> snapshotSizeWithVersion() const
2021-02-26 13:53:34 +00:00
{
return std::make_pair(list.size(), current_version);
2021-02-26 13:53:34 +00:00
}
2021-11-19 07:52:35 +00:00
uint64_t getApproximateDataSize() const
{
return approximate_data_size;
}
2021-02-26 13:53:34 +00:00
2022-01-19 11:46:29 +00:00
uint64_t keyArenaSize() const
{
return arena.size();
}
2021-02-25 14:23:12 +00:00
iterator begin() { return list.begin(); }
const_iterator begin() const { return list.cbegin(); }
iterator end() { return list.end(); }
const_iterator end() const { return list.cend(); }
};
}