This commit is contained in:
nikitamikhaylov 2020-10-01 21:28:40 +03:00
parent 10adac00f4
commit fbd0d14dd6
6 changed files with 33 additions and 52 deletions

View File

@ -85,6 +85,7 @@ CacheDictionary::CacheDictionary(
, size{roundUpToPowerOfTwoOrZero(std::max(size_, size_t(max_collision_length)))}
, size_overlap_mask{this->size - 1}
, cells{this->size}
, default_keys{this->size}
, rnd_engine(randomSeed())
, update_queue(max_update_queue_size_)
, update_pool(max_threads_for_updates)
@ -309,14 +310,14 @@ std::string CacheDictionary::AttributeValuesForKey::dump()
std::string CacheDictionary::UpdateUnit::dumpFoundIds()
{
std::string ans;
std::ostringstream os;
for (auto it : found_ids)
{
ans += "Key: " + std::to_string(it.first) + "\n";
os << "Key: " << std::to_string(it.first) << "\n";
if (it.second.found)
ans += it.second.dump() + "\n";
os << it.second.dump() << "\n";
}
return ans;
return os.str();
};
/// returns cell_idx (always valid for replacing), 'cell is valid' flag, 'cell is outdated' flag
@ -387,12 +388,11 @@ void CacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8>
for (const auto row : ext::range(0, rows))
{
const auto id = ids[row];
{
std::shared_lock shared_lock(default_cache_rw_lock);
/// Check if the key is stored in the cache of defaults.
if (default_keys.find(id) != default_keys.end())
continue;
}
/// Check if the key is stored in the cache of defaults.
if (default_keys.has(id))
continue;
const auto find_result = findCellIdx(id, now);
auto insert_to_answer_routine = [&] ()
@ -461,7 +461,7 @@ void CacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8>
}
/// At this point we have two situations.
/// There may be both types of keys: cache_expired_ids and cache_not_found_ids.
/// There may be both types of keys: expired and not_found.
/// We will update them all synchronously.
std::vector<Key> required_ids;
@ -481,11 +481,8 @@ void CacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8>
for (const auto row : cache_expired_or_not_found_ids[key])
out[row] = true;
else
{
std::unique_lock unique_lock(default_cache_rw_lock);
/// Cache this key as default.
default_keys.insert(key);
}
default_keys.add(key);
}
}

View File

@ -22,6 +22,8 @@
#include "IDictionary.h"
#include "IDictionarySource.h"
#include <Common/LRUSet.h>
namespace CurrentMetrics
{
extern const Metric CacheDictionaryUpdateQueueBatches;
@ -346,6 +348,7 @@ private:
std::map<std::string, size_t> attribute_index_by_name;
mutable std::vector<Attribute> attributes;
mutable std::vector<CellMetadata> cells;
mutable LRUSet<Key> default_keys;
Attribute * hierarchical_attribute = nullptr;
std::unique_ptr<ArenaWithFreeLists> string_arena;
@ -360,21 +363,11 @@ private:
mutable std::atomic<size_t> hit_count{0};
mutable std::atomic<size_t> query_count{0};
mutable std::unordered_set<Key> default_keys;
/*
* Disclaimer: this comment is written not for fun.
*
* How the update goes: we basically have a method like get(keys)->values. Values are cached, so sometimes we
* can return them from the cache. For values not in cache, we query them from the dictionary, and add to the
* cache. The cache is lossy, so we can't expect it to store all the keys, and we store them separately. Normally,
* they would be passed as a return value of get(), but for Unknown Reasons the dictionaries use a baroque
* interface where get() accepts two callback, one that it calls for found values, and one for not found.
*
* Now we make it even uglier by doing this from multiple threads. The missing values are retrieved from the
* dictionary in a background thread, and this thread calls the provided callback. So if you provide the callbacks,
* you MUST wait until the background update finishes, or god knows what happens. Unfortunately, we have no
* way to check that you did this right, so good luck.
* can return them from the cache. For values not in cache, we query them from the source, and add to the
* cache. The cache is lossy, so we can't expect it to store all the keys, and we store them separately.
* So, there is a map of found keys to all its attributes.
*/
struct UpdateUnit
{

View File

@ -61,14 +61,10 @@ void CacheDictionary::getItemsNumberImpl(
for (const auto row : ext::range(0, rows))
{
const auto id = ids[row];
{
std::shared_lock shared_lock(default_cache_rw_lock);
/// First check if this key in the cache of default keys.
if (default_keys.find(id) != default_keys.end())
continue;
}
/// First check if this key in the cache of default keys.
if (default_keys.has(id))
continue;
/** cell should be updated if either:
* 1. ids do not match,
@ -174,9 +170,7 @@ void CacheDictionary::getItemsNumberImpl(
out[row] = std::get<OutputType>(value.values[attribute_index]);
} else
{
/// Add key to the cache of default keys.
std::unique_lock unique_lock(default_cache_rw_lock);
default_keys.emplace(key);
default_keys.add(key);
}
}
}
@ -207,7 +201,7 @@ void CacheDictionary::getItemsString(
{
std::shared_lock shared_lock(default_cache_rw_lock);
/// Check if the key is stored in the cache of defaults.
if (default_keys.find(id) != default_keys.end())
if (default_keys.has(id))
{
const auto string_ref = get_default(row);
out->insertData(string_ref.data, string_ref.size);
@ -258,14 +252,11 @@ void CacheDictionary::getItemsString(
for (const auto row : ext::range(0, ids.size()))
{
const auto id = ids[row];
/// Check if the key is stored in the cache of defaults.
if (default_keys.has(id))
{
std::shared_lock shared_lock(default_cache_rw_lock);
/// Check if the key is stored in the cache of defaults.
if (default_keys.find(id) != default_keys.end())
{
const auto string_ref = get_default(row);
out->insertData(string_ref.data, string_ref.size);
}
const auto string_ref = get_default(row);
out->insertData(string_ref.data, string_ref.size);
}
const auto find_result = findCellIdx(id, now);
@ -383,8 +374,7 @@ void CacheDictionary::getItemsString(
value = std::get<String>(found_it->second.values[attribute_index]);
else
{
std::unique_lock unique_lock(default_cache_rw_lock);
default_keys.insert(id);
default_keys.add(id);
value = get_default(row);
}
}

View File

@ -221,6 +221,7 @@ def test_reload_after_fail_in_cache_dictionary(started_cluster):
# Can't get a value from the cache dictionary because the source (table `test.xypairs`) doesn't respond.
expected_error = "Table test.xypairs doesn't exist"
update_error = "Could not update cache dictionary cache_xypairs now"
assert expected_error in query_and_get_error("SELECT dictGetUInt64('cache_xypairs', 'y', toUInt64(1))")
assert get_status("cache_xypairs") == "LOADED"
assert expected_error in get_last_exception("cache_xypairs")
@ -254,8 +255,9 @@ def test_reload_after_fail_in_cache_dictionary(started_cluster):
query("SELECT dictGet('cache_xypairs', 'y', toUInt64(1))") == "56"
query("SELECT dictGet('cache_xypairs', 'y', toUInt64(2))") == "0"
error = query_and_get_error("SELECT dictGetUInt64('cache_xypairs', 'y', toUInt64(3))")
assert expected_error in error
assert expected_error in get_last_exception("cache_xypairs")
assert (expected_error in error) or (update_error in error)
last_exception = get_last_exception("cache_xypairs")
assert (expected_error in last_exception) or (update_error in last_exception)
# Create table `test.xypairs` again with changed values.
query('''

View File

@ -47,7 +47,7 @@ def test_SYSTEM_RELOAD_DICTIONARY(started_cluster):
instance.query("INSERT INTO dictionary_source VALUES (1, 1)")
assert TSV(instance.query(
"SELECT dictGetUInt8('clickhouse_cache', 'value', toUInt64(0)), dictHas('clickhouse_cache', toUInt64(1))")) == TSV(
"0\t1\n")
"0\t0\n")
instance.query("SYSTEM RELOAD DICTIONARY clickhouse_cache")
assert TSV(instance.query(