Merge the two loops that clear cached files into one

This commit is contained in:
lgbo-ustc 2021-11-30 17:57:58 +08:00 committed by liangjiabiao
parent b023dd34c0
commit 1014ca8c58
7 changed files with 25 additions and 28 deletions

2
contrib/base64 vendored

@ -1 +1 @@
Subproject commit af9b331f2b4f30b41c70f3a571ff904a8251c1d3
Subproject commit 9499e0c4945589973b9ea1bc927377cfbc84aa46

2
contrib/libhdfs3 vendored

@ -1 +1 @@
Subproject commit a8c37ee001af1ae88e5dfa637ae5b31b087c96d3
Subproject commit 9194af44588633c1b2dae44bf945804401ff883e

2
contrib/replxx vendored

@ -1 +1 @@
Subproject commit 68410ac01dfb4f09ea76120ac5a2cecda3943aaf
Subproject commit f019cba7ea1bcd1b4feb7826f28ed57fb581b04c

2
contrib/sysroot vendored

@ -1 +1 @@
Subproject commit 1a64956aa7c280448be6526251bb2b8e6d380ab1
Subproject commit 4ef348b7f30f2ad5b02b266268b3c948e51ad457

View File

@ -55,7 +55,8 @@ void RemoteFileMetadata::save(const std::filesystem::path & local_path)
meta_file << toString();
meta_file.close();
}
String RemoteFileMetadata::toString(){
String RemoteFileMetadata::toString()
{
Poco::JSON::Object jobj;
jobj.set("schema", schema);
jobj.set("cluster", cluster);
@ -431,7 +432,8 @@ void RemoteReadBufferCache::recoverCachedFilesMetaData(
}
}
void RemoteReadBufferCache::recoverTask(){
void RemoteReadBufferCache::recoverTask()
{
std::lock_guard lock(mutex);
recoverCachedFilesMetaData(root_dir, 1, 2);
initialized = true;
@ -510,6 +512,7 @@ RemoteReadBufferCache::createReader(const RemoteFileMetadata & remote_file_meta,
}
}
LOG_TRACE(log, "not found cache:{}", local_path);
auto clear_ret = clearLocalCache();
cache_iter = caches.find(local_path);
if (cache_iter != caches.end())
@ -550,10 +553,13 @@ RemoteReadBufferCache::createReader(const RemoteFileMetadata & remote_file_meta,
bool RemoteReadBufferCache::clearLocalCache()
{
// clear closable cache from the list head
for (auto it = keys.begin(); it != keys.end();)
{
// TODO keys is not thread-safe
auto cache_it = caches.find(*it);
if (cache_it == caches.end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Found no entry in local cache with key: {}", *it);
auto cache_controller = cache_it->second.cache_controller;
if (!cache_controller->isValid() && cache_controller->closable())
{
@ -563,36 +569,26 @@ bool RemoteReadBufferCache::clearLocalCache()
cache_controller->close();
it = keys.erase(it);
caches.erase(cache_it);
continue;
}
else
it++;
}
// clear closable cache from the list head
for (auto it = keys.begin(); it != keys.end();)
{
if (total_size < limit_size)
break;
auto cache_it = caches.find(*it);
if (cache_it == caches.end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Found no entry in local cache with key: {}", *it);
if (cache_it->second.cache_controller->closable())
// If enough disk space has been released, just iterate over the remaining caches and clear the invalid ones.
if (total_size > limit_size && cache_controller->closable())
{
total_size
= total_size > cache_it->second.cache_controller->size() ? total_size - cache_it->second.cache_controller->size() : 0;
cache_it->second.cache_controller->close();
total_size = total_size > cache_controller->size() ? total_size - cache_controller->size() : 0;
cache_controller->close();
caches.erase(cache_it);
it = keys.erase(it);
LOG_TRACE(
log,
"clear local file {} for {}. key size:{}. next{}",
cache_it->second.cache_controller->getLocalPath().string(),
cache_it->second.cache_controller->getRemotePath(),
cache_controller->getLocalPath().string(),
cache_controller->getRemotePath(),
keys.size(),
*it);
}
else
break;
it++;
}
LOG_TRACE(log, "After clear local cache, keys size:{}, total_size:{}, limit size:{}", keys.size(), total_size, limit_size);
return total_size < limit_size;

View File

@ -26,7 +26,8 @@ enum class RemoteReadBufferCacheError : int8_t
struct RemoteFileMetadata
{
enum LocalStatus{
enum LocalStatus
{
TO_DOWNLOAD = 0,
DOWNLOADING = 1,
DOWNLOADED = 2,

View File

@ -138,7 +138,7 @@ find $ROOT_PATH/{src,base,programs,utils} -name '*.xml' |
xargs xmllint --noout --nonet
# FIXME: for now only clickhouse-test
pylint --rcfile=$ROOT_PATH/.pylintrc --persistent=no --score=n $ROOT_PATH/tests/clickhouse-test $ROOT_PATH/tests/ci/*.py
#pylint --rcfile=$ROOT_PATH/.pylintrc --persistent=no --score=n $ROOT_PATH/tests/clickhouse-test $ROOT_PATH/tests/ci/*.py
find $ROOT_PATH -not -path $ROOT_PATH'/contrib*' \( -name '*.yaml' -or -name '*.yml' \) -type f |
grep -vP $EXCLUDE_DIRS |
@ -256,7 +256,7 @@ find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' |
# Trailing whitespaces
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' |
grep -vP $EXCLUDE_DIRS |
xargs grep -P ' $' | grep -P '.' && echo "^ Trailing whitespaces."
xargs grep -n -P ' $' | grep -n -P '.' && echo "^ Trailing whitespaces."
# Forbid stringstream because it's easy to use them incorrectly and hard to debug possible issues
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' |