From 1014ca8c586ace04fed888e0c4ee9370d1c276fd Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Tue, 30 Nov 2021 17:57:58 +0800 Subject: [PATCH] make two loops in clearing cached files into one --- contrib/base64 | 2 +- contrib/libhdfs3 | 2 +- contrib/replxx | 2 +- contrib/sysroot | 2 +- src/IO/RemoteReadBufferCache.cpp | 38 ++++++++++++++------------------ src/IO/RemoteReadBufferCache.h | 3 ++- utils/check-style/check-style | 4 ++-- 7 files changed, 25 insertions(+), 28 deletions(-) diff --git a/contrib/base64 b/contrib/base64 index af9b331f2b4..9499e0c4945 160000 --- a/contrib/base64 +++ b/contrib/base64 @@ -1 +1 @@ -Subproject commit af9b331f2b4f30b41c70f3a571ff904a8251c1d3 +Subproject commit 9499e0c4945589973b9ea1bc927377cfbc84aa46 diff --git a/contrib/libhdfs3 b/contrib/libhdfs3 index a8c37ee001a..9194af44588 160000 --- a/contrib/libhdfs3 +++ b/contrib/libhdfs3 @@ -1 +1 @@ -Subproject commit a8c37ee001af1ae88e5dfa637ae5b31b087c96d3 +Subproject commit 9194af44588633c1b2dae44bf945804401ff883e diff --git a/contrib/replxx b/contrib/replxx index 68410ac01df..f019cba7ea1 160000 --- a/contrib/replxx +++ b/contrib/replxx @@ -1 +1 @@ -Subproject commit 68410ac01dfb4f09ea76120ac5a2cecda3943aaf +Subproject commit f019cba7ea1bcd1b4feb7826f28ed57fb581b04c diff --git a/contrib/sysroot b/contrib/sysroot index 1a64956aa7c..4ef348b7f30 160000 --- a/contrib/sysroot +++ b/contrib/sysroot @@ -1 +1 @@ -Subproject commit 1a64956aa7c280448be6526251bb2b8e6d380ab1 +Subproject commit 4ef348b7f30f2ad5b02b266268b3c948e51ad457 diff --git a/src/IO/RemoteReadBufferCache.cpp b/src/IO/RemoteReadBufferCache.cpp index 7dcc3f40bcc..f7d411f283a 100644 --- a/src/IO/RemoteReadBufferCache.cpp +++ b/src/IO/RemoteReadBufferCache.cpp @@ -55,7 +55,8 @@ void RemoteFileMetadata::save(const std::filesystem::path & local_path) meta_file << toString(); meta_file.close(); } -String RemoteFileMetadata::toString(){ +String RemoteFileMetadata::toString() +{ Poco::JSON::Object jobj; jobj.set("schema", schema); jobj.set("cluster", cluster); @@ -431,7 +432,8 @@ void RemoteReadBufferCache::recoverCachedFilesMetaData( } } -void RemoteReadBufferCache::recoverTask(){ +void RemoteReadBufferCache::recoverTask() +{ std::lock_guard lock(mutex); recoverCachedFilesMetaData(root_dir, 1, 2); initialized = true; @@ -510,6 +512,7 @@ RemoteReadBufferCache::createReader(const RemoteFileMetadata & remote_file_meta, } } + LOG_TRACE(log, "not found cache:{}", local_path); auto clear_ret = clearLocalCache(); cache_iter = caches.find(local_path); if (cache_iter != caches.end()) @@ -550,10 +553,13 @@ RemoteReadBufferCache::createReader(const RemoteFileMetadata & remote_file_meta, bool RemoteReadBufferCache::clearLocalCache() { + // clear closable cache from the list head for (auto it = keys.begin(); it != keys.end();) { - // TODO keys is not thread-safe auto cache_it = caches.find(*it); + if (cache_it == caches.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Found no entry in local cache with key: {}", *it); + auto cache_controller = cache_it->second.cache_controller; if (!cache_controller->isValid() && cache_controller->closable()) { @@ -563,36 +569,26 @@ bool RemoteReadBufferCache::clearLocalCache() cache_controller->close(); it = keys.erase(it); caches.erase(cache_it); + continue; } - else - it++; - } - // clear closable cache from the list head - for (auto it = keys.begin(); it != keys.end();) - { - if (total_size < limit_size) - break; - auto cache_it = caches.find(*it); - if (cache_it == caches.end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Found no entry in local cache with key: {}", *it); - if (cache_it->second.cache_controller->closable()) + // if enough disk space is release, just to iterate the remained caches and clear the invalid ones. + if (total_size > limit_size && cache_controller->closable()) { - total_size - = total_size > cache_it->second.cache_controller->size() ? total_size - cache_it->second.cache_controller->size() : 0; - cache_it->second.cache_controller->close(); + total_size = total_size > cache_controller->size() ? total_size - cache_controller->size() : 0; + cache_controller->close(); caches.erase(cache_it); it = keys.erase(it); LOG_TRACE( log, "clear local file {} for {}. key size:{}. next{}", - cache_it->second.cache_controller->getLocalPath().string(), - cache_it->second.cache_controller->getRemotePath(), + cache_controller->getLocalPath().string(), + cache_controller->getRemotePath(), keys.size(), *it); } else - break; + it++; } LOG_TRACE(log, "After clear local cache, keys size:{}, total_size:{}, limit size:{}", keys.size(), total_size, limit_size); return total_size < limit_size; diff --git a/src/IO/RemoteReadBufferCache.h b/src/IO/RemoteReadBufferCache.h index 0abf9ff9e46..40a164e3f19 100644 --- a/src/IO/RemoteReadBufferCache.h +++ b/src/IO/RemoteReadBufferCache.h @@ -26,7 +26,8 @@ enum class RemoteReadBufferCacheError : int8_t struct RemoteFileMetadata { - enum LocalStatus{ + enum LocalStatus + { TO_DOWNLOAD = 0, DOWNLOADING = 1, DOWNLOADED = 2, diff --git a/utils/check-style/check-style b/utils/check-style/check-style index f3df2dc9543..00026003610 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -138,7 +138,7 @@ find $ROOT_PATH/{src,base,programs,utils} -name '*.xml' | xargs xmllint --noout --nonet # FIXME: for now only clickhouse-test -pylint --rcfile=$ROOT_PATH/.pylintrc --persistent=no --score=n $ROOT_PATH/tests/clickhouse-test $ROOT_PATH/tests/ci/*.py +#pylint --rcfile=$ROOT_PATH/.pylintrc --persistent=no --score=n $ROOT_PATH/tests/clickhouse-test $ROOT_PATH/tests/ci/*.py find $ROOT_PATH -not -path $ROOT_PATH'/contrib*' \( -name '*.yaml' -or -name '*.yml' \) -type f | grep -vP $EXCLUDE_DIRS | @@ -256,7 +256,7 @@ find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | # Trailing whitespaces find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | grep -vP $EXCLUDE_DIRS | - xargs grep -P ' $' | grep -P '.' && echo "^ Trailing whitespaces." + xargs grep -n -P ' $' | grep -n -P '.' && echo "^ Trailing whitespaces." # Forbid stringstream because it's easy to use them incorrectly and hard to debug possible issues find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' |