ClickHouse/src/Interpreters/ServerAsynchronousMetrics.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

448 lines
22 KiB
C++
Raw Normal View History

#include <Interpreters/ServerAsynchronousMetrics.h>
#include <Interpreters/Aggregator.h>
#include <Interpreters/AsynchronousMetricLog.h>
#include <Interpreters/DatabaseCatalog.h>
#include <Interpreters/Cache/FileCache.h>
#include <Interpreters/Cache/FileCacheFactory.h>
#include <Interpreters/Context.h>
2023-08-14 16:25:52 +00:00
#include <Interpreters/Cache/QueryCache.h>
2022-11-10 10:41:49 +00:00
#include <Interpreters/JIT/CompiledExpressionCache.h>
#include <Databases/IDatabase.h>
#include <IO/UncompressedCache.h>
#include <IO/MMappedFileCache.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/StorageMergeTree.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/MarkCache.h>
#include <Coordination/KeeperAsynchronousMetrics.h>
namespace DB
{
namespace ErrorCodes
{
extern const int INVALID_SETTING_VALUE;
}
namespace
{
template <typename Max, typename T>
2022-11-10 10:41:49 +00:00
void calculateMax(Max & max, T x)
{
if (Max(x) > max)
max = x;
}
template <typename Max, typename Sum, typename T>
2022-11-10 10:41:49 +00:00
void calculateMaxAndSum(Max & max, Sum & sum, T x)
{
sum += x;
if (Max(x) > max)
max = x;
}
}
ServerAsynchronousMetrics::ServerAsynchronousMetrics(
ContextPtr global_context_,
int update_period_seconds,
int heavy_metrics_update_period_seconds,
const ProtocolServerMetricsFunc & protocol_server_metrics_func_)
Fix Context use-after-free in ServerAsynchronousMetrics TSan found [1]: WARNING: ThreadSanitizer: data race on vptr (ctor/dtor vs virtual call) (pid=598) Write of size 8 at 0x7ffcf9ad9cb0 by main thread: 0 DB::AsynchronousMetrics::~AsynchronousMetrics() build_docker/./src/Common/AsynchronousMetrics.cpp:299:1 (clickhouse+0xf38ed6e) (BuildId: 31dcc7d77a0b2aaf9de7aca070b5f6ed6ac3dcbf) 1 DB::ServerAsynchronousMetrics::~ServerAsynchronousMetrics() build_docker/./src/Interpreters/ServerAsynchronousMetrics.h:10:7 (clickhouse+0xf2b2220) (BuildId: 31dcc7d77a0b2aaf9de7aca070b5f6ed6ac3dcbf) 2 DB::Server::main(std::__1::vector<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>>> const&) build_docker/./programs/server/Server.cpp:1995:1 (clickhouse+0xf2b2220) 3 Poco::Util::Application::run() build_docker/./base/poco/Util/src/Application.cpp:315:8 (clickhouse+0x1d6c535e) (BuildId: 31dcc7d77a0b2aaf9de7aca070b5f6ed6ac3dcbf) 4 DB::Server::run() build_docker/./programs/server/Server.cpp:397:25 (clickhouse+0xf29d341) (BuildId: 31dcc7d77a0b2aaf9de7aca070b5f6ed6ac3dcbf) 5 Poco::Util::ServerApplication::run(int, char**) build_docker/./base/poco/Util/src/ServerApplication.cpp:131:9 (clickhouse+0x1d6e50b4) (BuildId: 31dcc7d77a0b2aaf9de7aca070b5f6ed6ac3dcbf) 6 mainEntryClickHouseServer(int, char**) build_docker/./programs/server/Server.cpp:203:20 (clickhouse+0xf29a4a3) (BuildId: 31dcc7d77a0b2aaf9de7aca070b5f6ed6ac3dcbf) 7 main build_docker/./programs/main.cpp:505:12 (clickhouse+0x72d72a0) (BuildId: 31dcc7d77a0b2aaf9de7aca070b5f6ed6ac3dcbf) Previous read of size 8 at 0x7ffcf9ad9cb0 by thread T727: 0 DB::AsynchronousMetrics::update(std::__1::chrono::time_point<std::__1::chrono::system_clock, std::__1::chrono::duration<long long, std::__1::ratio<1l, 1000000l>>>) build_docker/./src/Common/AsynchronousMetrics.cpp:1559:5 (clickhouse+0xf38dec5) (BuildId: 31dcc7d77a0b2aaf9de7aca070b5f6ed6ac3dcbf) 1 DB::AsynchronousMetrics::run() build_docker/./src/Common/AsynchronousMetrics.cpp:354:13 (clickhouse+0xf38f996) (BuildId: 31dcc7d77a0b2aaf9de7aca070b5f6ed6ac3dcbf) 2 DB::AsynchronousMetrics::start()::$_0::operator()() const build_docker/./src/Common/AsynchronousMetrics.cpp:273:62 (clickhouse+0xf3921ca) (BuildId: 31dcc7d77a0b2aaf9de7aca070b5f6ed6ac3dcbf) 3 decltype(std::declval<DB::AsynchronousMetrics::start()::$_0&>()()) std::__1::__invoke[abi:v15000]<DB::AsynchronousMetrics::start()::$_0&>(DB::AsynchronousMetrics::start()::$_0&) build_docker/./contrib/llvm-project/libcxx/include/__functional/invoke.h:394:23 (clickhouse+0xf3921ca) 4 decltype(auto) std::__1::__apply_tuple_impl[abi:v15000]<DB::AsynchronousMetrics::start()::$_0&, std::__1::tuple<>&>(DB::AsynchronousMetrics::start()::$_0&, std::__1::tuple<>&, std::__1::__tuple_indices<>) build_docker/./contrib/llvm-project/libcxx/include/tuple:1789:1 (clickhouse+0xf3921ca) 5 decltype(auto) std::__1::apply[abi:v15000]<DB::AsynchronousMetrics::start()::$_0&, std::__1::tuple<>&>(DB::AsynchronousMetrics::start()::$_0&, std::__1::tuple<>&) build_docker/./contrib/llvm-project/libcxx/include/tuple:1798:1 (clickhouse+0xf3921ca) 6 ThreadFromGlobalPoolImpl<true>::ThreadFromGlobalPoolImpl<DB::AsynchronousMetrics::start()::$_0>(DB::AsynchronousMetrics::start()::$_0&&)::'lambda'()::operator()() build_docker/./src/Common/ThreadPool.h:253:13 (clickhouse+0xf3921ca) 7 decltype(std::declval<DB::AsynchronousMetrics::start()::$_0>()()) std::__1::__invoke[abi:v15000]<ThreadFromGlobalPoolImpl<true>::ThreadFromGlobalPoolImpl<DB::AsynchronousMetrics::start()::$_0>(DB::AsynchronousMetrics::start()::$_0&&)::'lambda'()&>(DB::AsynchronousMetrics::start()::$_0&&) build_docker/./contrib/llvm-project/libcxx/include/__functional/invoke.h:394:23 (clickhouse+0xf3921ca) 8 void std::__1::__invoke_void_return_wrapper<void, true>::__call<ThreadFromGlobalPoolImpl<true>::ThreadFromGlobalPoolImpl<DB::AsynchronousMetrics::start()::$_0>(DB::AsynchronousMetrics::start()::$_0&&)::'lambda'()&>(ThreadFromGlobalPoolImpl<true>::ThreadFromGlobalPoolImpl<DB::AsynchronousMetrics::start()::$_0>(DB::AsynchronousMetrics::start()::$_0&&)::'lambda'()&) build_docker/./contrib/llvm-project/libcxx/include/__functional/invoke.h:479:9 (clickhouse+0xf3921ca) 9 std::__1::__function::__default_alloc_func<ThreadFromGlobalPoolImpl<true>::ThreadFromGlobalPoolImpl<DB::AsynchronousMetrics::start()::$_0>(DB::AsynchronousMetrics::start()::$_0&&)::'lambda'(), void ()>::operator()[abi:v15000]() build_docker/./contrib/llvm-project/libcxx/include/__functional/function.h:235:12 (clickhouse+0xf3921ca) 10 void std::__1::__function::__policy_invoker<void ()>::__call_impl<std::__1::__function::__default_alloc_func<ThreadFromGlobalPoolImpl<true>::ThreadFromGlobalPoolImpl<DB::AsynchronousMetrics::start()::$_0>(DB::AsynchronousMetrics::start()::$_0&&)::'lambda'(), void ()>>(std::__1::__function::__policy_storage const*) build_docker/./contrib/llvm-project/libcxx/include/__functional/function.h:716:16 (clickhouse+0xf3921ca) 11 std::__1::__function::__policy_func<void ()>::operator()[abi:v15000]() const build_docker/./contrib/llvm-project/libcxx/include/__functional/function.h:848:16 (clickhouse+0xf305b2e) (BuildId: 31dcc7d77a0b2aaf9de7aca070b5f6ed6ac3dcbf) 12 std::__1::function<void ()>::operator()() const build_docker/./contrib/llvm-project/libcxx/include/__functional/function.h:1187:12 (clickhouse+0xf305b2e) 13 ThreadPoolImpl<std::__1::thread>::worker(std::__1::__list_iterator<std::__1::thread, void*>) build_docker/./src/Common/ThreadPool.cpp:421:13 (clickhouse+0xf305b2e) 14 void ThreadPoolImpl<std::__1::thread>::scheduleImpl<void>(std::__1::function<void ()>, Priority, std::__1::optional<unsigned long>, bool)::'lambda0'()::operator()() const build_docker/./src/Common/ThreadPool.cpp:183:73 (clickhouse+0xf30c6d1) (BuildId: 31dcc7d77a0b2aaf9de7aca070b5f6ed6ac3dcbf) 15 decltype(std::declval<void>()()) std::__1::__invoke[abi:v15000]<void ThreadPoolImpl<std::__1::thread>::scheduleImpl<void>(std::__1::function<void ()>, Priority, std::__1::optional<unsigned long>, bool)::'lambda0'()>(void&&) build_docker/./contrib/llvm-project/libcxx/include/__functional/invoke.h:394:23 (clickhouse+0xf30c6d1) 16 void std::__1::__thread_execute[abi:v15000]<std::__1::unique_ptr<std::__1::__thread_struct, std::__1::default_delete<std::__1::__thread_struct>>, void ThreadPoolImpl<std::__1::thread>::scheduleImpl<void>(std::__1::function<void ()>, Priority, std::__1::optional<unsigned long>, bool)::'lambda0'()>(std::__1::tuple<void, void ThreadPoolImpl<std::__1::thread>::scheduleImpl<void>(std::__1::function<void ()>, Priority, std::__1::optional<unsigned long>, bool)::'lambda0'()>&, std::__1::__tuple_indices<>) build_docker/./contrib/llvm-project/libcxx/include/thread:284:5 (clickhouse+0xf30c6d1) 17 void* std::__1::__thread_proxy[abi:v15000]<std::__1::tuple<std::__1::unique_ptr<std::__1::__thread_struct, std::__1::default_delete<std::__1::__thread_struct>>, void ThreadPoolImpl<std::__1::thread>::scheduleImpl<void>(std::__1::function<void ()>, Priority, std::__1::optional<unsigned long>, bool)::'lambda0'()>>(void*) build_docker/./contrib/llvm-project/libcxx/include/thread:295:5 (clickhouse+0xf30c6d1) SUMMARY: ThreadSanitizer: data race on vptr (ctor/dtor vs virtual call) build_docker/./src/Common/AsynchronousMetrics.cpp:299:1 in DB::AsynchronousMetrics::~AsynchronousMetrics() Or MSan [2]: ==573==WARNING: MemorySanitizer: use-of-uninitialized-value 0 0x55b57079112d in std::__1::weak_ptr<DB::Context const>::lock() const build_docker/./contrib/llvm-project/libcxx/include/__memory/shared_ptr.h:1645:20 1 0x55b57079112d in DB::WithContextImpl<std::__1::shared_ptr<DB::Context const>>::getContext() const build_docker/./src/Interpreters/Context_fwd.h:41:28 2 0x55b57079112d in DB::ServerAsynchronousMetrics::updateImpl(std::__1::unordered_map<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>, DB::AsynchronousMetricValue, std::__1::hash<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>>, std::__1::equal_to<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>>, std::__1::allocator<std::__1::pair<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>> const, DB::AsynchronousMetricValue>>>&, std::__1::chrono::time_point<std::__1::chrono::system_clock, std::__1::chrono::duration<long long, std::__1::ratio<1l, 1000000l>>>, std::__1::chrono::time_point<std::__1::chrono::system_clock, std::__1::chrono::duration<long long, std::__1::ratio<1l, 1000000l>>>) build_docker/./src/Interpreters/ServerAsynchronousMetrics.cpp:260:63 3 0x55b559540309 in DB::AsynchronousMetrics::update(std::__1::chrono::time_point<std::__1::chrono::system_clock, std::__1::chrono::duration<long long, std::__1::ratio<1l, 1000000l>>>) build_docker/./src/Common/AsynchronousMetrics.cpp:1559:5 4 0x55b55954258c in DB::AsynchronousMetrics::run() build_docker/./src/Common/AsynchronousMetrics.cpp:354:13 5 0x55b559549111 in DB::AsynchronousMetrics::start()::$_0::operator()() const build_docker/./src/Common/AsynchronousMetrics.cpp:273:62 6 0x55b559549111 in decltype(std::declval<DB::AsynchronousMetrics::start()::$_0&>()()) std::__1::__invoke[abi:v15000]<DB::AsynchronousMetrics::start()::$_0&>(DB::AsynchronousMetrics::start()::$_0&) build_docker/./contrib/llvm-project/libcxx/include/__functional/invoke.h:394:23 7 0x55b559549111 in decltype(auto) std::__1::__apply_tuple_impl[abi:v15000]<DB::AsynchronousMetrics::start()::$_0&, std::__1::tuple<>&>(DB::AsynchronousMetrics::start()::$_0&, std::__1::tuple<>&, std::__1::__tuple_indices<>) build_docker/./contrib/llvm-project/libcxx/include/tuple:1789:1 8 0x55b559549111 in decltype(auto) std::__1::apply[abi:v15000]<DB::AsynchronousMetrics::start()::$_0&, std::__1::tuple<>&>(DB::AsynchronousMetrics::start()::$_0&, std::__1::tuple<>&) build_docker/./contrib/llvm-project/libcxx/include/tuple:1798:1 9 0x55b559549111 in ThreadFromGlobalPoolImpl<true>::ThreadFromGlobalPoolImpl<DB::AsynchronousMetrics::start()::$_0>(DB::AsynchronousMetrics::start()::$_0&&)::'lambda'()::operator()() build_docker/./src/Common/ThreadPool.h:253:13 10 0x55b559549111 in decltype(std::declval<DB::AsynchronousMetrics::start()::$_0>()()) std::__1::__invoke[abi:v15000]<ThreadFromGlobalPoolImpl<true>::ThreadFromGlobalPoolImpl<DB::AsynchronousMetrics::start()::$_0>(DB::AsynchronousMetrics::start()::$_0&&)::'lambda'()&>(DB::AsynchronousMetrics::start()::$_0&&) build_docker/./contrib/llvm-project/libcxx/include/__functional/invoke.h:394:23 11 0x55b559549111 in void std::__1::__invoke_void_return_wrapper<void, true>::__call<ThreadFromGlobalPoolImpl<true>::ThreadFromGlobalPoolImpl<DB::AsynchronousMetrics::start()::$_0>(DB::AsynchronousMetrics::start()::$_0&&)::'lambda'()&>(ThreadFromGlobalPoolImpl<true>::ThreadFromGlobalPoolImpl<DB::AsynchronousMetrics::start()::$_0>(DB::AsynchronousMetrics::start()::$_0&&)::'lambda'()&) build_docker/./contrib/llvm-project/libcxx/include/__functional/invoke.h:479:9 12 0x55b559549111 in std::__1::__function::__default_alloc_func<ThreadFromGlobalPoolImpl<true>::ThreadFromGlobalPoolImpl<DB::AsynchronousMetrics::start()::$_0>(DB::AsynchronousMetrics::start()::$_0&&)::'lambda'(), void ()>::operator()[abi:v15000]() build_docker/./contrib/llvm-project/libcxx/include/__functional/function.h:235:12 13 0x55b559549111 in void std::__1::__function::__policy_invoker<void ()>::__call_impl<std::__1::__function::__default_alloc_func<ThreadFromGlobalPoolImpl<true>::ThreadFromGlobalPoolImpl<DB::AsynchronousMetrics::start()::$_0>(DB::AsynchronousMetrics::start()::$_0&&)::'lambda'(), void ()>>(std::__1::__function::__policy_storage const*) build_docker/./contrib/llvm-project/libcxx/include/__functional/function.h:716:16 14 0x55b5593eb38a in std::__1::__function::__policy_func<void ()>::operator()[abi:v15000]() const build_docker/./contrib/llvm-project/libcxx/include/__functional/function.h:848:16 15 0x55b5593eb38a in std::__1::function<void ()>::operator()() const build_docker/./contrib/llvm-project/libcxx/include/__functional/function.h:1187:12 16 0x55b5593eb38a in ThreadPoolImpl<std::__1::thread>::worker(std::__1::__list_iterator<std::__1::thread, void*>) build_docker/./src/Common/ThreadPool.cpp:421:13 17 0x55b5593f9a0a in void ThreadPoolImpl<std::__1::thread>::scheduleImpl<void>(std::__1::function<void ()>, Priority, std::__1::optional<unsigned long>, bool)::'lambda0'()::operator()() const build_docker/./src/Common/ThreadPool.cpp:183:73 18 0x55b5593f9a0a in decltype(std::declval<void>()()) std::__1::__invoke[abi:v15000]<void ThreadPoolImpl<std::__1::thread>::scheduleImpl<void>(std::__1::function<void ()>, Priority, std::__1::optional<unsigned long>, bool)::'lambda0'()>(void&&) build_docker/./contrib/llvm-project/libcxx/include/__functional/invoke.h:394:23 19 0x55b5593f9a0a in void std::__1::__thread_execute[abi:v15000]<std::__1::unique_ptr<std::__1::__thread_struct, std::__1::default_delete<std::__1::__thread_struct>>, void ThreadPoolImpl<std::__1::thread>::scheduleImpl<void>(std::__1::function<void ()>, Priority, std::__1::optional<unsigned long>, bool)::'lambda0'()>(std::__1::tuple<void, void ThreadPoolImpl<std::__1::thread>::scheduleImpl<void>(std::__1::function<void ()>, Priority, std::__1::optional<unsigned long>, bool)::'lambda0'()>&, std::__1::__tuple_indices<>) build_docker/./contrib/llvm-project/libcxx/include/thread:284:5 20 0x55b5593f9a0a in void* std::__1::__thread_proxy[abi:v15000]<std::__1::tuple<std::__1::unique_ptr<std::__1::__thread_struct, std::__1::default_delete<std::__1::__thread_struct>>, void ThreadPoolImpl<std::__1::thread>::scheduleImpl<void>(std::__1::function<void ()>, Priority, std::__1::optional<unsigned long>, bool)::'lambda0'()>>(void*) build_docker/./contrib/llvm-project/libcxx/include/thread:295:5 21 0x7f7ff3899ac2 (/lib/x86_64-linux-gnu/libc.so.6+0x94ac2) (BuildId: a43bfc8428df6623cd498c9c0caeb91aec9be4f9) 22 0x7f7ff392ba3f (/lib/x86_64-linux-gnu/libc.so.6+0x126a3f) (BuildId: a43bfc8428df6623cd498c9c0caeb91aec9be4f9) Member fields were destroyed 0 0x55b541a72c9d in __sanitizer_dtor_callback_fields (/usr/bin/clickhouse+0x7c6dc9d) (BuildId: 57941f7730deefd0a2028f1d9f3e173472a4aa76) 1 0x55b559304b83 in std::__1::weak_ptr<DB::Context const>::~weak_ptr() build_docker/./contrib/llvm-project/libcxx/include/__memory/shared_ptr.h:1397:26 2 0x55b559304b83 in std::__1::weak_ptr<DB::Context const>::~weak_ptr() build_docker/./contrib/llvm-project/libcxx/include/__memory/shared_ptr.h:1553:1 3 0x55b559304b83 in DB::WithContextImpl<std::__1::shared_ptr<DB::Context const>>::~WithContextImpl() build_docker/./src/Interpreters/Context_fwd.h:30:8 4 0x55b559304b83 in DB::ServerAsynchronousMetrics::~ServerAsynchronousMetrics() build_docker/./src/Interpreters/ServerAsynchronousMetrics.h:10:7 5 0x55b559304b83 in DB::Server::main(std::__1::vector<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>>> const&) build_docker/./programs/server/Server.cpp:1995:1 6 0x55b5789f579e in Poco::Util::Application::run() build_docker/./base/poco/Util/src/Application.cpp:315:8 7 0x55b5592d762a in DB::Server::run() build_docker/./programs/server/Server.cpp:397:25 8 0x55b578a3efdf in Poco::Util::ServerApplication::run(int, char**) build_docker/./base/poco/Util/src/ServerApplication.cpp:131:9 9 0x55b5592d0489 in mainEntryClickHouseServer(int, char**) build_docker/./programs/server/Server.cpp:203:20 10 0x55b541acd4ab in main build_docker/./programs/main.cpp:505:12 11 0x7f7ff382ed8f (/lib/x86_64-linux-gnu/libc.so.6+0x29d8f) (BuildId: a43bfc8428df6623cd498c9c0caeb91aec9be4f9) SUMMARY: MemorySanitizer: use-of-uninitialized-value build_docker/./contrib/llvm-project/libcxx/include/__memory/shared_ptr.h:1645:20 in std::__1::weak_ptr<DB::Context const>::lock() const [1]: https://s3.amazonaws.com/clickhouse-test-reports/52717/fcdead023c4350233ef1e0f7f82a71653ed62229/stress_test__tsan_.html [2]: https://s3.amazonaws.com/clickhouse-test-reports/52717/fcdead023c4350233ef1e0f7f82a71653ed62229/stress_test__msan_.html Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2023-12-10 14:37:36 +00:00
: WithContext(global_context_)
, AsynchronousMetrics(update_period_seconds, protocol_server_metrics_func_)
, heavy_metric_update_period(heavy_metrics_update_period_seconds)
{
/// sanity check
if (update_period_seconds == 0 || heavy_metrics_update_period_seconds == 0)
throw Exception(ErrorCodes::INVALID_SETTING_VALUE, "Setting asynchronous_metrics_update_period_s and asynchronous_heavy_metrics_update_period_s must not be zero");
}
2022-11-09 14:51:41 +00:00
2022-11-10 11:56:27 +00:00
void ServerAsynchronousMetrics::updateImpl(AsynchronousMetricValues & new_values, TimePoint update_time, TimePoint current_time)
{
if (auto mark_cache = getContext()->getMarkCache())
{
new_values["MarkCacheBytes"] = { mark_cache->sizeInBytes(), "Total size of mark cache in bytes" };
new_values["MarkCacheFiles"] = { mark_cache->count(), "Total number of mark files cached in the mark cache" };
}
if (auto uncompressed_cache = getContext()->getUncompressedCache())
{
new_values["UncompressedCacheBytes"] = { uncompressed_cache->sizeInBytes(),
"Total size of uncompressed cache in bytes. Uncompressed cache does not usually improve the performance and should be mostly avoided." };
new_values["UncompressedCacheCells"] = { uncompressed_cache->count(),
"Total number of entries in the uncompressed cache. Each entry represents a decompressed block of data. Uncompressed cache does not usually improve performance and should be mostly avoided." };
}
if (auto index_mark_cache = getContext()->getIndexMarkCache())
{
new_values["IndexMarkCacheBytes"] = { index_mark_cache->sizeInBytes(), "Total size of mark cache for secondary indices in bytes." };
new_values["IndexMarkCacheFiles"] = { index_mark_cache->count(), "Total number of mark files cached in the mark cache for secondary indices." };
}
if (auto index_uncompressed_cache = getContext()->getIndexUncompressedCache())
{
new_values["IndexUncompressedCacheBytes"] = { index_uncompressed_cache->sizeInBytes(),
"Total size of uncompressed cache in bytes for secondary indices. Uncompressed cache does not usually improve the performance and should be mostly avoided." };
new_values["IndexUncompressedCacheCells"] = { index_uncompressed_cache->count(),
"Total number of entries in the uncompressed cache for secondary indices. Each entry represents a decompressed block of data. Uncompressed cache does not usually improve performance and should be mostly avoided." };
}
if (auto mmap_cache = getContext()->getMMappedFileCache())
{
new_values["MMapCacheCells"] = { mmap_cache->count(),
"The number of files opened with `mmap` (mapped in memory)."
" This is used for queries with the setting `local_filesystem_read_method` set to `mmap`."
" The files opened with `mmap` are kept in the cache to avoid costly TLB flushes."};
}
if (auto query_cache = getContext()->getQueryCache())
{
new_values["QueryCacheBytes"] = { query_cache->sizeInBytes(), "Total size of the query cache in bytes." };
new_values["QueryCacheEntries"] = { query_cache->count(), "Total number of entries in the query cache." };
}
{
auto caches = FileCacheFactory::instance().getAll();
size_t total_bytes = 0;
size_t total_files = 0;
for (const auto & [_, cache_data] : caches)
{
total_bytes += cache_data->cache->getUsedCacheSize();
total_files += cache_data->cache->getFileSegmentsNum();
}
new_values["FilesystemCacheBytes"] = { total_bytes,
"Total bytes in the `cache` virtual filesystem. This cache is hold on disk." };
new_values["FilesystemCacheFiles"] = { total_files,
"Total number of cached file segments in the `cache` virtual filesystem. This cache is hold on disk." };
}
#if USE_EMBEDDED_COMPILER
if (auto * compiled_expression_cache = CompiledExpressionCacheFactory::instance().tryGetCache())
{
new_values["CompiledExpressionCacheBytes"] = { compiled_expression_cache->sizeInBytes(),
"Total bytes used for the cache of JIT-compiled code." };
new_values["CompiledExpressionCacheCount"] = { compiled_expression_cache->count(),
"Total entries in the cache of JIT-compiled code." };
}
#endif
new_values["Uptime"] = { getContext()->getUptimeSeconds(),
"The server uptime in seconds. It includes the time spent for server initialization before accepting connections." };
if (const auto stats = getHashTablesCacheStatistics())
{
new_values["HashTableStatsCacheEntries"] = { stats->entries,
"The number of entries in the cache of hash table sizes."
" The cache for hash table sizes is used for predictive optimization of GROUP BY." };
new_values["HashTableStatsCacheHits"] = { stats->hits,
"The number of times the prediction of a hash table size was correct." };
new_values["HashTableStatsCacheMisses"] = { stats->misses,
"The number of times the prediction of a hash table size was incorrect." };
}
/// Free space in filesystems at data path and logs path.
{
auto stat = getStatVFS(getContext()->getPath());
new_values["FilesystemMainPathTotalBytes"] = { stat.f_blocks * stat.f_frsize,
"The size of the volume where the main ClickHouse path is mounted, in bytes." };
new_values["FilesystemMainPathAvailableBytes"] = { stat.f_bavail * stat.f_frsize,
"Available bytes on the volume where the main ClickHouse path is mounted." };
new_values["FilesystemMainPathUsedBytes"] = { (stat.f_blocks - stat.f_bavail) * stat.f_frsize,
"Used bytes on the volume where the main ClickHouse path is mounted." };
new_values["FilesystemMainPathTotalINodes"] = { stat.f_files,
"The total number of inodes on the volume where the main ClickHouse path is mounted. If it is less than 25 million, it indicates a misconfiguration." };
new_values["FilesystemMainPathAvailableINodes"] = { stat.f_favail,
"The number of available inodes on the volume where the main ClickHouse path is mounted. If it is close to zero, it indicates a misconfiguration, and you will get 'no space left on device' even when the disk is not full." };
new_values["FilesystemMainPathUsedINodes"] = { stat.f_files - stat.f_favail,
"The number of used inodes on the volume where the main ClickHouse path is mounted. This value mostly corresponds to the number of files." };
}
{
/// Current working directory of the server is the directory with logs.
auto stat = getStatVFS(".");
new_values["FilesystemLogsPathTotalBytes"] = { stat.f_blocks * stat.f_frsize,
"The size of the volume where ClickHouse logs path is mounted, in bytes. It's recommended to have at least 10 GB for logs." };
new_values["FilesystemLogsPathAvailableBytes"] = { stat.f_bavail * stat.f_frsize,
"Available bytes on the volume where ClickHouse logs path is mounted. If this value approaches zero, you should tune the log rotation in the configuration file." };
new_values["FilesystemLogsPathUsedBytes"] = { (stat.f_blocks - stat.f_bavail) * stat.f_frsize,
"Used bytes on the volume where ClickHouse logs path is mounted." };
new_values["FilesystemLogsPathTotalINodes"] = { stat.f_files,
"The total number of inodes on the volume where ClickHouse logs path is mounted." };
new_values["FilesystemLogsPathAvailableINodes"] = { stat.f_favail,
"The number of available inodes on the volume where ClickHouse logs path is mounted." };
new_values["FilesystemLogsPathUsedINodes"] = { stat.f_files - stat.f_favail,
"The number of used inodes on the volume where ClickHouse logs path is mounted." };
}
/// Free and total space on every configured disk.
{
DisksMap disks_map = getContext()->getDisksMap();
for (const auto & [name, disk] : disks_map)
{
auto total = disk->getTotalSpace();
/// Some disks don't support information about the space.
if (!total)
continue;
auto available = disk->getAvailableSpace();
auto unreserved = disk->getUnreservedSpace();
2023-04-29 16:55:19 +00:00
new_values[fmt::format("DiskTotal_{}", name)] = { *total,
"The total size in bytes of the disk (virtual filesystem). Remote filesystems may not provide this information." };
if (available)
{
new_values[fmt::format("DiskUsed_{}", name)] = { *total - *available,
"Used bytes on the disk (virtual filesystem). Remote filesystems not always provide this information." };
new_values[fmt::format("DiskAvailable_{}", name)] = { *available,
"Available bytes on the disk (virtual filesystem). Remote filesystems may not provide this information." };
}
if (unreserved)
new_values[fmt::format("DiskUnreserved_{}", name)] = { *unreserved,
"Available bytes on the disk (virtual filesystem) without the reservations for merges, fetches, and moves. Remote filesystems may not provide this information." };
}
}
{
auto databases = DatabaseCatalog::instance().getDatabases();
size_t max_queue_size = 0;
size_t max_inserts_in_queue = 0;
size_t max_merges_in_queue = 0;
size_t sum_queue_size = 0;
size_t sum_inserts_in_queue = 0;
size_t sum_merges_in_queue = 0;
size_t max_absolute_delay = 0;
size_t max_relative_delay = 0;
size_t max_part_count_for_partition = 0;
size_t number_of_databases = 0;
for (auto [db_name, _] : databases)
if (db_name != DatabaseCatalog::TEMPORARY_DATABASE)
++number_of_databases; /// filter out the internal database for temporary tables, system table "system.databases" behaves the same way
size_t total_number_of_tables = 0;
size_t total_number_of_bytes = 0;
size_t total_number_of_rows = 0;
size_t total_number_of_parts = 0;
2023-08-20 03:05:54 +00:00
size_t total_number_of_tables_system = 0;
size_t total_number_of_bytes_system = 0;
size_t total_number_of_rows_system = 0;
size_t total_number_of_parts_system = 0;
for (const auto & db : databases)
{
/// Check if database can contain MergeTree tables
if (!db.second->canContainMergeTreeTables())
continue;
2023-08-20 03:05:54 +00:00
bool is_system = db.first == DatabaseCatalog::SYSTEM_DATABASE;
for (auto iterator = db.second->getTablesIterator(getContext()); iterator->isValid(); iterator->next())
{
++total_number_of_tables;
2023-08-20 03:05:54 +00:00
if (is_system)
++total_number_of_tables_system;
const auto & table = iterator->table();
if (!table)
continue;
if (MergeTreeData * table_merge_tree = dynamic_cast<MergeTreeData *>(table.get()))
{
const auto & settings = getContext()->getSettingsRef();
calculateMax(max_part_count_for_partition, table_merge_tree->getMaxPartsCountAndSizeForPartition().first);
2023-08-20 03:05:54 +00:00
size_t bytes = table_merge_tree->totalBytes(settings).value();
size_t rows = table_merge_tree->totalRows(settings).value();
size_t parts = table_merge_tree->getActivePartsCount();
total_number_of_bytes += bytes;
total_number_of_rows += rows;
total_number_of_parts += parts;
if (is_system)
{
total_number_of_bytes_system += bytes;
total_number_of_rows_system += rows;
total_number_of_parts_system += parts;
}
}
if (StorageReplicatedMergeTree * table_replicated_merge_tree = typeid_cast<StorageReplicatedMergeTree *>(table.get()))
{
2022-12-22 13:31:42 +00:00
ReplicatedTableStatus status;
table_replicated_merge_tree->getStatus(status, false);
calculateMaxAndSum(max_queue_size, sum_queue_size, status.queue.queue_size);
calculateMaxAndSum(max_inserts_in_queue, sum_inserts_in_queue, status.queue.inserts_in_queue);
calculateMaxAndSum(max_merges_in_queue, sum_merges_in_queue, status.queue.merges_in_queue);
if (!status.is_readonly)
{
try
{
time_t absolute_delay = 0;
time_t relative_delay = 0;
table_replicated_merge_tree->getReplicaDelays(absolute_delay, relative_delay);
calculateMax(max_absolute_delay, absolute_delay);
calculateMax(max_relative_delay, relative_delay);
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__,
"Cannot get replica delay for table: " + backQuoteIfNeed(db.first) + "." + backQuoteIfNeed(iterator->name()));
}
}
}
}
}
new_values["ReplicasMaxQueueSize"] = { max_queue_size, "Maximum queue size (in the number of operations like get, merge) across Replicated tables." };
new_values["ReplicasMaxInsertsInQueue"] = { max_inserts_in_queue, "Maximum number of INSERT operations in the queue (still to be replicated) across Replicated tables." };
new_values["ReplicasMaxMergesInQueue"] = { max_merges_in_queue, "Maximum number of merge operations in the queue (still to be applied) across Replicated tables." };
new_values["ReplicasSumQueueSize"] = { sum_queue_size, "Sum queue size (in the number of operations like get, merge) across Replicated tables." };
new_values["ReplicasSumInsertsInQueue"] = { sum_inserts_in_queue, "Sum of INSERT operations in the queue (still to be replicated) across Replicated tables." };
new_values["ReplicasSumMergesInQueue"] = { sum_merges_in_queue, "Sum of merge operations in the queue (still to be applied) across Replicated tables." };
new_values["ReplicasMaxAbsoluteDelay"] = { max_absolute_delay, "Maximum difference in seconds between the most fresh replicated part and the most fresh data part still to be replicated, across Replicated tables. A very high value indicates a replica with no data." };
new_values["ReplicasMaxRelativeDelay"] = { max_relative_delay, "Maximum difference between the replica delay and the delay of the most up-to-date replica of the same table, across Replicated tables." };
new_values["MaxPartCountForPartition"] = { max_part_count_for_partition, "Maximum number of parts per partition across all partitions of all tables of MergeTree family. Values larger than 300 indicates misconfiguration, overload, or massive data loading." };
new_values["NumberOfDatabases"] = { number_of_databases, "Total number of databases on the server." };
new_values["NumberOfTables"] = { total_number_of_tables, "Total number of tables summed across the databases on the server, excluding the databases that cannot contain MergeTree tables."
" The excluded database engines are those who generate the set of tables on the fly, like `Lazy`, `MySQL`, `PostgreSQL`, `SQlite`."};
new_values["TotalBytesOfMergeTreeTables"] = { total_number_of_bytes, "Total amount of bytes (compressed, including data and indices) stored in all tables of MergeTree family." };
new_values["TotalRowsOfMergeTreeTables"] = { total_number_of_rows, "Total amount of rows (records) stored in all tables of MergeTree family." };
new_values["TotalPartsOfMergeTreeTables"] = { total_number_of_parts, "Total amount of data parts in all tables of MergeTree family."
" Numbers larger than 10 000 will negatively affect the server startup time and it may indicate unreasonable choice of the partition key." };
2023-08-20 03:05:54 +00:00
new_values["NumberOfTablesSystem"] = { total_number_of_tables_system, "Total number of tables in the system database on the server stored in tables of MergeTree family."};
new_values["TotalBytesOfMergeTreeTablesSystem"] = { total_number_of_bytes_system, "Total amount of bytes (compressed, including data and indices) stored in tables of MergeTree family in the system database." };
new_values["TotalRowsOfMergeTreeTablesSystem"] = { total_number_of_rows_system, "Total amount of rows (records) stored in tables of MergeTree family in the system database." };
new_values["TotalPartsOfMergeTreeTablesSystem"] = { total_number_of_parts_system, "Total amount of data parts in tables of MergeTree family in the system database." };
}
#if USE_NURAFT
{
auto keeper_dispatcher = getContext()->tryGetKeeperDispatcher();
if (keeper_dispatcher)
updateKeeperInformation(*keeper_dispatcher, new_values);
}
#endif
2022-11-10 11:56:27 +00:00
updateHeavyMetricsIfNeeded(current_time, update_time, new_values);
}
void ServerAsynchronousMetrics::logImpl(AsynchronousMetricValues & new_values)
{
/// Log the new metrics.
if (auto asynchronous_metric_log = getContext()->getAsynchronousMetricLog())
asynchronous_metric_log->addValues(new_values);
}
void ServerAsynchronousMetrics::updateDetachedPartsStats()
{
DetachedPartsStats current_values{};
for (const auto & db : DatabaseCatalog::instance().getDatabases())
{
if (!db.second->canContainMergeTreeTables())
continue;
for (auto iterator = db.second->getTablesIterator(getContext()); iterator->isValid(); iterator->next())
{
const auto & table = iterator->table();
if (!table)
continue;
if (MergeTreeData * table_merge_tree = dynamic_cast<MergeTreeData *>(table.get()))
{
for (const auto & detached_part: table_merge_tree->getDetachedParts())
{
if (!detached_part.valid_name)
continue;
if (detached_part.prefix.empty())
++current_values.detached_by_user;
++current_values.count;
}
}
}
}
detached_parts_stats = current_values;
}
void ServerAsynchronousMetrics::updateHeavyMetricsIfNeeded(TimePoint current_time, TimePoint update_time, AsynchronousMetricValues & new_values)
{
const auto time_after_previous_update = current_time - heavy_metric_previous_update_time;
const bool update_heavy_metric = time_after_previous_update >= heavy_metric_update_period || first_run;
Stopwatch watch;
if (update_heavy_metric)
{
heavy_metric_previous_update_time = update_time;
if (first_run)
heavy_update_interval = heavy_metric_update_period.count();
else
heavy_update_interval = std::chrono::duration_cast<std::chrono::microseconds>(time_after_previous_update).count() / 1e6;
/// Test shows that listing 100000 entries consuming around 0.15 sec.
updateDetachedPartsStats();
watch.stop();
/// Normally heavy metrics don't delay the rest of the metrics calculation
/// otherwise log the warning message
auto log_level = std::make_pair(DB::LogsLevel::trace, Poco::Message::PRIO_TRACE);
if (watch.elapsedSeconds() > (update_period.count() / 2.))
log_level = std::make_pair(DB::LogsLevel::debug, Poco::Message::PRIO_DEBUG);
else if (watch.elapsedSeconds() > (update_period.count() / 4. * 3))
log_level = std::make_pair(DB::LogsLevel::warning, Poco::Message::PRIO_WARNING);
LOG_IMPL(log, log_level.first, log_level.second,
"Update heavy metrics. "
"Update period {} sec. "
"Update heavy metrics period {} sec. "
"Heavy metrics calculation elapsed: {} sec.",
update_period.count(),
heavy_metric_update_period.count(),
watch.elapsedSeconds());
}
new_values["AsynchronousHeavyMetricsCalculationTimeSpent"] = { watch.elapsedSeconds(), "Time in seconds spent for calculation of asynchronous heavy (tables related) metrics (this is the overhead of asynchronous metrics)." };
new_values["AsynchronousHeavyMetricsUpdateInterval"] = { heavy_update_interval, "Heavy (tables related) metrics update interval" };
new_values["NumberOfDetachedParts"] = { detached_parts_stats.count, "The total number of parts detached from MergeTree tables. A part can be detached by a user with the `ALTER TABLE DETACH` query or by the server itself it the part is broken, unexpected or unneeded. The server does not care about detached parts and they can be removed." };
new_values["NumberOfDetachedByUserParts"] = { detached_parts_stats.detached_by_user, "The total number of parts detached from MergeTree tables by users with the `ALTER TABLE DETACH` query (as opposed to unexpected, broken or ignored parts). The server does not care about detached parts and they can be removed." };
}
}