2022-11-09 12:37:42 +00:00
# include <Interpreters/ServerAsynchronousMetrics.h>
# include <Interpreters/Aggregator.h>
# include <Interpreters/AsynchronousMetricLog.h>
# include <Interpreters/DatabaseCatalog.h>
# include <Interpreters/Cache/FileCache.h>
# include <Interpreters/Cache/FileCacheFactory.h>
# include <Interpreters/Context.h>
2023-08-14 16:25:52 +00:00
# include <Interpreters/Cache/QueryCache.h>
2022-11-10 10:41:49 +00:00
# include <Interpreters/JIT/CompiledExpressionCache.h>
2022-11-09 12:37:42 +00:00
# include <Databases/IDatabase.h>
# include <IO/UncompressedCache.h>
# include <IO/MMappedFileCache.h>
# include <Storages/MergeTree/MergeTreeData.h>
# include <Storages/StorageMergeTree.h>
# include <Storages/StorageReplicatedMergeTree.h>
# include <Storages/MarkCache.h>
# include <Coordination/KeeperAsynchronousMetrics.h>
namespace DB
{
2023-08-14 18:54:15 +00:00
namespace ErrorCodes
{
    /// Thrown by the constructor when an update period setting is zero.
    extern const int INVALID_SETTING_VALUE;
}
2022-11-09 12:37:42 +00:00
namespace
{

/// Raise `max` to `x` (converted to Max) if `x` exceeds the current maximum.
template <typename Max, typename T>
void calculateMax(Max & max, T x)
{
    const Max candidate(x);
    if (candidate > max)
        max = candidate;
}

/// Accumulate `x` into `sum` and also track the running maximum in `max`.
template <typename Max, typename Sum, typename T>
void calculateMaxAndSum(Max & max, Sum & sum, T x)
{
    sum += x;
    calculateMax(max, x);
}

}
ServerAsynchronousMetrics : : ServerAsynchronousMetrics (
ContextPtr global_context_ ,
int update_period_seconds ,
int heavy_metrics_update_period_seconds ,
const ProtocolServerMetricsFunc & protocol_server_metrics_func_ )
Fix Context use-after-free in ServerAsynchronousMetrics
TSan found [1]:
WARNING: ThreadSanitizer: data race on vptr (ctor/dtor vs virtual call) (pid=598)
Write of size 8 at 0x7ffcf9ad9cb0 by main thread:
0 DB::AsynchronousMetrics::~AsynchronousMetrics() build_docker/./src/Common/AsynchronousMetrics.cpp:299:1 (clickhouse+0xf38ed6e) (BuildId: 31dcc7d77a0b2aaf9de7aca070b5f6ed6ac3dcbf)
1 DB::ServerAsynchronousMetrics::~ServerAsynchronousMetrics() build_docker/./src/Interpreters/ServerAsynchronousMetrics.h:10:7 (clickhouse+0xf2b2220) (BuildId: 31dcc7d77a0b2aaf9de7aca070b5f6ed6ac3dcbf)
2 DB::Server::main(std::__1::vector<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>>> const&) build_docker/./programs/server/Server.cpp:1995:1 (clickhouse+0xf2b2220)
3 Poco::Util::Application::run() build_docker/./base/poco/Util/src/Application.cpp:315:8 (clickhouse+0x1d6c535e) (BuildId: 31dcc7d77a0b2aaf9de7aca070b5f6ed6ac3dcbf)
4 DB::Server::run() build_docker/./programs/server/Server.cpp:397:25 (clickhouse+0xf29d341) (BuildId: 31dcc7d77a0b2aaf9de7aca070b5f6ed6ac3dcbf)
5 Poco::Util::ServerApplication::run(int, char**) build_docker/./base/poco/Util/src/ServerApplication.cpp:131:9 (clickhouse+0x1d6e50b4) (BuildId: 31dcc7d77a0b2aaf9de7aca070b5f6ed6ac3dcbf)
6 mainEntryClickHouseServer(int, char**) build_docker/./programs/server/Server.cpp:203:20 (clickhouse+0xf29a4a3) (BuildId: 31dcc7d77a0b2aaf9de7aca070b5f6ed6ac3dcbf)
7 main build_docker/./programs/main.cpp:505:12 (clickhouse+0x72d72a0) (BuildId: 31dcc7d77a0b2aaf9de7aca070b5f6ed6ac3dcbf)
Previous read of size 8 at 0x7ffcf9ad9cb0 by thread T727:
0 DB::AsynchronousMetrics::update(std::__1::chrono::time_point<std::__1::chrono::system_clock, std::__1::chrono::duration<long long, std::__1::ratio<1l, 1000000l>>>) build_docker/./src/Common/AsynchronousMetrics.cpp:1559:5 (clickhouse+0xf38dec5) (BuildId: 31dcc7d77a0b2aaf9de7aca070b5f6ed6ac3dcbf)
1 DB::AsynchronousMetrics::run() build_docker/./src/Common/AsynchronousMetrics.cpp:354:13 (clickhouse+0xf38f996) (BuildId: 31dcc7d77a0b2aaf9de7aca070b5f6ed6ac3dcbf)
2 DB::AsynchronousMetrics::start()::$_0::operator()() const build_docker/./src/Common/AsynchronousMetrics.cpp:273:62 (clickhouse+0xf3921ca) (BuildId: 31dcc7d77a0b2aaf9de7aca070b5f6ed6ac3dcbf)
3 decltype(std::declval<DB::AsynchronousMetrics::start()::$_0&>()()) std::__1::__invoke[abi:v15000]<DB::AsynchronousMetrics::start()::$_0&>(DB::AsynchronousMetrics::start()::$_0&) build_docker/./contrib/llvm-project/libcxx/include/__functional/invoke.h:394:23 (clickhouse+0xf3921ca)
4 decltype(auto) std::__1::__apply_tuple_impl[abi:v15000]<DB::AsynchronousMetrics::start()::$_0&, std::__1::tuple<>&>(DB::AsynchronousMetrics::start()::$_0&, std::__1::tuple<>&, std::__1::__tuple_indices<>) build_docker/./contrib/llvm-project/libcxx/include/tuple:1789:1 (clickhouse+0xf3921ca)
5 decltype(auto) std::__1::apply[abi:v15000]<DB::AsynchronousMetrics::start()::$_0&, std::__1::tuple<>&>(DB::AsynchronousMetrics::start()::$_0&, std::__1::tuple<>&) build_docker/./contrib/llvm-project/libcxx/include/tuple:1798:1 (clickhouse+0xf3921ca)
6 ThreadFromGlobalPoolImpl<true>::ThreadFromGlobalPoolImpl<DB::AsynchronousMetrics::start()::$_0>(DB::AsynchronousMetrics::start()::$_0&&)::'lambda'()::operator()() build_docker/./src/Common/ThreadPool.h:253:13 (clickhouse+0xf3921ca)
7 decltype(std::declval<DB::AsynchronousMetrics::start()::$_0>()()) std::__1::__invoke[abi:v15000]<ThreadFromGlobalPoolImpl<true>::ThreadFromGlobalPoolImpl<DB::AsynchronousMetrics::start()::$_0>(DB::AsynchronousMetrics::start()::$_0&&)::'lambda'()&>(DB::AsynchronousMetrics::start()::$_0&&) build_docker/./contrib/llvm-project/libcxx/include/__functional/invoke.h:394:23 (clickhouse+0xf3921ca)
8 void std::__1::__invoke_void_return_wrapper<void, true>::__call<ThreadFromGlobalPoolImpl<true>::ThreadFromGlobalPoolImpl<DB::AsynchronousMetrics::start()::$_0>(DB::AsynchronousMetrics::start()::$_0&&)::'lambda'()&>(ThreadFromGlobalPoolImpl<true>::ThreadFromGlobalPoolImpl<DB::AsynchronousMetrics::start()::$_0>(DB::AsynchronousMetrics::start()::$_0&&)::'lambda'()&) build_docker/./contrib/llvm-project/libcxx/include/__functional/invoke.h:479:9 (clickhouse+0xf3921ca)
9 std::__1::__function::__default_alloc_func<ThreadFromGlobalPoolImpl<true>::ThreadFromGlobalPoolImpl<DB::AsynchronousMetrics::start()::$_0>(DB::AsynchronousMetrics::start()::$_0&&)::'lambda'(), void ()>::operator()[abi:v15000]() build_docker/./contrib/llvm-project/libcxx/include/__functional/function.h:235:12 (clickhouse+0xf3921ca)
10 void std::__1::__function::__policy_invoker<void ()>::__call_impl<std::__1::__function::__default_alloc_func<ThreadFromGlobalPoolImpl<true>::ThreadFromGlobalPoolImpl<DB::AsynchronousMetrics::start()::$_0>(DB::AsynchronousMetrics::start()::$_0&&)::'lambda'(), void ()>>(std::__1::__function::__policy_storage const*) build_docker/./contrib/llvm-project/libcxx/include/__functional/function.h:716:16 (clickhouse+0xf3921ca)
11 std::__1::__function::__policy_func<void ()>::operator()[abi:v15000]() const build_docker/./contrib/llvm-project/libcxx/include/__functional/function.h:848:16 (clickhouse+0xf305b2e) (BuildId: 31dcc7d77a0b2aaf9de7aca070b5f6ed6ac3dcbf)
12 std::__1::function<void ()>::operator()() const build_docker/./contrib/llvm-project/libcxx/include/__functional/function.h:1187:12 (clickhouse+0xf305b2e)
13 ThreadPoolImpl<std::__1::thread>::worker(std::__1::__list_iterator<std::__1::thread, void*>) build_docker/./src/Common/ThreadPool.cpp:421:13 (clickhouse+0xf305b2e)
14 void ThreadPoolImpl<std::__1::thread>::scheduleImpl<void>(std::__1::function<void ()>, Priority, std::__1::optional<unsigned long>, bool)::'lambda0'()::operator()() const build_docker/./src/Common/ThreadPool.cpp:183:73 (clickhouse+0xf30c6d1) (BuildId: 31dcc7d77a0b2aaf9de7aca070b5f6ed6ac3dcbf)
15 decltype(std::declval<void>()()) std::__1::__invoke[abi:v15000]<void ThreadPoolImpl<std::__1::thread>::scheduleImpl<void>(std::__1::function<void ()>, Priority, std::__1::optional<unsigned long>, bool)::'lambda0'()>(void&&) build_docker/./contrib/llvm-project/libcxx/include/__functional/invoke.h:394:23 (clickhouse+0xf30c6d1)
16 void std::__1::__thread_execute[abi:v15000]<std::__1::unique_ptr<std::__1::__thread_struct, std::__1::default_delete<std::__1::__thread_struct>>, void ThreadPoolImpl<std::__1::thread>::scheduleImpl<void>(std::__1::function<void ()>, Priority, std::__1::optional<unsigned long>, bool)::'lambda0'()>(std::__1::tuple<void, void ThreadPoolImpl<std::__1::thread>::scheduleImpl<void>(std::__1::function<void ()>, Priority, std::__1::optional<unsigned long>, bool)::'lambda0'()>&, std::__1::__tuple_indices<>) build_docker/./contrib/llvm-project/libcxx/include/thread:284:5 (clickhouse+0xf30c6d1)
17 void* std::__1::__thread_proxy[abi:v15000]<std::__1::tuple<std::__1::unique_ptr<std::__1::__thread_struct, std::__1::default_delete<std::__1::__thread_struct>>, void ThreadPoolImpl<std::__1::thread>::scheduleImpl<void>(std::__1::function<void ()>, Priority, std::__1::optional<unsigned long>, bool)::'lambda0'()>>(void*) build_docker/./contrib/llvm-project/libcxx/include/thread:295:5 (clickhouse+0xf30c6d1)
SUMMARY: ThreadSanitizer: data race on vptr (ctor/dtor vs virtual call) build_docker/./src/Common/AsynchronousMetrics.cpp:299:1 in DB::AsynchronousMetrics::~AsynchronousMetrics()
Or MSan [2]:
==573==WARNING: MemorySanitizer: use-of-uninitialized-value
0 0x55b57079112d in std::__1::weak_ptr<DB::Context const>::lock() const build_docker/./contrib/llvm-project/libcxx/include/__memory/shared_ptr.h:1645:20
1 0x55b57079112d in DB::WithContextImpl<std::__1::shared_ptr<DB::Context const>>::getContext() const build_docker/./src/Interpreters/Context_fwd.h:41:28
2 0x55b57079112d in DB::ServerAsynchronousMetrics::updateImpl(std::__1::unordered_map<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>, DB::AsynchronousMetricValue, std::__1::hash<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>>, std::__1::equal_to<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>>, std::__1::allocator<std::__1::pair<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>> const, DB::AsynchronousMetricValue>>>&, std::__1::chrono::time_point<std::__1::chrono::system_clock, std::__1::chrono::duration<long long, std::__1::ratio<1l, 1000000l>>>, std::__1::chrono::time_point<std::__1::chrono::system_clock, std::__1::chrono::duration<long long, std::__1::ratio<1l, 1000000l>>>) build_docker/./src/Interpreters/ServerAsynchronousMetrics.cpp:260:63
3 0x55b559540309 in DB::AsynchronousMetrics::update(std::__1::chrono::time_point<std::__1::chrono::system_clock, std::__1::chrono::duration<long long, std::__1::ratio<1l, 1000000l>>>) build_docker/./src/Common/AsynchronousMetrics.cpp:1559:5
4 0x55b55954258c in DB::AsynchronousMetrics::run() build_docker/./src/Common/AsynchronousMetrics.cpp:354:13
5 0x55b559549111 in DB::AsynchronousMetrics::start()::$_0::operator()() const build_docker/./src/Common/AsynchronousMetrics.cpp:273:62
6 0x55b559549111 in decltype(std::declval<DB::AsynchronousMetrics::start()::$_0&>()()) std::__1::__invoke[abi:v15000]<DB::AsynchronousMetrics::start()::$_0&>(DB::AsynchronousMetrics::start()::$_0&) build_docker/./contrib/llvm-project/libcxx/include/__functional/invoke.h:394:23
7 0x55b559549111 in decltype(auto) std::__1::__apply_tuple_impl[abi:v15000]<DB::AsynchronousMetrics::start()::$_0&, std::__1::tuple<>&>(DB::AsynchronousMetrics::start()::$_0&, std::__1::tuple<>&, std::__1::__tuple_indices<>) build_docker/./contrib/llvm-project/libcxx/include/tuple:1789:1
8 0x55b559549111 in decltype(auto) std::__1::apply[abi:v15000]<DB::AsynchronousMetrics::start()::$_0&, std::__1::tuple<>&>(DB::AsynchronousMetrics::start()::$_0&, std::__1::tuple<>&) build_docker/./contrib/llvm-project/libcxx/include/tuple:1798:1
9 0x55b559549111 in ThreadFromGlobalPoolImpl<true>::ThreadFromGlobalPoolImpl<DB::AsynchronousMetrics::start()::$_0>(DB::AsynchronousMetrics::start()::$_0&&)::'lambda'()::operator()() build_docker/./src/Common/ThreadPool.h:253:13
10 0x55b559549111 in decltype(std::declval<DB::AsynchronousMetrics::start()::$_0>()()) std::__1::__invoke[abi:v15000]<ThreadFromGlobalPoolImpl<true>::ThreadFromGlobalPoolImpl<DB::AsynchronousMetrics::start()::$_0>(DB::AsynchronousMetrics::start()::$_0&&)::'lambda'()&>(DB::AsynchronousMetrics::start()::$_0&&) build_docker/./contrib/llvm-project/libcxx/include/__functional/invoke.h:394:23
11 0x55b559549111 in void std::__1::__invoke_void_return_wrapper<void, true>::__call<ThreadFromGlobalPoolImpl<true>::ThreadFromGlobalPoolImpl<DB::AsynchronousMetrics::start()::$_0>(DB::AsynchronousMetrics::start()::$_0&&)::'lambda'()&>(ThreadFromGlobalPoolImpl<true>::ThreadFromGlobalPoolImpl<DB::AsynchronousMetrics::start()::$_0>(DB::AsynchronousMetrics::start()::$_0&&)::'lambda'()&) build_docker/./contrib/llvm-project/libcxx/include/__functional/invoke.h:479:9
12 0x55b559549111 in std::__1::__function::__default_alloc_func<ThreadFromGlobalPoolImpl<true>::ThreadFromGlobalPoolImpl<DB::AsynchronousMetrics::start()::$_0>(DB::AsynchronousMetrics::start()::$_0&&)::'lambda'(), void ()>::operator()[abi:v15000]() build_docker/./contrib/llvm-project/libcxx/include/__functional/function.h:235:12
13 0x55b559549111 in void std::__1::__function::__policy_invoker<void ()>::__call_impl<std::__1::__function::__default_alloc_func<ThreadFromGlobalPoolImpl<true>::ThreadFromGlobalPoolImpl<DB::AsynchronousMetrics::start()::$_0>(DB::AsynchronousMetrics::start()::$_0&&)::'lambda'(), void ()>>(std::__1::__function::__policy_storage const*) build_docker/./contrib/llvm-project/libcxx/include/__functional/function.h:716:16
14 0x55b5593eb38a in std::__1::__function::__policy_func<void ()>::operator()[abi:v15000]() const build_docker/./contrib/llvm-project/libcxx/include/__functional/function.h:848:16
15 0x55b5593eb38a in std::__1::function<void ()>::operator()() const build_docker/./contrib/llvm-project/libcxx/include/__functional/function.h:1187:12
16 0x55b5593eb38a in ThreadPoolImpl<std::__1::thread>::worker(std::__1::__list_iterator<std::__1::thread, void*>) build_docker/./src/Common/ThreadPool.cpp:421:13
17 0x55b5593f9a0a in void ThreadPoolImpl<std::__1::thread>::scheduleImpl<void>(std::__1::function<void ()>, Priority, std::__1::optional<unsigned long>, bool)::'lambda0'()::operator()() const build_docker/./src/Common/ThreadPool.cpp:183:73
18 0x55b5593f9a0a in decltype(std::declval<void>()()) std::__1::__invoke[abi:v15000]<void ThreadPoolImpl<std::__1::thread>::scheduleImpl<void>(std::__1::function<void ()>, Priority, std::__1::optional<unsigned long>, bool)::'lambda0'()>(void&&) build_docker/./contrib/llvm-project/libcxx/include/__functional/invoke.h:394:23
19 0x55b5593f9a0a in void std::__1::__thread_execute[abi:v15000]<std::__1::unique_ptr<std::__1::__thread_struct, std::__1::default_delete<std::__1::__thread_struct>>, void ThreadPoolImpl<std::__1::thread>::scheduleImpl<void>(std::__1::function<void ()>, Priority, std::__1::optional<unsigned long>, bool)::'lambda0'()>(std::__1::tuple<void, void ThreadPoolImpl<std::__1::thread>::scheduleImpl<void>(std::__1::function<void ()>, Priority, std::__1::optional<unsigned long>, bool)::'lambda0'()>&, std::__1::__tuple_indices<>) build_docker/./contrib/llvm-project/libcxx/include/thread:284:5
20 0x55b5593f9a0a in void* std::__1::__thread_proxy[abi:v15000]<std::__1::tuple<std::__1::unique_ptr<std::__1::__thread_struct, std::__1::default_delete<std::__1::__thread_struct>>, void ThreadPoolImpl<std::__1::thread>::scheduleImpl<void>(std::__1::function<void ()>, Priority, std::__1::optional<unsigned long>, bool)::'lambda0'()>>(void*) build_docker/./contrib/llvm-project/libcxx/include/thread:295:5
21 0x7f7ff3899ac2 (/lib/x86_64-linux-gnu/libc.so.6+0x94ac2) (BuildId: a43bfc8428df6623cd498c9c0caeb91aec9be4f9)
22 0x7f7ff392ba3f (/lib/x86_64-linux-gnu/libc.so.6+0x126a3f) (BuildId: a43bfc8428df6623cd498c9c0caeb91aec9be4f9)
Member fields were destroyed
0 0x55b541a72c9d in __sanitizer_dtor_callback_fields (/usr/bin/clickhouse+0x7c6dc9d) (BuildId: 57941f7730deefd0a2028f1d9f3e173472a4aa76)
1 0x55b559304b83 in std::__1::weak_ptr<DB::Context const>::~weak_ptr() build_docker/./contrib/llvm-project/libcxx/include/__memory/shared_ptr.h:1397:26
2 0x55b559304b83 in std::__1::weak_ptr<DB::Context const>::~weak_ptr() build_docker/./contrib/llvm-project/libcxx/include/__memory/shared_ptr.h:1553:1
3 0x55b559304b83 in DB::WithContextImpl<std::__1::shared_ptr<DB::Context const>>::~WithContextImpl() build_docker/./src/Interpreters/Context_fwd.h:30:8
4 0x55b559304b83 in DB::ServerAsynchronousMetrics::~ServerAsynchronousMetrics() build_docker/./src/Interpreters/ServerAsynchronousMetrics.h:10:7
5 0x55b559304b83 in DB::Server::main(std::__1::vector<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>>> const&) build_docker/./programs/server/Server.cpp:1995:1
6 0x55b5789f579e in Poco::Util::Application::run() build_docker/./base/poco/Util/src/Application.cpp:315:8
7 0x55b5592d762a in DB::Server::run() build_docker/./programs/server/Server.cpp:397:25
8 0x55b578a3efdf in Poco::Util::ServerApplication::run(int, char**) build_docker/./base/poco/Util/src/ServerApplication.cpp:131:9
9 0x55b5592d0489 in mainEntryClickHouseServer(int, char**) build_docker/./programs/server/Server.cpp:203:20
10 0x55b541acd4ab in main build_docker/./programs/main.cpp:505:12
11 0x7f7ff382ed8f (/lib/x86_64-linux-gnu/libc.so.6+0x29d8f) (BuildId: a43bfc8428df6623cd498c9c0caeb91aec9be4f9)
SUMMARY: MemorySanitizer: use-of-uninitialized-value build_docker/./contrib/llvm-project/libcxx/include/__memory/shared_ptr.h:1645:20 in std::__1::weak_ptr<DB::Context const>::lock() const
[1]: https://s3.amazonaws.com/clickhouse-test-reports/52717/fcdead023c4350233ef1e0f7f82a71653ed62229/stress_test__tsan_.html
[2]: https://s3.amazonaws.com/clickhouse-test-reports/52717/fcdead023c4350233ef1e0f7f82a71653ed62229/stress_test__msan_.html
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2023-12-10 14:37:36 +00:00
: WithContext ( global_context_ )
, AsynchronousMetrics ( update_period_seconds , protocol_server_metrics_func_ )
2022-11-09 12:37:42 +00:00
, heavy_metric_update_period ( heavy_metrics_update_period_seconds )
2023-08-14 18:54:15 +00:00
{
/// sanity check
if ( update_period_seconds = = 0 | | heavy_metrics_update_period_seconds = = 0 )
throw Exception ( ErrorCodes : : INVALID_SETTING_VALUE , " Setting asynchronous_metrics_update_period_s and asynchronous_heavy_metrics_update_period_s must not be zero " ) ;
}
2022-11-09 14:51:41 +00:00
2022-11-10 11:56:27 +00:00
void ServerAsynchronousMetrics : : updateImpl ( AsynchronousMetricValues & new_values , TimePoint update_time , TimePoint current_time )
2022-11-09 12:37:42 +00:00
{
if ( auto mark_cache = getContext ( ) - > getMarkCache ( ) )
{
2023-08-21 18:05:24 +00:00
new_values [ " MarkCacheBytes " ] = { mark_cache - > sizeInBytes ( ) , " Total size of mark cache in bytes " } ;
2022-11-15 12:00:02 +00:00
new_values [ " MarkCacheFiles " ] = { mark_cache - > count ( ) , " Total number of mark files cached in the mark cache " } ;
2022-11-09 12:37:42 +00:00
}
if ( auto uncompressed_cache = getContext ( ) - > getUncompressedCache ( ) )
{
2023-08-21 18:05:24 +00:00
new_values [ " UncompressedCacheBytes " ] = { uncompressed_cache - > sizeInBytes ( ) ,
2022-11-15 12:00:02 +00:00
" Total size of uncompressed cache in bytes. Uncompressed cache does not usually improve the performance and should be mostly avoided. " } ;
new_values [ " UncompressedCacheCells " ] = { uncompressed_cache - > count ( ) ,
" Total number of entries in the uncompressed cache. Each entry represents a decompressed block of data. Uncompressed cache does not usually improve performance and should be mostly avoided. " } ;
2022-11-09 12:37:42 +00:00
}
if ( auto index_mark_cache = getContext ( ) - > getIndexMarkCache ( ) )
{
2023-08-21 18:05:24 +00:00
new_values [ " IndexMarkCacheBytes " ] = { index_mark_cache - > sizeInBytes ( ) , " Total size of mark cache for secondary indices in bytes. " } ;
2022-11-15 12:00:02 +00:00
new_values [ " IndexMarkCacheFiles " ] = { index_mark_cache - > count ( ) , " Total number of mark files cached in the mark cache for secondary indices. " } ;
2022-11-09 12:37:42 +00:00
}
if ( auto index_uncompressed_cache = getContext ( ) - > getIndexUncompressedCache ( ) )
{
2023-08-21 18:05:24 +00:00
new_values [ " IndexUncompressedCacheBytes " ] = { index_uncompressed_cache - > sizeInBytes ( ) ,
2022-11-15 12:00:02 +00:00
" Total size of uncompressed cache in bytes for secondary indices. Uncompressed cache does not usually improve the performance and should be mostly avoided. " } ;
new_values [ " IndexUncompressedCacheCells " ] = { index_uncompressed_cache - > count ( ) ,
" Total number of entries in the uncompressed cache for secondary indices. Each entry represents a decompressed block of data. Uncompressed cache does not usually improve performance and should be mostly avoided. " } ;
2022-11-09 12:37:42 +00:00
}
if ( auto mmap_cache = getContext ( ) - > getMMappedFileCache ( ) )
{
2022-11-15 12:00:02 +00:00
new_values [ " MMapCacheCells " ] = { mmap_cache - > count ( ) ,
" The number of files opened with `mmap` (mapped in memory). "
" This is used for queries with the setting `local_filesystem_read_method` set to `mmap`. "
" The files opened with `mmap` are kept in the cache to avoid costly TLB flushes. " } ;
2022-11-09 12:37:42 +00:00
}
2023-07-27 09:49:34 +00:00
if ( auto query_cache = getContext ( ) - > getQueryCache ( ) )
{
2023-08-21 18:05:24 +00:00
new_values [ " QueryCacheBytes " ] = { query_cache - > sizeInBytes ( ) , " Total size of the query cache in bytes. " } ;
2023-07-27 09:49:34 +00:00
new_values [ " QueryCacheEntries " ] = { query_cache - > count ( ) , " Total number of entries in the query cache. " } ;
}
2022-11-09 12:37:42 +00:00
{
auto caches = FileCacheFactory : : instance ( ) . getAll ( ) ;
2022-11-15 12:00:02 +00:00
size_t total_bytes = 0 ;
size_t total_files = 0 ;
2022-11-09 12:37:42 +00:00
for ( const auto & [ _ , cache_data ] : caches )
{
2022-11-15 12:00:02 +00:00
total_bytes + = cache_data - > cache - > getUsedCacheSize ( ) ;
total_files + = cache_data - > cache - > getFileSegmentsNum ( ) ;
2022-11-09 12:37:42 +00:00
}
2022-11-15 12:00:02 +00:00
new_values [ " FilesystemCacheBytes " ] = { total_bytes ,
" Total bytes in the `cache` virtual filesystem. This cache is hold on disk. " } ;
new_values [ " FilesystemCacheFiles " ] = { total_files ,
" Total number of cached file segments in the `cache` virtual filesystem. This cache is hold on disk. " } ;
2022-11-09 12:37:42 +00:00
}
# if USE_EMBEDDED_COMPILER
if ( auto * compiled_expression_cache = CompiledExpressionCacheFactory : : instance ( ) . tryGetCache ( ) )
{
2023-08-21 18:05:24 +00:00
new_values [ " CompiledExpressionCacheBytes " ] = { compiled_expression_cache - > sizeInBytes ( ) ,
2022-11-15 12:00:02 +00:00
" Total bytes used for the cache of JIT-compiled code. " } ;
new_values [ " CompiledExpressionCacheCount " ] = { compiled_expression_cache - > count ( ) ,
" Total entries in the cache of JIT-compiled code. " } ;
2022-11-09 12:37:42 +00:00
}
# endif
2022-11-15 12:00:02 +00:00
new_values [ " Uptime " ] = { getContext ( ) - > getUptimeSeconds ( ) ,
" The server uptime in seconds. It includes the time spent for server initialization before accepting connections. " } ;
2022-11-09 12:37:42 +00:00
if ( const auto stats = getHashTablesCacheStatistics ( ) )
{
2022-11-15 12:00:02 +00:00
new_values [ " HashTableStatsCacheEntries " ] = { stats - > entries ,
" The number of entries in the cache of hash table sizes. "
" The cache for hash table sizes is used for predictive optimization of GROUP BY. " } ;
new_values [ " HashTableStatsCacheHits " ] = { stats - > hits ,
" The number of times the prediction of a hash table size was correct. " } ;
new_values [ " HashTableStatsCacheMisses " ] = { stats - > misses ,
" The number of times the prediction of a hash table size was incorrect. " } ;
2022-11-09 12:37:42 +00:00
}
/// Free space in filesystems at data path and logs path.
{
auto stat = getStatVFS ( getContext ( ) - > getPath ( ) ) ;
2022-11-15 12:00:02 +00:00
new_values [ " FilesystemMainPathTotalBytes " ] = { stat . f_blocks * stat . f_frsize ,
" The size of the volume where the main ClickHouse path is mounted, in bytes. " } ;
new_values [ " FilesystemMainPathAvailableBytes " ] = { stat . f_bavail * stat . f_frsize ,
" Available bytes on the volume where the main ClickHouse path is mounted. " } ;
new_values [ " FilesystemMainPathUsedBytes " ] = { ( stat . f_blocks - stat . f_bavail ) * stat . f_frsize ,
" Used bytes on the volume where the main ClickHouse path is mounted. " } ;
new_values [ " FilesystemMainPathTotalINodes " ] = { stat . f_files ,
" The total number of inodes on the volume where the main ClickHouse path is mounted. If it is less than 25 million, it indicates a misconfiguration. " } ;
new_values [ " FilesystemMainPathAvailableINodes " ] = { stat . f_favail ,
" The number of available inodes on the volume where the main ClickHouse path is mounted. If it is close to zero, it indicates a misconfiguration, and you will get 'no space left on device' even when the disk is not full. " } ;
new_values [ " FilesystemMainPathUsedINodes " ] = { stat . f_files - stat . f_favail ,
" The number of used inodes on the volume where the main ClickHouse path is mounted. This value mostly corresponds to the number of files. " } ;
2022-11-09 12:37:42 +00:00
}
{
/// Current working directory of the server is the directory with logs.
auto stat = getStatVFS ( " . " ) ;
2022-11-15 12:00:02 +00:00
new_values [ " FilesystemLogsPathTotalBytes " ] = { stat . f_blocks * stat . f_frsize ,
" The size of the volume where ClickHouse logs path is mounted, in bytes. It's recommended to have at least 10 GB for logs. " } ;
new_values [ " FilesystemLogsPathAvailableBytes " ] = { stat . f_bavail * stat . f_frsize ,
" Available bytes on the volume where ClickHouse logs path is mounted. If this value approaches zero, you should tune the log rotation in the configuration file. " } ;
new_values [ " FilesystemLogsPathUsedBytes " ] = { ( stat . f_blocks - stat . f_bavail ) * stat . f_frsize ,
" Used bytes on the volume where ClickHouse logs path is mounted. " } ;
new_values [ " FilesystemLogsPathTotalINodes " ] = { stat . f_files ,
" The total number of inodes on the volume where ClickHouse logs path is mounted. " } ;
new_values [ " FilesystemLogsPathAvailableINodes " ] = { stat . f_favail ,
" The number of available inodes on the volume where ClickHouse logs path is mounted. " } ;
new_values [ " FilesystemLogsPathUsedINodes " ] = { stat . f_files - stat . f_favail ,
" The number of used inodes on the volume where ClickHouse logs path is mounted. " } ;
2022-11-09 12:37:42 +00:00
}
/// Free and total space on every configured disk.
{
DisksMap disks_map = getContext ( ) - > getDisksMap ( ) ;
for ( const auto & [ name , disk ] : disks_map )
{
auto total = disk - > getTotalSpace ( ) ;
/// Some disks don't support information about the space.
if ( ! total )
continue ;
auto available = disk - > getAvailableSpace ( ) ;
auto unreserved = disk - > getUnreservedSpace ( ) ;
2023-04-29 16:55:19 +00:00
new_values [ fmt : : format ( " DiskTotal_{} " , name ) ] = { * total ,
" The total size in bytes of the disk (virtual filesystem). Remote filesystems may not provide this information. " } ;
if ( available )
{
new_values [ fmt : : format ( " DiskUsed_{} " , name ) ] = { * total - * available ,
" Used bytes on the disk (virtual filesystem). Remote filesystems not always provide this information. " } ;
new_values [ fmt : : format ( " DiskAvailable_{} " , name ) ] = { * available ,
" Available bytes on the disk (virtual filesystem). Remote filesystems may not provide this information. " } ;
}
if ( unreserved )
new_values [ fmt : : format ( " DiskUnreserved_{} " , name ) ] = { * unreserved ,
" Available bytes on the disk (virtual filesystem) without the reservations for merges, fetches, and moves. Remote filesystems may not provide this information. " } ;
2022-11-09 12:37:42 +00:00
}
}
{
auto databases = DatabaseCatalog : : instance ( ) . getDatabases ( ) ;
size_t max_queue_size = 0 ;
size_t max_inserts_in_queue = 0 ;
size_t max_merges_in_queue = 0 ;
size_t sum_queue_size = 0 ;
size_t sum_inserts_in_queue = 0 ;
size_t sum_merges_in_queue = 0 ;
size_t max_absolute_delay = 0 ;
size_t max_relative_delay = 0 ;
size_t max_part_count_for_partition = 0 ;
2023-02-15 12:58:00 +00:00
size_t number_of_databases = 0 ;
for ( auto [ db_name , _ ] : databases )
if ( db_name ! = DatabaseCatalog : : TEMPORARY_DATABASE )
+ + number_of_databases ; /// filter out the internal database for temporary tables, system table "system.databases" behaves the same way
2022-11-09 12:37:42 +00:00
size_t total_number_of_tables = 0 ;
size_t total_number_of_bytes = 0 ;
size_t total_number_of_rows = 0 ;
size_t total_number_of_parts = 0 ;
2023-08-20 03:05:54 +00:00
size_t total_number_of_tables_system = 0 ;
size_t total_number_of_bytes_system = 0 ;
size_t total_number_of_rows_system = 0 ;
size_t total_number_of_parts_system = 0 ;
2022-11-09 12:37:42 +00:00
for ( const auto & db : databases )
{
/// Check if database can contain MergeTree tables
if ( ! db . second - > canContainMergeTreeTables ( ) )
continue ;
2023-08-20 03:05:54 +00:00
bool is_system = db . first = = DatabaseCatalog : : SYSTEM_DATABASE ;
2022-11-09 12:37:42 +00:00
for ( auto iterator = db . second - > getTablesIterator ( getContext ( ) ) ; iterator - > isValid ( ) ; iterator - > next ( ) )
{
+ + total_number_of_tables ;
2023-08-20 03:05:54 +00:00
if ( is_system )
+ + total_number_of_tables_system ;
2022-11-09 12:37:42 +00:00
const auto & table = iterator - > table ( ) ;
if ( ! table )
continue ;
if ( MergeTreeData * table_merge_tree = dynamic_cast < MergeTreeData * > ( table . get ( ) ) )
{
const auto & settings = getContext ( ) - > getSettingsRef ( ) ;
calculateMax ( max_part_count_for_partition , table_merge_tree - > getMaxPartsCountAndSizeForPartition ( ) . first ) ;
2023-08-20 03:05:54 +00:00
size_t bytes = table_merge_tree - > totalBytes ( settings ) . value ( ) ;
size_t rows = table_merge_tree - > totalRows ( settings ) . value ( ) ;
size_t parts = table_merge_tree - > getActivePartsCount ( ) ;
total_number_of_bytes + = bytes ;
total_number_of_rows + = rows ;
total_number_of_parts + = parts ;
if ( is_system )
{
total_number_of_bytes_system + = bytes ;
total_number_of_rows_system + = rows ;
total_number_of_parts_system + = parts ;
}
2022-11-09 12:37:42 +00:00
}
if ( StorageReplicatedMergeTree * table_replicated_merge_tree = typeid_cast < StorageReplicatedMergeTree * > ( table . get ( ) ) )
{
2022-12-22 13:31:42 +00:00
ReplicatedTableStatus status ;
2022-11-09 12:37:42 +00:00
table_replicated_merge_tree - > getStatus ( status , false ) ;
calculateMaxAndSum ( max_queue_size , sum_queue_size , status . queue . queue_size ) ;
calculateMaxAndSum ( max_inserts_in_queue , sum_inserts_in_queue , status . queue . inserts_in_queue ) ;
calculateMaxAndSum ( max_merges_in_queue , sum_merges_in_queue , status . queue . merges_in_queue ) ;
if ( ! status . is_readonly )
{
try
{
time_t absolute_delay = 0 ;
time_t relative_delay = 0 ;
table_replicated_merge_tree - > getReplicaDelays ( absolute_delay , relative_delay ) ;
calculateMax ( max_absolute_delay , absolute_delay ) ;
calculateMax ( max_relative_delay , relative_delay ) ;
}
catch ( . . . )
{
tryLogCurrentException ( __PRETTY_FUNCTION__ ,
" Cannot get replica delay for table: " + backQuoteIfNeed ( db . first ) + " . " + backQuoteIfNeed ( iterator - > name ( ) ) ) ;
}
}
}
}
}
2022-11-15 12:00:02 +00:00
new_values [ " ReplicasMaxQueueSize " ] = { max_queue_size , " Maximum queue size (in the number of operations like get, merge) across Replicated tables. " } ;
new_values [ " ReplicasMaxInsertsInQueue " ] = { max_inserts_in_queue , " Maximum number of INSERT operations in the queue (still to be replicated) across Replicated tables. " } ;
new_values [ " ReplicasMaxMergesInQueue " ] = { max_merges_in_queue , " Maximum number of merge operations in the queue (still to be applied) across Replicated tables. " } ;
2022-11-09 12:37:42 +00:00
2022-11-15 12:00:02 +00:00
new_values [ " ReplicasSumQueueSize " ] = { sum_queue_size , " Sum queue size (in the number of operations like get, merge) across Replicated tables. " } ;
new_values [ " ReplicasSumInsertsInQueue " ] = { sum_inserts_in_queue , " Sum of INSERT operations in the queue (still to be replicated) across Replicated tables. " } ;
new_values [ " ReplicasSumMergesInQueue " ] = { sum_merges_in_queue , " Sum of merge operations in the queue (still to be applied) across Replicated tables. " } ;
2022-11-09 12:37:42 +00:00
2022-11-15 12:00:02 +00:00
new_values [ " ReplicasMaxAbsoluteDelay " ] = { max_absolute_delay , " Maximum difference in seconds between the most fresh replicated part and the most fresh data part still to be replicated, across Replicated tables. A very high value indicates a replica with no data. " } ;
new_values [ " ReplicasMaxRelativeDelay " ] = { max_relative_delay , " Maximum difference between the replica delay and the delay of the most up-to-date replica of the same table, across Replicated tables. " } ;
2022-11-09 12:37:42 +00:00
2022-11-15 12:00:02 +00:00
new_values [ " MaxPartCountForPartition " ] = { max_part_count_for_partition , " Maximum number of parts per partition across all partitions of all tables of MergeTree family. Values larger than 300 indicates misconfiguration, overload, or massive data loading. " } ;
2022-11-09 12:37:42 +00:00
2022-11-15 12:00:02 +00:00
new_values [ " NumberOfDatabases " ] = { number_of_databases , " Total number of databases on the server. " } ;
new_values [ " NumberOfTables " ] = { total_number_of_tables , " Total number of tables summed across the databases on the server, excluding the databases that cannot contain MergeTree tables. "
" The excluded database engines are those who generate the set of tables on the fly, like `Lazy`, `MySQL`, `PostgreSQL`, `SQlite`. " } ;
2022-11-09 12:37:42 +00:00
2022-11-15 12:00:02 +00:00
new_values [ " TotalBytesOfMergeTreeTables " ] = { total_number_of_bytes , " Total amount of bytes (compressed, including data and indices) stored in all tables of MergeTree family. " } ;
new_values [ " TotalRowsOfMergeTreeTables " ] = { total_number_of_rows , " Total amount of rows (records) stored in all tables of MergeTree family. " } ;
new_values [ " TotalPartsOfMergeTreeTables " ] = { total_number_of_parts , " Total amount of data parts in all tables of MergeTree family. "
" Numbers larger than 10 000 will negatively affect the server startup time and it may indicate unreasonable choice of the partition key. " } ;
2023-08-20 03:05:54 +00:00
new_values [ " NumberOfTablesSystem " ] = { total_number_of_tables_system , " Total number of tables in the system database on the server stored in tables of MergeTree family. " } ;
new_values [ " TotalBytesOfMergeTreeTablesSystem " ] = { total_number_of_bytes_system , " Total amount of bytes (compressed, including data and indices) stored in tables of MergeTree family in the system database. " } ;
new_values [ " TotalRowsOfMergeTreeTablesSystem " ] = { total_number_of_rows_system , " Total amount of rows (records) stored in tables of MergeTree family in the system database. " } ;
new_values [ " TotalPartsOfMergeTreeTablesSystem " ] = { total_number_of_parts_system , " Total amount of data parts in tables of MergeTree family in the system database. " } ;
2022-11-09 12:37:42 +00:00
}
# if USE_NURAFT
{
auto keeper_dispatcher = getContext ( ) - > tryGetKeeperDispatcher ( ) ;
if ( keeper_dispatcher )
updateKeeperInformation ( * keeper_dispatcher , new_values ) ;
}
# endif
2022-11-10 11:56:27 +00:00
updateHeavyMetricsIfNeeded ( current_time , update_time , new_values ) ;
2022-11-09 12:37:42 +00:00
}
void ServerAsynchronousMetrics : : logImpl ( AsynchronousMetricValues & new_values )
{
/// Log the new metrics.
if ( auto asynchronous_metric_log = getContext ( ) - > getAsynchronousMetricLog ( ) )
asynchronous_metric_log - > addValues ( new_values ) ;
}
void ServerAsynchronousMetrics : : updateDetachedPartsStats ( )
{
DetachedPartsStats current_values { } ;
for ( const auto & db : DatabaseCatalog : : instance ( ) . getDatabases ( ) )
{
if ( ! db . second - > canContainMergeTreeTables ( ) )
continue ;
for ( auto iterator = db . second - > getTablesIterator ( getContext ( ) ) ; iterator - > isValid ( ) ; iterator - > next ( ) )
{
const auto & table = iterator - > table ( ) ;
if ( ! table )
continue ;
if ( MergeTreeData * table_merge_tree = dynamic_cast < MergeTreeData * > ( table . get ( ) ) )
{
for ( const auto & detached_part : table_merge_tree - > getDetachedParts ( ) )
{
if ( ! detached_part . valid_name )
continue ;
if ( detached_part . prefix . empty ( ) )
+ + current_values . detached_by_user ;
+ + current_values . count ;
}
}
}
}
detached_parts_stats = current_values ;
}
void ServerAsynchronousMetrics : : updateHeavyMetricsIfNeeded ( TimePoint current_time , TimePoint update_time , AsynchronousMetricValues & new_values )
{
const auto time_after_previous_update = current_time - heavy_metric_previous_update_time ;
const bool update_heavy_metric = time_after_previous_update > = heavy_metric_update_period | | first_run ;
2023-02-25 19:53:17 +00:00
Stopwatch watch ;
2022-11-09 12:37:42 +00:00
if ( update_heavy_metric )
{
heavy_metric_previous_update_time = update_time ;
2023-02-25 19:53:17 +00:00
if ( first_run )
heavy_update_interval = heavy_metric_update_period . count ( ) ;
else
heavy_update_interval = std : : chrono : : duration_cast < std : : chrono : : microseconds > ( time_after_previous_update ) . count ( ) / 1e6 ;
2022-11-09 12:37:42 +00:00
/// Test shows that listing 100000 entries consuming around 0.15 sec.
updateDetachedPartsStats ( ) ;
watch . stop ( ) ;
/// Normally heavy metrics don't delay the rest of the metrics calculation
/// otherwise log the warning message
auto log_level = std : : make_pair ( DB : : LogsLevel : : trace , Poco : : Message : : PRIO_TRACE ) ;
if ( watch . elapsedSeconds ( ) > ( update_period . count ( ) / 2. ) )
log_level = std : : make_pair ( DB : : LogsLevel : : debug , Poco : : Message : : PRIO_DEBUG ) ;
else if ( watch . elapsedSeconds ( ) > ( update_period . count ( ) / 4. * 3 ) )
log_level = std : : make_pair ( DB : : LogsLevel : : warning , Poco : : Message : : PRIO_WARNING ) ;
LOG_IMPL ( log , log_level . first , log_level . second ,
" Update heavy metrics. "
" Update period {} sec. "
" Update heavy metrics period {} sec. "
" Heavy metrics calculation elapsed: {} sec. " ,
update_period . count ( ) ,
heavy_metric_update_period . count ( ) ,
watch . elapsedSeconds ( ) ) ;
}
2023-02-25 19:53:17 +00:00
new_values [ " AsynchronousHeavyMetricsCalculationTimeSpent " ] = { watch . elapsedSeconds ( ) , " Time in seconds spent for calculation of asynchronous heavy (tables related) metrics (this is the overhead of asynchronous metrics). " } ;
2022-11-09 12:37:42 +00:00
2023-02-25 19:53:17 +00:00
new_values [ " AsynchronousHeavyMetricsUpdateInterval " ] = { heavy_update_interval , " Heavy (tables related) metrics update interval " } ;
2022-11-15 12:00:02 +00:00
new_values [ " NumberOfDetachedParts " ] = { detached_parts_stats . count , " The total number of parts detached from MergeTree tables. A part can be detached by a user with the `ALTER TABLE DETACH` query or by the server itself it the part is broken, unexpected or unneeded. The server does not care about detached parts and they can be removed. " } ;
new_values [ " NumberOfDetachedByUserParts " ] = { detached_parts_stats . detached_by_user , " The total number of parts detached from MergeTree tables by users with the `ALTER TABLE DETACH` query (as opposed to unexpected, broken or ignored parts). The server does not care about detached parts and they can be removed. " } ;
2022-11-09 12:37:42 +00:00
}
}