From 0025110a16a7a79899dbabccb9b78727d15c83b8 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 19 Feb 2022 13:00:35 +0300 Subject: [PATCH] Replace exit() with abort() in case of NuRaft errors CI founds [1]: WARNING: ThreadSanitizer: data race (pid=99) Write of size 8 at 0x7b14002192b0 by thread T27: 12 std::__1::map<> 13 DB::OpenedFileCache::~OpenedFileCache() obj-x86_64-linux-gnu/../src/IO/OpenedFileCache.h:27:7 (clickhouse+0xac66de6) 14 cxa_at_exit_wrapper(void*) crtstuff.c (clickhouse+0xaa3646f) 15 decltype(*(std::__1::forward(fp0)).*fp()) std::__1::__invoke() Previous read of size 8 at 0x7b14002192b0 by thread T37 (mutexes: write M732116415018761216): 4 DB::OpenedFileCache::get() obj-x86_64-linux-gnu/../src/IO/OpenedFileCache.h:47:37 (clickhouse+0xac66784) Thread T27 'nuraft_commit' (tid=193, running) created by main thread at: ... Thread T37 'MergeMutate' (tid=204, running) created by main thread at: ... But it also reports that the mutex was already destroyed: Mutex M732116415018761216 is already destroyed. The problem is that [nuraft can call `exit()`](https://github.com/ClickHouse-Extras/NuRaft/blob/1707a7572aa66ec5d0a2dbe2bf5effa3352e6b2d/src/handle_commit.cxx#L157-L158) which will call atexit handlers: 2022.02.17 22:54:03.495450 [ 193 ] {} RaftInstance: background committing thread encounter err Memory limit (total) exceeded: would use 56.56 GiB (attempt to allocate chunk of 8192 bytes), maximum: 55.82 GiB, exiting to protect the system [1]: https://s3.amazonaws.com/clickhouse-test-reports/33057/5a8cf3ac98808dadf125068a33ed9c622998a484/fuzzer_astfuzzertsan,actions//report.html Let's replace exit() with abort() to avoid this. Signed-off-by: Azat Khuzhin --- src/Coordination/KeeperStateManager.cpp | 9 +++++++++ src/Coordination/KeeperStateManager.h | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/Coordination/KeeperStateManager.cpp b/src/Coordination/KeeperStateManager.cpp index c2d4274f972..52d0b0cc881 100644 --- a/src/Coordination/KeeperStateManager.cpp +++ b/src/Coordination/KeeperStateManager.cpp @@ -115,6 +115,15 @@ void KeeperStateManager::loadLogStore(uint64_t last_commited_index, uint64_t log log_store->init(last_commited_index, logs_to_keep); } +void KeeperStateManager::system_exit(const int /* exit_code */) +{ + /// NuRaft itself calls exit() which will call atexit handlers + /// and this may lead to an issues in multi-threaded program. + /// + /// Override this with abort(). + abort(); +} + ClusterConfigPtr KeeperStateManager::getLatestConfigFromLogStore() const { auto entry_with_change = log_store->getLatestConfigChange(); diff --git a/src/Coordination/KeeperStateManager.h b/src/Coordination/KeeperStateManager.h index fad76c89503..66037d78a63 100644 --- a/src/Coordination/KeeperStateManager.h +++ b/src/Coordination/KeeperStateManager.h @@ -73,7 +73,7 @@ public: nuraft::ptr get_srv_config() const { return configuration_wrapper.config; } - void system_exit(const int /* exit_code */) override {} + void system_exit(const int exit_code) override; int getPort() const {