mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-27 01:51:59 +00:00
Fix ASan builds for glibc 2.36+ (use RTLD_NEXT for ThreadFuzzer interceptors)
Recently I noticed that clickhouse compiled with ASan does not work with newer glibc 2.36+, before I thought that this was only about compiling with old but using new, however that was not correct, ASan simply does not work with glibc 2.36+. Here is a simple reproducer [1]: $ cat > test-asan.cpp <<EOL #include <pthread.h> int main() { // something broken in ASan in interceptor for __pthread_mutex_lock // and only since glibc 2.36, and for pthread_mutex_lock everything is OK pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; return __pthread_mutex_lock(&mutex); } EOL $ clang -g3 -o test-asan test-asan.cpp -fsanitize=address $ ./test-asan AddressSanitizer:DEADLYSIGNAL ================================================================= ==15659==ERROR: AddressSanitizer: SEGV on unknown address 0x000000000000 (pc 0x000000000000 bp 0x7fffffffccb0 sp 0x7fffffffcb98 T0) ==15659==Hint: pc points to the zero page. ==15659==The signal is caused by a READ memory access. ==15659==Hint: address points to the zero page. #0 0x0 (<unknown module>) #1 0x7ffff7cda28f (/usr/lib/libc.so.6+0x2328f) (BuildId: 1e94beb079e278ac4f2c8bce1f53091548ea1584) AddressSanitizer can not provide additional info. SUMMARY: AddressSanitizer: SEGV (<unknown module>) ==15659==ABORTING [1]: https://gist.github.com/azat/af073e57a248e04488b21068643f079e I've started observing glibc code, there were some changes in glibc that moved pthread functions out from libpthread.so.0 into libc.so.6 (somewhere between 2.31 and 2.35), but the problem pops up only with 2.36, 2.35 works fine. After this I've looked into changes between 2.35 and 2.36, and found this patch [2] - "dlsym: Make RTLD_NEXT prefer default version definition [BZ #14932]", that fixes this bug [3]. 
[2]: https://sourceware.org/git/?p=glibc.git;a=commit;h=efa7936e4c91b1c260d03614bb26858fbb8a0204 [3]: https://sourceware.org/bugzilla/show_bug.cgi?id=14932 The problem with using the DL_LOOKUP_RETURN_NEWEST flag for RTLD_NEXT is that it does not resolve hidden symbols (and __pthread_mutex_lock is indeed hidden). Here is a sample that will show the difference [4]: $ cat > test-dlsym.c <<EOL #define _GNU_SOURCE #include <dlfcn.h> #include <stdio.h> int main() { void *p = dlsym(RTLD_NEXT, "__pthread_mutex_lock"); printf("__pthread_mutex_lock: %p (via RTLD_NEXT)\n", p); return 0; } EOL # glibc 2.35: __pthread_mutex_lock: 0x7ffff7e27f70 (via RTLD_NEXT) # glibc 2.36: __pthread_mutex_lock: (nil) (via RTLD_NEXT) [4]: https://gist.github.com/azat/3b5f2ae6011bef2ae86392cea7789eb7 But ThreadFuzzer uses internal symbols to wrap pthread_mutex_lock/pthread_mutex_unlock, which are intercepted by ASan and this leads to a NULL dereference. The fix was obvious - just use dlsym(RTLD_NEXT), however on older glibc versions this leads to endless recursion (see comments in the code). But only for jemalloc [5], and even though sanitizers do not use jemalloc, the code of ThreadFuzzer is generic and I don't want to guard it with more preprocessor macros. [5]: https://gist.github.com/azat/588d9c72c1e70fc13ebe113197883aa2 So we have to use RTLD_NEXT only for ASan. There is also one more interesting issue, if you compile with clang that itself had been compiled with newer libc (i.e. 2.36), you will get the following error: $ podman run --privileged -v $PWD/.cmake-asan/programs:/root/bin -e PATH=/bin:/root/bin -e --rm -it ubuntu-dev-v3 clickhouse ==1==ERROR: AddressSanitizer failed to allocate 0x0 (0) bytes of SetAlternateSignalStack (error code: 22) ... ==1==End of process memory map. 
AddressSanitizer: CHECK failed: sanitizer_common.cpp:53 "((0 && "unable to mmap")) != (0)" (0x0, 0x0) (tid=1) <empty stack> The problem is that since GLIBC_2.31, `SIGSTKSZ` is a call to `getconf(_SC_MINSIGSTKSZ)`, but older glibc does not have it, so `-1` will be returned and used as `SIGSTKSZ` instead. The workaround is to disable the alternative stack: $ podman run --privileged -v $PWD/.cmake-asan/programs:/root/bin -e PATH=/bin:/root/bin -e ASAN_OPTIONS=use_sigaltstack=0 --rm -it ubuntu-dev-v3 clickhouse client --version ClickHouse client version 22.13.1.1. Fixes: #43426 Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
This commit is contained in:
parent
0ad37ad286
commit
bdeb5514c5
@ -18,6 +18,7 @@
|
|||||||
#include <Common/thread_local_rng.h>
|
#include <Common/thread_local_rng.h>
|
||||||
|
|
||||||
#include <Common/ThreadFuzzer.h>
|
#include <Common/ThreadFuzzer.h>
|
||||||
|
#include "config.h" // USE_JEMALLOC
|
||||||
|
|
||||||
|
|
||||||
/// We will also wrap some thread synchronization functions to inject sleep/migration before or after.
|
/// We will also wrap some thread synchronization functions to inject sleep/migration before or after.
|
||||||
@ -27,28 +28,6 @@
|
|||||||
#define THREAD_FUZZER_WRAP_PTHREAD 0
|
#define THREAD_FUZZER_WRAP_PTHREAD 0
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/// Starting from glibc 2.34 there are no internal symbols without version,
|
|
||||||
/// so not __pthread_mutex_lock but __pthread_mutex_lock@2.2.5
|
|
||||||
#if defined(OS_LINUX) and !defined(USE_MUSL)
|
|
||||||
/// You can get version from glibc/sysdeps/unix/sysv/linux/$ARCH/$BITS_OR_BYTE_ORDER/libc.abilist
|
|
||||||
#if defined(__amd64__)
|
|
||||||
# define GLIBC_SYMVER "GLIBC_2.2.5"
|
|
||||||
#elif defined(__aarch64__)
|
|
||||||
# define GLIBC_SYMVER "GLIBC_2.17"
|
|
||||||
#elif defined(__riscv) && (__riscv_xlen == 64)
|
|
||||||
# define GLIBC_SYMVER "GLIBC_2.27"
|
|
||||||
#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
|
||||||
# define GLIBC_SYMVER "GLIBC_2.17"
|
|
||||||
#else
|
|
||||||
# error Your platform is not supported.
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define GLIBC_COMPAT_SYMBOL(func) __asm__(".symver " #func "," #func "@" GLIBC_SYMVER);
|
|
||||||
|
|
||||||
GLIBC_COMPAT_SYMBOL(__pthread_mutex_unlock)
|
|
||||||
GLIBC_COMPAT_SYMBOL(__pthread_mutex_lock)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if THREAD_FUZZER_WRAP_PTHREAD
|
#if THREAD_FUZZER_WRAP_PTHREAD
|
||||||
# define FOR_EACH_WRAPPED_FUNCTION(M) \
|
# define FOR_EACH_WRAPPED_FUNCTION(M) \
|
||||||
M(int, pthread_mutex_lock, pthread_mutex_t * arg) \
|
M(int, pthread_mutex_lock, pthread_mutex_t * arg) \
|
||||||
@ -291,34 +270,128 @@ void ThreadFuzzer::setup() const
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/// We expect that for every function like pthread_mutex_lock there is the same function with two underscores prefix.
|
|
||||||
/// NOTE We cannot use dlsym(... RTLD_NEXT), because it will call pthread_mutex_lock and it will lead to infinite recursion.
|
|
||||||
|
|
||||||
#if THREAD_FUZZER_WRAP_PTHREAD
|
#if THREAD_FUZZER_WRAP_PTHREAD
|
||||||
# define MAKE_WRAPPER(RET, NAME, ...) \
|
#define INJECTION_BEFORE(NAME) \
|
||||||
extern "C" RET __##NAME(__VA_ARGS__); \
|
injection( \
|
||||||
extern "C" RET NAME(__VA_ARGS__) \
|
NAME##_before_yield_probability.load(std::memory_order_relaxed), \
|
||||||
{ \
|
NAME##_before_migrate_probability.load(std::memory_order_relaxed), \
|
||||||
injection( \
|
NAME##_before_sleep_probability.load(std::memory_order_relaxed), \
|
||||||
NAME##_before_yield_probability.load(std::memory_order_relaxed), \
|
NAME##_before_sleep_time_us.load(std::memory_order_relaxed));
|
||||||
NAME##_before_migrate_probability.load(std::memory_order_relaxed), \
|
#define INJECTION_AFTER(NAME) \
|
||||||
NAME##_before_sleep_probability.load(std::memory_order_relaxed), \
|
injection( \
|
||||||
NAME##_before_sleep_time_us.load(std::memory_order_relaxed)); \
|
NAME##_after_yield_probability.load(std::memory_order_relaxed), \
|
||||||
\
|
NAME##_after_migrate_probability.load(std::memory_order_relaxed), \
|
||||||
auto && ret{__##NAME(arg)}; \
|
NAME##_after_sleep_probability.load(std::memory_order_relaxed), \
|
||||||
\
|
NAME##_after_sleep_time_us.load(std::memory_order_relaxed));
|
||||||
injection( \
|
|
||||||
NAME##_after_yield_probability.load(std::memory_order_relaxed), \
|
|
||||||
NAME##_after_migrate_probability.load(std::memory_order_relaxed), \
|
|
||||||
NAME##_after_sleep_probability.load(std::memory_order_relaxed), \
|
|
||||||
NAME##_after_sleep_time_us.load(std::memory_order_relaxed)); \
|
|
||||||
\
|
|
||||||
return ret; \
|
|
||||||
}
|
|
||||||
|
|
||||||
FOR_EACH_WRAPPED_FUNCTION(MAKE_WRAPPER)
|
/// ThreadFuzzer intercepts pthread_mutex_lock()/pthread_mutex_unlock().
|
||||||
|
///
|
||||||
|
/// glibc/musl exports internal symbol
|
||||||
|
/// (__pthread_mutex_lock/__pthread_mutex_unlock) that can be used instead of
|
||||||
|
/// obtaining real symbol with dlsym(RTLD_NEXT).
|
||||||
|
///
|
||||||
|
/// But, starting from glibc 2.34 there are no internal symbols without
|
||||||
|
/// version, so not __pthread_mutex_lock but __pthread_mutex_lock@2.2.5 (see
|
||||||
|
/// GLIBC_COMPAT_SYMBOL macro).
|
||||||
|
///
|
||||||
|
/// While ASan intercepts those symbols too (using RTLD_NEXT), and not only
|
||||||
|
/// public (pthread_mutex_{un,}lock), but also internal
|
||||||
|
/// (__pthread_mutex_{un,}lock).
|
||||||
|
///
|
||||||
|
/// However, since glibc 2.36, dlsym(RTLD_NEXT, "__pthread_mutex_lock") returns
|
||||||
|
/// NULL, because starting from 2.36 it does not return internal symbols with
|
||||||
|
/// RTLD_NEXT (see [1] and [2]).
|
||||||
|
///
|
||||||
|
/// [1]: https://sourceware.org/git/?p=glibc.git;a=commit;h=efa7936e4c91b1c260d03614bb26858fbb8a0204
|
||||||
|
/// [2]: https://gist.github.com/azat/3b5f2ae6011bef2ae86392cea7789eb7
|
||||||
|
///
|
||||||
|
/// And this creates a problem for ThreadFuzzer, since it cannot use internal
|
||||||
|
/// symbol anymore (__pthread_mutex_lock), because it is intercepted by ASan,
|
||||||
|
/// which will call NULL.
|
||||||
|
///
|
||||||
|
/// This issue had been fixed for clang 16 [3], but it hadn't been released yet.
|
||||||
|
///
|
||||||
|
/// [3]: https://reviews.llvm.org/D140957
|
||||||
|
///
|
||||||
|
/// So to fix this, we will use dlsym(RTLD_NEXT) for the ASan build.
|
||||||
|
///
|
||||||
|
/// Note, that we cannot use it for release builds, since:
|
||||||
|
/// - glibc < 2.36 has allocation in dlsym()
|
||||||
|
/// - release build uses jemalloc
|
||||||
|
/// - jemalloc has mutexes for allocations
|
||||||
|
/// And all of this will lead to endless recursion here (note, that it wouldn't
|
||||||
|
/// be a problem if only one of the functions had been intercepted, since jemalloc
|
||||||
|
/// has a guard to not initialize multiple times, but because both intercepted,
|
||||||
|
/// the endless recursion takes place, you can find an example in [4]).
|
||||||
|
///
|
||||||
|
/// [4]: https://gist.github.com/azat/588d9c72c1e70fc13ebe113197883aa2
|
||||||
|
|
||||||
|
/// Starting from glibc 2.34 there are no internal symbols without version,
|
||||||
|
/// so not __pthread_mutex_lock but __pthread_mutex_lock@2.2.5
|
||||||
|
#if defined(OS_LINUX) and !defined(USE_MUSL)
|
||||||
|
/// You can get version from glibc/sysdeps/unix/sysv/linux/$ARCH/$BITS_OR_BYTE_ORDER/libc.abilist
|
||||||
|
#if defined(__amd64__)
|
||||||
|
# define GLIBC_SYMVER "GLIBC_2.2.5"
|
||||||
|
#elif defined(__aarch64__)
|
||||||
|
# define GLIBC_SYMVER "GLIBC_2.17"
|
||||||
|
#elif defined(__riscv) && (__riscv_xlen == 64)
|
||||||
|
# define GLIBC_SYMVER "GLIBC_2.27"
|
||||||
|
#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
||||||
|
# define GLIBC_SYMVER "GLIBC_2.17"
|
||||||
|
#else
|
||||||
|
# error Your platform is not supported.
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define GLIBC_COMPAT_SYMBOL(func) __asm__(".symver " #func "," #func "@" GLIBC_SYMVER);
|
||||||
|
|
||||||
|
GLIBC_COMPAT_SYMBOL(__pthread_mutex_unlock)
|
||||||
|
GLIBC_COMPAT_SYMBOL(__pthread_mutex_lock)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(ADDRESS_SANITIZER)
|
||||||
|
#if USE_JEMALLOC
|
||||||
|
#error "ASan cannot be used with jemalloc"
|
||||||
|
#endif
|
||||||
|
#if defined(USE_MUSL)
|
||||||
|
#error "ASan cannot be used with musl"
|
||||||
|
#endif
|
||||||
|
#include <dlfcn.h>
|
||||||
|
|
||||||
|
static void * getFunctionAddress(const char * name)
|
||||||
|
{
|
||||||
|
void * address = dlsym(RTLD_NEXT, name);
|
||||||
|
chassert(address && "Cannot obtain function address");
|
||||||
|
return address;
|
||||||
|
}
|
||||||
|
#define MAKE_WRAPPER_USING_DLSYM(RET, NAME, ...) \
|
||||||
|
static constinit RET(*real_##NAME)(__VA_ARGS__) = nullptr; \
|
||||||
|
extern "C" RET NAME(__VA_ARGS__) \
|
||||||
|
{ \
|
||||||
|
INJECTION_BEFORE(NAME); \
|
||||||
|
if (unlikely(!real_##NAME)) { \
|
||||||
|
real_##NAME = \
|
||||||
|
reinterpret_cast<RET(*)(__VA_ARGS__)>(getFunctionAddress(#NAME)); \
|
||||||
|
} \
|
||||||
|
auto && ret{real_##NAME(arg)}; \
|
||||||
|
INJECTION_AFTER(NAME); \
|
||||||
|
return ret; \
|
||||||
|
}
|
||||||
|
FOR_EACH_WRAPPED_FUNCTION(MAKE_WRAPPER_USING_DLSYM)
|
||||||
|
#undef MAKE_WRAPPER_USING_DLSYM
|
||||||
|
#else
|
||||||
|
#define MAKE_WRAPPER_USING_INTERNAL_SYMBOLS(RET, NAME, ...) \
|
||||||
|
extern "C" RET __##NAME(__VA_ARGS__); \
|
||||||
|
extern "C" RET NAME(__VA_ARGS__) \
|
||||||
|
{ \
|
||||||
|
INJECTION_BEFORE(NAME); \
|
||||||
|
auto && ret{__##NAME(arg)}; \
|
||||||
|
INJECTION_AFTER(NAME); \
|
||||||
|
return ret; \
|
||||||
|
}
|
||||||
|
FOR_EACH_WRAPPED_FUNCTION(MAKE_WRAPPER_USING_INTERNAL_SYMBOLS)
|
||||||
|
#undef MAKE_WRAPPER_USING_INTERNAL_SYMBOLS
|
||||||
|
#endif
|
||||||
|
|
||||||
# undef MAKE_WRAPPER
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user