2022-07-21 01:34:04 +00:00
# include "atomic.h"
2022-07-17 14:26:00 +00:00
# include <sys/auxv.h>
# include <fcntl.h> // open
# include <sys/stat.h> // O_RDONLY
# include <unistd.h> // read, close
# include <stdlib.h> // ssize_t
# include <stdio.h> // perror, fprintf
# include <link.h> // ElfW
2020-11-14 08:36:44 +00:00
# include <errno.h>
2022-11-01 14:33:52 +00:00
# include "syscall.h"
2022-11-08 16:25:03 +00:00
# if defined(__has_feature)
# if __has_feature(memory_sanitizer)
# include <sanitizer/msan_interface.h>
# endif
# endif
2022-07-17 14:26:00 +00:00
/// Number of elements in the array `a`. Only valid for true arrays, not pointers.
/// The expansion is fully parenthesized so it can be used inside larger expressions.
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
/// Suppress TSan since it is possible for this code to be called from multiple threads,
/// and initialization is safe to be done multiple times from multiple threads.
2024-03-03 23:11:55 +00:00
# define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread")))
2022-07-17 14:26:00 +00:00
// We don't have libc struct available here.
// Compute aux vector manually (from /proc/self/auxv).
//
// Right now there are only 51 AT_* constants,
// so 64 should be enough until this implementation is replaced with musl.
// Table of auxv values indexed by AT_* type, filled by __auxv_init_procfs().
// Being static, it starts zeroed; entries with type >= 64 are rejected by the reader.
static unsigned long __auxv_procfs [ 64 ] ;
2022-01-24 15:11:54 +00:00
// Cached AT_SECURE value. Both __getauxval_procfs() and __getauxval_environ()
// return it directly when asked for AT_SECURE.
static unsigned long __auxv_secure = 0 ;
2022-07-17 14:26:00 +00:00
// Common
// Auxiliary vector located right after the environment block; NULL until
// __auxv_init_environ() has set it.
static unsigned long * __auxv_environ = NULL ;
// Currently active getauxval() implementation (tentative definition here;
// the initialized definition appears after all implementations are defined).
static void * volatile getauxval_func ;
// Forward declaration: __auxv_init_procfs() falls back to this function.
static unsigned long __auxv_init_environ ( unsigned long type ) ;
//
// auxv from procfs interface
//
/// Read up to `count` bytes from `fd` into `buf`, restarting the call whenever
/// it fails with EINTR.
///
/// Returns the number of bytes read as reported by the read syscall.
/// Any failure other than EINTR is reported via perror() and the process is aborted,
/// so the function never returns -1 to the caller.
ssize_t __retry_read(int fd, void * buf, size_t count)
{
    ssize_t ret;
    do
    {
        // We cannot use the libc read() wrapper as it will be intercepted by sanitizers,
        // which aren't initialized yet. Emit the syscall directly.
        // The checks below assume __syscall_ret() reports failure as -1 with errno set.
        ret = __syscall_ret(__syscall(SYS_read, fd, buf, count));
    } while (ret == -1 && errno == EINTR);

    if (ret == -1)
    {
        // No padding around the prefix: perror() appends ": <error text>" itself.
        perror("Cannot read /proc/self/auxv");
        abort();
    }
    return ret;
}
/// Look up `type` in the table that was filled from /proc/self/auxv.
///
/// Returns the stored value; for a `type` outside the table it returns 0
/// and sets errno to ENOENT.
unsigned long NO_SANITIZE_THREAD __getauxval_procfs(unsigned long type)
{
    // AT_SECURE is answered from its dedicated cache, not from the table.
    if (type == AT_SECURE)
        return __auxv_secure;

    if (type < ARRAY_SIZE(__auxv_procfs))
        return __auxv_procfs[type];

    errno = ENOENT;
    return 0;
}
/// Initial getauxval() implementation: load the auxiliary vector from /proc/self/auxv
/// into __auxv_procfs, cache AT_SECURE, switch getauxval_func to __getauxval_procfs()
/// and answer the lookup for `type`.
///
/// If /proc/self/auxv cannot be opened, getauxval_func is switched to
/// __auxv_init_environ() instead and the lookup is delegated to it.
///
/// The process is aborted if reading fails (see __retry_read()) or if the file contains
/// an entry whose type does not fit into __auxv_procfs.
static unsigned long NO_SANITIZE_THREAD __auxv_init_procfs(unsigned long type)
{
#if defined(__x86_64__) && defined(__has_feature)
#    if __has_feature(memory_sanitizer) || __has_feature(thread_sanitizer)
    /// Sanitizers are not compatible with high ASLR entropy, which is the default on modern Linux distributions, and
    /// to workaround this limitation, TSAN and MSAN (couldn't see other sanitizers doing the same), re-exec the binary
    /// without ASLR (see https://github.com/llvm/llvm-project/commit/0784b1eefa36d4acbb0dacd2d18796e26313b6c5)

    /// The problem we face is that, in order to re-exec, the sanitizer wants to use the original pathname in the call
    /// and to get its value it uses getauxval (https://github.com/llvm/llvm-project/blob/20eff684203287828d6722fc860b9d3621429542/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp#L985-L988).
    /// Since we provide getauxval ourselves (to minimize the version dependency on runtime glibc), we are the ones
    /// being called and we fail horribly:
    ///
    ///    ==301455==ERROR: MemorySanitizer: SEGV on unknown address 0x2ffc6d721550 (pc 0x5622c1cc0073 bp 0x000000000003 sp 0x7ffc6d721530 T301455)
    ///    ==301455==The signal is caused by a WRITE memory access.
    ///    #0 0x5622c1cc0073 in __auxv_init_procfs ./ClickHouse/base/glibc-compatibility/musl/getauxval.c:129:5
    ///    #1 0x5622c1cbffe9 in getauxval ./ClickHouse/base/glibc-compatibility/musl/getauxval.c:240:12
    ///    #2 0x5622c0d7bfb4 in __sanitizer::ReExec() crtstuff.c
    ///    #3 0x5622c0df7bfc in __msan::InitShadowWithReExec(bool) crtstuff.c
    ///    #4 0x5622c0d95356 in __msan_init (./ClickHouse/build_msan/contrib/google-protobuf-cmake/protoc+0x256356) (BuildId: 6411d3c88b898ba3f7d49760555977d3e61f0741)
    ///    #5 0x5622c0dfe878 in msan.module_ctor main.cc
    ///    #6 0x5622c1cc156c in __libc_csu_init (./ClickHouse/build_msan/contrib/google-protobuf-cmake/protoc+0x118256c) (BuildId: 6411d3c88b898ba3f7d49760555977d3e61f0741)
    ///    #7 0x73dc05dd7ea3 in __libc_start_main /usr/src/debug/glibc/glibc/csu/../csu/libc-start.c:343:6
    ///    #8 0x5622c0d6b7cd in _start (./ClickHouse/build_msan/contrib/google-protobuf-cmake/protoc+0x22c7cd) (BuildId: 6411d3c88b898ba3f7d49760555977d3e61f0741)

    /// The source of the issue above is that, at this point in time during __msan_init, we can't really do much as
    /// most global variables aren't initialized or available yet, so we can't initiate the auxiliary vector.
    /// Normal glibc / musl getauxval doesn't have this problem since they initiate their auxval vector at the very
    /// start of __libc_start_main (just keeping track of argv+argc+1), but we don't have such option (otherwise
    /// this complexity of reading "/proc/self/auxv" or using __environ would not be necessary).

    /// To avoid these crashes on the re-exec call (see above how it would fail when creating `aux`, and if we used
    /// __auxv_init_environ then it would SIGSEGV on READing `__environ`) we capture this call for `AT_EXECFN` and
    /// unconditionally return "/proc/self/exe" without any preparation. Theoretically this should be fine in
    /// our case, as we don't load any libraries. That's the theory at least.
    if (type == AT_EXECFN)
        return (unsigned long)"/proc/self/exe";
#    endif
#endif

    // For debugging:
    // - od -t dL /proc/self/auxv
    // - LD_SHOW_AUXV=1 ls
    int fd = open("/proc/self/auxv", O_RDONLY);
    // Opening may fail in case of:
    // - no procfs mounted
    // - on android you are not able to read it unless running from shell or debugging
    // - some other issues
    if (fd == -1)
    {
        // Fallback to environ.
        a_cas_p(&getauxval_func, (void *)__auxv_init_procfs, (void *)__auxv_init_environ);
        return __auxv_init_environ(type);
    }

    ElfW(auxv_t) aux;

    /// NOTE: sizeof(aux) is very small (less than PAGE_SIZE), so partial read should not be possible.
    _Static_assert(sizeof(aux) < 4096, "Unexpected sizeof(aux)");
    while (__retry_read(fd, &aux, sizeof(aux)) == sizeof(aux))
    {
#if defined(__has_feature)
#    if __has_feature(memory_sanitizer)
        // The buffer was filled by a raw syscall that MSan did not observe.
        __msan_unpoison(&aux, sizeof(aux));
#    endif
#endif
        if (aux.a_type == AT_NULL)
        {
            break;
        }
        if (aux.a_type == AT_IGNORE || aux.a_type == AT_IGNOREPPC)
        {
            continue;
        }

        if (aux.a_type >= ARRAY_SIZE(__auxv_procfs))
        {
            // a_type is unsigned, so print it as such (cast keeps the format portable across ELF classes).
            fprintf(stderr, "AT_* is out of range: %lu (maximum allowed is %zu)\n",
                (unsigned long)aux.a_type, ARRAY_SIZE(__auxv_procfs));
            abort();
        }

        /// The slot may already be filled due to a race on initialization;
        /// overwriting it is harmless since every thread reads the same file.
        __auxv_procfs[aux.a_type] = aux.a_un.a_val;
    }
    close(fd);

    // Take AT_SECURE straight from the table: __getauxval_procfs(AT_SECURE) returns
    // __auxv_secure itself, so going through it would leave the cached value untouched.
    _Static_assert(AT_SECURE < ARRAY_SIZE(__auxv_procfs), "AT_SECURE must fit into __auxv_procfs");
    __auxv_secure = __auxv_procfs[AT_SECURE];

    // Now we've initialized __auxv_procfs, next time getauxval() will only call __getauxval_procfs().
    a_cas_p(&getauxval_func, (void *)__auxv_init_procfs, (void *)__getauxval_procfs);

    return __getauxval_procfs(type);
}
//
// auxv from environ interface
//
// NOTE: environ is available only after static initializers have run,
// so you cannot rely on this if you need getauxval() earlier.
//
// A good example of such a user is the sanitizers; for example,
// LSan will not work with __auxv_init_environ(),
// since it needs getauxval() before that point.
//
/// Search __auxv_environ for an entry of the given `type`.
///
/// The vector is walked two slots at a time (type slot, then value slot) and the
/// walk stops at the first zero type slot.
///
/// Returns the index of the value slot of the matching entry,
/// or (size_t)-1 when no entry matches.
static size_t NO_SANITIZE_THREAD __find_auxv(unsigned long type)
{
    size_t pos = 0;
    while (__auxv_environ[pos] != 0)
    {
        if (__auxv_environ[pos] == type)
            return pos + 1;
        pos += 2;
    }
    return (size_t)-1;
}
2022-07-17 14:26:00 +00:00
/// Look up `type` in the auxiliary vector found after the environment block.
///
/// Returns the stored value. When __auxv_environ is not set or holds no entry
/// of that type, returns 0 and sets errno to ENOENT.
unsigned long NO_SANITIZE_THREAD __getauxval_environ(unsigned long type)
{
    // AT_SECURE is answered from its dedicated cache.
    if (type == AT_SECURE)
        return __auxv_secure;

    const size_t not_found = (size_t)-1;
    size_t value_idx = __auxv_environ ? __find_auxv(type) : not_found;
    if (value_idx == not_found)
    {
        errno = ENOENT;
        return 0;
    }
    return __auxv_environ[value_idx];
}
2022-07-17 14:26:00 +00:00
/// Fallback initializer: derive the auxiliary vector from the memory that follows
/// the NULL terminator of __environ, cache AT_SECURE, switch getauxval_func to
/// __getauxval_environ() and answer the lookup for `type`.
///
/// While __environ is still NULL nothing is initialized; the call returns 0 with
/// errno set to ENOENT and a later call will try again.
static unsigned long NO_SANITIZE_THREAD __auxv_init_environ(unsigned long type)
{
    if (__environ == NULL)
    {
        // __environ is not initialized yet so we can't initialize __auxv_environ right now.
        // That normally happens only when getauxval() is called from some sanitizer's internal code.
        errno = ENOENT;
        return 0;
    }

    // Skip over all environment entries; the vector starts right after the terminating NULL.
    char ** env_end = __environ;
    while (*env_end)
        ++env_end;
    __auxv_environ = (unsigned long *)(env_end + 1);

    // Cache AT_SECURE if the vector carries it.
    size_t secure_slot = __find_auxv(AT_SECURE);
    if (secure_slot != (size_t)-1)
        __auxv_secure = __auxv_environ[secure_slot];

    // Everything is initialized, so all later calls can go straight to __getauxval_environ.
    a_cas_p(&getauxval_func, (void *)__auxv_init_environ, (void *)__getauxval_environ);

    return __getauxval_environ(type);
}
2022-07-17 14:26:00 +00:00
// Callchain:
// - __auxv_init_procfs -> __getauxval_procfs
// - __auxv_init_procfs -> __auxv_init_environ -> __getauxval_environ
// Implementation that getauxval() dispatches to. It starts as __auxv_init_procfs and is
// replaced via a_cas_p() by the initializers once they have finished their work.
static void * volatile getauxval_func = ( void * ) __auxv_init_procfs ;
2021-08-25 11:19:20 +00:00
2024-07-25 19:56:42 +00:00
/// Public entry point: forward the lookup for `type` to whichever implementation
/// getauxval_func currently points at and return its result.
unsigned long NO_SANITIZE_THREAD getauxval(unsigned long type)
{
    typedef unsigned long (*auxv_lookup_fn)(unsigned long);

    // Single read of the volatile pointer, then call through it.
    auxv_lookup_fn impl = (auxv_lookup_fn)getauxval_func;
    return impl(type);
}