2021-11-03 05:11:36 +00:00
|
|
|
#include "remapExecutable.h"
|
|
|
|
|
2020-09-14 18:00:45 +00:00
|
|
|
#if defined(__linux__) && defined(__amd64__) && defined(__SSE2__) && !defined(SANITIZER) && defined(NDEBUG) && !defined(SPLIT_SHARED_LIBRARIES)
|
2020-09-10 09:05:57 +00:00
|
|
|
|
|
|
|
#include <sys/mman.h>
|
|
|
|
#include <unistd.h>
|
2020-09-17 12:53:52 +00:00
|
|
|
#include <string.h>
|
2020-09-10 09:05:57 +00:00
|
|
|
#include <sys/syscall.h>
|
|
|
|
|
|
|
|
#include <emmintrin.h>
|
|
|
|
|
2020-09-17 12:53:52 +00:00
|
|
|
#include <Common/getMappedArea.h>
|
2020-09-10 09:05:57 +00:00
|
|
|
#include <Common/Exception.h>
|
2020-09-17 12:53:52 +00:00
|
|
|
#include <fmt/format.h>
|
2020-09-10 09:05:57 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
namespace ErrorCodes
|
|
|
|
{
|
|
|
|
extern const int CANNOT_ALLOCATE_MEMORY;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
namespace
|
|
|
|
{
|
|
|
|
|
2021-05-27 20:43:00 +00:00
|
|
|
/** Raw Linux x86-64 syscall trampoline that does not depend on libc.
  *
  * It is needed because the remapping code runs while the usual code addresses
  * (including libc) may be unmapped; a copy of this function is invoked via a relocated pointer.
  *
  * Arguments: (syscall number, arg1, ..., arg6), passed with the regular System V calling convention.
  * The body shifts them into the kernel convention:
  *   rdi -> rax (number), rsi -> rdi, rdx -> rsi, rcx -> rdx, r8 -> r10, r9 -> r8, first stack argument -> r9.
  * If fewer than six syscall arguments are supplied, the remaining registers hold garbage,
  * which the kernel ignores for syscalls that do not use them.
  *
  * Returns the raw kernel result: a non-negative value on success or -errno on failure
  * (it is NOT translated to -1 / errno as the libc wrappers do).
  *
  * The function is 'naked': the compiler emits no prologue/epilogue, so on entry (%rsp) is the return
  * address and 8(%rsp) is the first stack-passed argument regardless of optimization level or
  * frame-pointer settings. The assembly performs the 'ret' itself, therefore there is no C++ return statement.
  * Only basic asm (no operands, single '%' for registers) is allowed inside a naked function.
  */
/// NOLINTNEXTLINE(cert-dcl50-cpp)
__attribute__((__naked__, __noinline__)) int64_t our_syscall(...)
{
    __asm__ (R"(
        movq %rdi,%rax;
        movq %rsi,%rdi;
        movq %rdx,%rsi;
        movq %rcx,%rdx;
        movq %r8,%r10;
        movq %r9,%r8;
        movq 8(%rsp),%r9;
        syscall;
        ret
    )");
}
|
|
|
|
|
|
|
|
|
|
|
|
__attribute__((__noinline__)) void remapToHugeStep3(void * scratch, size_t size, size_t offset)
|
|
|
|
{
|
|
|
|
/// The function should not use the stack, otherwise various optimizations, including "omit-frame-pointer" may break the code.
|
|
|
|
|
|
|
|
/// Unmap the scratch area.
|
|
|
|
our_syscall(SYS_munmap, scratch, size);
|
|
|
|
|
|
|
|
/** The return address of this function is pointing to scratch area (because it was called from there).
|
|
|
|
* But the scratch area no longer exists. We should correct the return address by subtracting the offset.
|
|
|
|
*/
|
|
|
|
__asm__ __volatile__("subq %0, 8(%%rsp)" : : "r"(offset) : "memory");
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
__attribute__((__noinline__)) void remapToHugeStep2(void * begin, size_t size, void * scratch)
|
|
|
|
{
|
|
|
|
/** Unmap old memory region with the code of our program.
|
|
|
|
* Our instruction pointer is located inside scratch area and this function can execute after old code is unmapped.
|
|
|
|
* But it cannot call any other functions because they are not available at usual addresses
|
2020-09-10 17:16:12 +00:00
|
|
|
* - that's why we have to use "our_syscall" function and a substitution for memcpy.
|
2020-09-10 09:05:57 +00:00
|
|
|
* (Relative addressing may continue to work but we should not assume that).
|
|
|
|
*/
|
|
|
|
|
|
|
|
int64_t offset = reinterpret_cast<intptr_t>(scratch) - reinterpret_cast<intptr_t>(begin);
|
|
|
|
int64_t (*syscall_func)(...) = reinterpret_cast<int64_t (*)(...)>(reinterpret_cast<intptr_t>(our_syscall) + offset);
|
|
|
|
|
|
|
|
int64_t munmap_res = syscall_func(SYS_munmap, begin, size);
|
|
|
|
if (munmap_res != 0)
|
|
|
|
return;
|
|
|
|
|
|
|
|
/// Map new anonymous memory region in place of old region with code.
|
|
|
|
|
|
|
|
int64_t mmap_res = syscall_func(SYS_mmap, begin, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
|
|
|
|
if (-1 == mmap_res)
|
|
|
|
syscall_func(SYS_exit, 1);
|
|
|
|
|
|
|
|
/// As the memory region is anonymous, we can do madvise with MADV_HUGEPAGE.
|
|
|
|
|
|
|
|
syscall_func(SYS_madvise, begin, size, MADV_HUGEPAGE);
|
|
|
|
|
|
|
|
/// Copy the code from scratch area to the old memory location.
|
|
|
|
|
2020-09-10 17:16:12 +00:00
|
|
|
{
|
|
|
|
__m128i * __restrict dst = reinterpret_cast<__m128i *>(begin);
|
|
|
|
const __m128i * __restrict src = reinterpret_cast<const __m128i *>(scratch);
|
|
|
|
const __m128i * __restrict src_end = reinterpret_cast<const __m128i *>(reinterpret_cast<const char *>(scratch) + size);
|
|
|
|
while (src < src_end)
|
|
|
|
{
|
|
|
|
_mm_storeu_si128(dst, _mm_loadu_si128(src));
|
|
|
|
|
|
|
|
++dst;
|
|
|
|
++src;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-09-10 09:05:57 +00:00
|
|
|
/// Make the memory area with the code executable and non-writable.
|
|
|
|
|
|
|
|
syscall_func(SYS_mprotect, begin, size, PROT_READ | PROT_EXEC);
|
|
|
|
|
|
|
|
/** Step 3 function should unmap the scratch area.
|
|
|
|
* The currently executed code is located in the scratch area and cannot be removed here.
|
|
|
|
* We have to call another function and use its address from the original location (not in scratch area).
|
2020-09-10 09:14:31 +00:00
|
|
|
* To do it, we obtain its pointer and call by pointer.
|
2020-09-10 09:05:57 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
void(* volatile step3)(void*, size_t, size_t) = remapToHugeStep3;
|
|
|
|
step3(scratch, size, offset);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
__attribute__((__noinline__)) void remapToHugeStep1(void * begin, size_t size)
|
|
|
|
{
|
|
|
|
/// Allocate scratch area and copy the code there.
|
|
|
|
|
|
|
|
void * scratch = mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
|
|
|
if (MAP_FAILED == scratch)
|
|
|
|
throwFromErrno(fmt::format("Cannot mmap {} bytes", size), ErrorCodes::CANNOT_ALLOCATE_MEMORY);
|
|
|
|
|
|
|
|
memcpy(scratch, begin, size);
|
|
|
|
|
|
|
|
/// Offset to the scratch area from previous location.
|
|
|
|
|
|
|
|
int64_t offset = reinterpret_cast<intptr_t>(scratch) - reinterpret_cast<intptr_t>(begin);
|
|
|
|
|
|
|
|
/// Jump to the next function inside the scratch area.
|
|
|
|
|
|
|
|
reinterpret_cast<void(*)(void*, size_t, void*)>(reinterpret_cast<intptr_t>(remapToHugeStep2) + offset)(begin, size, scratch);
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2021-11-03 05:11:36 +00:00
|
|
|
size_t remapExecutable()
|
2020-09-10 09:05:57 +00:00
|
|
|
{
|
|
|
|
auto [begin, size] = getMappedArea(reinterpret_cast<void *>(remapExecutable));
|
|
|
|
remapToHugeStep1(begin, size);
|
2021-11-03 05:11:36 +00:00
|
|
|
return size;
|
2020-09-10 09:05:57 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2021-11-03 05:11:36 +00:00
|
|
|
/// Stub for build configurations excluded by the preprocessor condition above:
/// nothing is remapped, so the reported size is zero.
size_t remapExecutable()
{
    constexpr size_t remapped_bytes = 0;
    return remapped_bytes;
}
|
2020-09-10 09:05:57 +00:00
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|