diff --git a/.gitmodules b/.gitmodules index 12d865307d8..7e0b4df4ad1 100644 --- a/.gitmodules +++ b/.gitmodules @@ -372,3 +372,6 @@ [submodule "contrib/double-conversion"] path = contrib/double-conversion url = https://github.com/ClickHouse/double-conversion.git +[submodule "contrib/numactl"] + path = contrib/numactl + url = https://github.com/ClickHouse/numactl.git diff --git a/base/base/CMakeLists.txt b/base/base/CMakeLists.txt index 159502c9735..341c92d3042 100644 --- a/base/base/CMakeLists.txt +++ b/base/base/CMakeLists.txt @@ -32,6 +32,7 @@ set (SRCS StringRef.cpp safeExit.cpp throwError.cpp + Numa.cpp ) add_library (common ${SRCS}) @@ -46,6 +47,10 @@ if (TARGET ch_contrib::crc32_s390x) target_link_libraries(common PUBLIC ch_contrib::crc32_s390x) endif() +if (TARGET ch_contrib::numactl) + target_link_libraries(common PUBLIC ch_contrib::numactl) +endif() + target_include_directories(common PUBLIC .. "${CMAKE_CURRENT_BINARY_DIR}/..") target_link_libraries (common diff --git a/base/base/Numa.cpp b/base/base/Numa.cpp new file mode 100644 index 00000000000..0bf56a993b8 --- /dev/null +++ b/base/base/Numa.cpp @@ -0,0 +1,37 @@ +#include + +#include "config.h" + +#if USE_NUMACTL +# include +#endif + +namespace DB +{ + +std::optional getNumaNodesTotalMemory() +{ + std::optional total_memory; +#if USE_NUMACTL + if (numa_available() != -1) + { + auto * membind = numa_get_membind(); + if (!numa_bitmask_equal(membind, numa_all_nodes_ptr)) + { + total_memory.emplace(0); + auto max_node = numa_max_node(); + for (int i = 0; i <= max_node; ++i) + { + if (numa_bitmask_isbitset(membind, i)) + *total_memory += numa_node_size(i, nullptr); + } + } + + numa_bitmask_free(membind); + } + +#endif + return total_memory; +} + +} diff --git a/base/base/Numa.h b/base/base/Numa.h new file mode 100644 index 00000000000..b48ab15766b --- /dev/null +++ b/base/base/Numa.h @@ -0,0 +1,12 @@ +#pragma once + +#include + +namespace DB +{ + +/// return total memory of NUMA nodes the process is bound to +/// if NUMA is not supported or process can use all nodes, std::nullopt is returned +std::optional getNumaNodesTotalMemory(); + +} diff --git a/base/base/getMemoryAmount.cpp b/base/base/getMemoryAmount.cpp index afdb6ba068a..03aab1eac72 100644 --- a/base/base/getMemoryAmount.cpp +++ b/base/base/getMemoryAmount.cpp @@ -2,15 +2,14 @@ #include #include +#include #include -#include #include #include #include - namespace { @@ -63,6 +62,9 @@ uint64_t getMemoryAmountOrZero() uint64_t memory_amount = num_pages * page_size; + if (auto total_numa_memory = DB::getNumaNodesTotalMemory(); total_numa_memory.has_value()) + memory_amount = *total_numa_memory; + /// Respect the memory limit set by cgroups v2. auto limit_v2 = getCgroupsV2MemoryLimit(); if (limit_v2.has_value() && *limit_v2 < memory_amount) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 90ae5981a21..977efda15ff 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -230,6 +230,8 @@ add_contrib (libssh-cmake libssh) add_contrib (prometheus-protobufs-cmake prometheus-protobufs prometheus-protobufs-gogo) +add_contrib(numactl-cmake numactl) + # Put all targets defined here and in subdirectories under "contrib/" folders in GUI-based IDEs. # Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear # in "contrib/..." as originally planned, so we workaround this by fixing FOLDER properties of all targets manually, diff --git a/contrib/numactl b/contrib/numactl new file mode 160000 index 00000000000..8d13d63a05f --- /dev/null +++ b/contrib/numactl @@ -0,0 +1 @@ +Subproject commit 8d13d63a05f0c3cd88bf777cbb61541202b7da08 diff --git a/contrib/numactl-cmake/CMakeLists.txt b/contrib/numactl-cmake/CMakeLists.txt new file mode 100644 index 00000000000..a72ff11e485 --- /dev/null +++ b/contrib/numactl-cmake/CMakeLists.txt @@ -0,0 +1,30 @@ +if (NOT ( + OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64 OR ARCH_LOONGARCH64)) +) + if (ENABLE_NUMACTL) + message (${RECONFIGURE_MESSAGE_LEVEL} + "numactl is disabled implicitly because the OS or architecture is not supported. Use -DENABLE_NUMACTL=0") + endif () + set (ENABLE_NUMACTL OFF) +else() + option (ENABLE_NUMACTL "Enable numactl" ${ENABLE_LIBRARIES}) +endif() + +if (NOT ENABLE_NUMACTL) + message (STATUS "Not using numactl") + return() +endif () + +set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/numactl") + +set (SRCS + "${LIBRARY_DIR}/libnuma.c" + "${LIBRARY_DIR}/syscall.c" +) + +add_library(_numactl ${SRCS}) + +target_include_directories(_numactl SYSTEM PRIVATE include) +target_include_directories(_numactl SYSTEM PUBLIC "${LIBRARY_DIR}") + +add_library(ch_contrib::numactl ALIAS _numactl) diff --git a/contrib/numactl-cmake/include/config.h b/contrib/numactl-cmake/include/config.h new file mode 100644 index 00000000000..a304db38e53 --- /dev/null +++ b/contrib/numactl-cmake/include/config.h @@ -0,0 +1,82 @@ +/* config.h. Generated from config.h.in by configure. */ +/* config.h.in. Generated from configure.ac by autoheader. */ + +/* Checking for symver attribute */ +#define HAVE_ATTRIBUTE_SYMVER 0 + +/* Define to 1 if you have the header file. */ +#define HAVE_DLFCN_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDIO_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRINGS_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_UNISTD_H 1 + +/* Define to the sub-directory where libtool stores uninstalled libraries. */ +#define LT_OBJDIR ".libs/" + +/* Name of package */ +#define PACKAGE "numactl" + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "numactl" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "numactl 2.1" + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "numactl" + +/* Define to the home page for this package. */ +#define PACKAGE_URL "" + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "2.1" + +/* Define to 1 if all of the C89 standard headers exist (not just the ones + required in a freestanding environment). This macro is provided for + backward compatibility; new code need not use it. */ +#define STDC_HEADERS 1 + +/* If the compiler supports a TLS storage class define it to that here */ +#define TLS __thread + +/* Version number of package */ +#define VERSION "2.1" + +/* Number of bits in a file offset, on hosts where this is settable. */ +/* #undef _FILE_OFFSET_BITS */ + +/* Define to 1 on platforms where this makes off_t a 64-bit type. */ +/* #undef _LARGE_FILES */ + +/* Number of bits in time_t, on hosts where this is settable. */ +/* #undef _TIME_BITS */ + +/* Define to 1 on platforms where this makes time_t a 64-bit type. */ +/* #undef __MINGW_USE_VC2005_COMPAT */ diff --git a/docker/test/performance-comparison/run.sh b/docker/test/performance-comparison/run.sh index 7afb5da59b1..6ef781fa4c8 100644 --- a/docker/test/performance-comparison/run.sh +++ b/docker/test/performance-comparison/run.sh @@ -13,6 +13,7 @@ entry="/usr/share/clickhouse-test/performance/scripts/entrypoint.sh" # https://www.kernel.org/doc/Documentation/filesystems/tmpfs.txt # Double-escaped backslashes are a tribute to the engineering wonder of docker -- # it gives '/bin/sh: 1: [bash,: not found' otherwise. +numactl --hardware node=$(( RANDOM % $(numactl --hardware | sed -n 's/^.*available:\(.*\)nodes.*$/\1/p') )); echo Will bind to NUMA node $node; numactl --cpunodebind=$node --membind=$node $entry diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 8cf1a4d1999..783f60cb8ff 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -311,6 +312,12 @@ try MainThreadStatus::getInstance(); + if (auto total_numa_memory = getNumaNodesTotalMemory(); total_numa_memory.has_value()) + { + LOG_INFO( + log, "Keeper is bound to a subset of NUMA nodes. Total memory of all available nodes: {}", ReadableSize(*total_numa_memory)); + } + #if !defined(NDEBUG) || !defined(__OPTIMIZE__) LOG_WARNING(log, "Keeper was built in debug mode. It will work slowly."); #endif diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 16888015f8b..86ebd3a3225 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -140,6 +141,7 @@ # include #endif + #include /// A minimal file used when the server is run without installation INCBIN(resource_embedded_xml, SOURCE_DIR "/programs/server/embedded.xml"); @@ -754,6 +756,12 @@ try setenv("OPENSSL_CONF", config_dir.c_str(), true); /// NOLINT } + if (auto total_numa_memory = getNumaNodesTotalMemory(); total_numa_memory.has_value()) + { + LOG_INFO( + log, "ClickHouse is bound to a subset of NUMA nodes. Total memory of all available nodes: {}", ReadableSize(*total_numa_memory)); + } + registerInterpreters(); registerFunctions(); registerAggregateFunctions(); diff --git a/src/Common/config.h.in b/src/Common/config.h.in index f68701d5d10..6a0090130a3 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -64,6 +64,7 @@ #cmakedefine01 USE_LIBARCHIVE #cmakedefine01 USE_POCKETFFT #cmakedefine01 USE_PROMETHEUS_PROTOBUFS +#cmakedefine01 USE_NUMACTL /// This is needed for .incbin in assembly. For some reason, include paths don't work there in presence of LTO. /// That's why we use absolute paths. diff --git a/src/configure_config.cmake b/src/configure_config.cmake index 75f61baa854..d22bf674df4 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -173,5 +173,8 @@ endif() if (TARGET ch_contrib::prometheus_protobufs) set(USE_PROMETHEUS_PROTOBUFS 1) endif() +if (TARGET ch_contrib::numactl) + set(USE_NUMACTL 1) +endif() set(SOURCE_DIR ${PROJECT_SOURCE_DIR})