From 65573871485c2e8ca45d791551856fd2f8622cf9 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 25 Jul 2024 11:05:36 +0200 Subject: [PATCH 1/6] Try calcualting memory with numactl if membind used --- .gitmodules | 3 + base/base/CMakeLists.txt | 4 ++ base/base/getMemoryAmount.cpp | 26 +++++++- contrib/CMakeLists.txt | 2 + contrib/numactl | 1 + contrib/numactl-cmake/CMakeLists.txt | 20 +++++++ contrib/numactl-cmake/include/config.h | 82 ++++++++++++++++++++++++++ programs/server/Server.cpp | 27 +++++++++ src/Common/config.h.in | 1 + src/configure_config.cmake | 3 + 10 files changed, 168 insertions(+), 1 deletion(-) create mode 160000 contrib/numactl create mode 100644 contrib/numactl-cmake/CMakeLists.txt create mode 100644 contrib/numactl-cmake/include/config.h diff --git a/.gitmodules b/.gitmodules index 12d865307d8..b5d7e1e56b3 100644 --- a/.gitmodules +++ b/.gitmodules @@ -372,3 +372,6 @@ [submodule "contrib/double-conversion"] path = contrib/double-conversion url = https://github.com/ClickHouse/double-conversion.git +[submodule "contrib/numactl"] + path = contrib/numactl + url = https://github.com/numactl/numactl.git diff --git a/base/base/CMakeLists.txt b/base/base/CMakeLists.txt index 159502c9735..451a6eb5e8b 100644 --- a/base/base/CMakeLists.txt +++ b/base/base/CMakeLists.txt @@ -46,6 +46,10 @@ if (TARGET ch_contrib::crc32_s390x) target_link_libraries(common PUBLIC ch_contrib::crc32_s390x) endif() +if (TARGET ch_contrib::numactl) + target_link_libraries(common PUBLIC ch_contrib::numactl) +endif() + target_include_directories(common PUBLIC .. "${CMAKE_CURRENT_BINARY_DIR}/..") target_link_libraries (common diff --git a/base/base/getMemoryAmount.cpp b/base/base/getMemoryAmount.cpp index afdb6ba068a..b8162146496 100644 --- a/base/base/getMemoryAmount.cpp +++ b/base/base/getMemoryAmount.cpp @@ -4,12 +4,17 @@ #include #include -#include #include #include #include +#include "config.h" + +#if USE_NUMACTL +#include +#endif + namespace { @@ -63,6 +68,25 @@ uint64_t getMemoryAmountOrZero() uint64_t memory_amount = num_pages * page_size; +#if USE_NUMACTL + if (numa_available() != -1) + { + auto * membind = numa_get_membind(); + if (!numa_bitmask_equal(membind, numa_all_nodes_ptr)) + { + uint64_t total_numa_memory = 0; + auto max_node = numa_max_node(); + for (int i = 0; i <= max_node; ++i) + { + if (numa_bitmask_isbitset(membind, i)) + total_numa_memory += numa_node_size(i, nullptr); + } + + memory_amount = total_numa_memory; + } + } +#endif + /// Respect the memory limit set by cgroups v2. auto limit_v2 = getCgroupsV2MemoryLimit(); if (limit_v2.has_value() && *limit_v2 < memory_amount) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 90ae5981a21..977efda15ff 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -230,6 +230,8 @@ add_contrib (libssh-cmake libssh) add_contrib (prometheus-protobufs-cmake prometheus-protobufs prometheus-protobufs-gogo) +add_contrib(numactl-cmake numactl) + # Put all targets defined here and in subdirectories under "contrib/" folders in GUI-based IDEs. # Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear # in "contrib/..." as originally planned, so we workaround this by fixing FOLDER properties of all targets manually, diff --git a/contrib/numactl b/contrib/numactl new file mode 160000 index 00000000000..3871b1c42fc --- /dev/null +++ b/contrib/numactl @@ -0,0 +1 @@ +Subproject commit 3871b1c42fc71bceadafd745d2eff5dddfc2d67e diff --git a/contrib/numactl-cmake/CMakeLists.txt b/contrib/numactl-cmake/CMakeLists.txt new file mode 100644 index 00000000000..5d086366c7f --- /dev/null +++ b/contrib/numactl-cmake/CMakeLists.txt @@ -0,0 +1,20 @@ +option (ENABLE_NUMACTL "Enable numactl" ${ENABLE_LIBRARIES}) + +if (NOT ENABLE_NUMACTL) + message (STATUS "Not using numactl") + return() +endif () + +set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/numactl") + +set (SRCS + "${LIBRARY_DIR}/libnuma.c" + "${LIBRARY_DIR}/syscall.c" +) + +add_library(_numactl ${SRCS}) + +target_include_directories(_numactl SYSTEM PRIVATE include) +target_include_directories(_numactl SYSTEM PUBLIC "${LIBRARY_DIR}") + +add_library(ch_contrib::numactl ALIAS _numactl) diff --git a/contrib/numactl-cmake/include/config.h b/contrib/numactl-cmake/include/config.h new file mode 100644 index 00000000000..a304db38e53 --- /dev/null +++ b/contrib/numactl-cmake/include/config.h @@ -0,0 +1,82 @@ +/* config.h. Generated from config.h.in by configure. */ +/* config.h.in. Generated from configure.ac by autoheader. */ + +/* Checking for symver attribute */ +#define HAVE_ATTRIBUTE_SYMVER 0 + +/* Define to 1 if you have the header file. */ +#define HAVE_DLFCN_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDIO_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRINGS_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_UNISTD_H 1 + +/* Define to the sub-directory where libtool stores uninstalled libraries. */ +#define LT_OBJDIR ".libs/" + +/* Name of package */ +#define PACKAGE "numactl" + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "numactl" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "numactl 2.1" + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "numactl" + +/* Define to the home page for this package. */ +#define PACKAGE_URL "" + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "2.1" + +/* Define to 1 if all of the C89 standard headers exist (not just the ones + required in a freestanding environment). This macro is provided for + backward compatibility; new code need not use it. */ +#define STDC_HEADERS 1 + +/* If the compiler supports a TLS storage class define it to that here */ +#define TLS __thread + +/* Version number of package */ +#define VERSION "2.1" + +/* Number of bits in a file offset, on hosts where this is settable. */ +/* #undef _FILE_OFFSET_BITS */ + +/* Define to 1 on platforms where this makes off_t a 64-bit type. */ +/* #undef _LARGE_FILES */ + +/* Number of bits in time_t, on hosts where this is settable. */ +/* #undef _TIME_BITS */ + +/* Define to 1 on platforms where this makes time_t a 64-bit type. */ +/* #undef __MINGW_USE_VC2005_COMPAT */ diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 16888015f8b..619a72ff200 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -140,6 +140,11 @@ # include #endif +#if USE_NUMACTL +#include +#endif + + #include /// A minimal file used when the server is run without installation INCBIN(resource_embedded_xml, SOURCE_DIR "/programs/server/embedded.xml"); @@ -754,6 +759,28 @@ try setenv("OPENSSL_CONF", config_dir.c_str(), true); /// NOLINT } +#if USE_NUMACTL + if (numa_available() != -1) + { + auto * membind = numa_get_membind(); + if (!numa_bitmask_equal(membind, numa_all_nodes_ptr)) + { + uint64_t total_numa_memory = 0; + auto max_node = numa_max_node(); + for (int i = 0; i <= max_node; ++i) + { + if (numa_bitmask_isbitset(membind, i)) + total_numa_memory += numa_node_size(i, nullptr); + } + + LOG_INFO( + log, + "ClickHouse is bound to a subset of NUMA nodes. Total memory of all available nodes {}", + ReadableSize(total_numa_memory)); + } + } +#endif + registerInterpreters(); registerFunctions(); registerAggregateFunctions(); diff --git a/src/Common/config.h.in b/src/Common/config.h.in index f68701d5d10..6a0090130a3 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -64,6 +64,7 @@ #cmakedefine01 USE_LIBARCHIVE #cmakedefine01 USE_POCKETFFT #cmakedefine01 USE_PROMETHEUS_PROTOBUFS +#cmakedefine01 USE_NUMACTL /// This is needed for .incbin in assembly. For some reason, include paths don't work there in presence of LTO. /// That's why we use absolute paths. diff --git a/src/configure_config.cmake b/src/configure_config.cmake index 75f61baa854..d22bf674df4 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -173,5 +173,8 @@ endif() if (TARGET ch_contrib::prometheus_protobufs) set(USE_PROMETHEUS_PROTOBUFS 1) endif() +if (TARGET ch_contrib::numactl) + set(USE_NUMACTL 1) +endif() set(SOURCE_DIR ${PROJECT_SOURCE_DIR}) From 2988e13050f0ab8a06e36ec8fe745386a214141b Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 25 Jul 2024 13:55:01 +0200 Subject: [PATCH 2/6] Free bitmask --- base/base/getMemoryAmount.cpp | 1 + programs/server/Server.cpp | 2 ++ 2 files changed, 3 insertions(+) diff --git a/base/base/getMemoryAmount.cpp b/base/base/getMemoryAmount.cpp index b8162146496..56cddbfd628 100644 --- a/base/base/getMemoryAmount.cpp +++ b/base/base/getMemoryAmount.cpp @@ -84,6 +84,7 @@ uint64_t getMemoryAmountOrZero() memory_amount = total_numa_memory; } + numa_bitmask_free(membind); } #endif diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 619a72ff200..b9a7c298f00 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -778,6 +778,8 @@ try "ClickHouse is bound to a subset of NUMA nodes. Total memory of all available nodes {}", ReadableSize(total_numa_memory)); } + + numa_bitmask_free(membind); } #endif From 287cce7d211b9386895a4fa898f87405eccb3e96 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 26 Jul 2024 09:20:15 +0200 Subject: [PATCH 3/6] Fixes --- .gitmodules | 2 +- contrib/numactl | 2 +- docker/test/performance-comparison/run.sh | 1 + programs/server/Server.cpp | 9 ++++++++- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/.gitmodules b/.gitmodules index b5d7e1e56b3..7e0b4df4ad1 100644 --- a/.gitmodules +++ b/.gitmodules @@ -374,4 +374,4 @@ url = https://github.com/ClickHouse/double-conversion.git [submodule "contrib/numactl"] path = contrib/numactl - url = https://github.com/numactl/numactl.git + url = https://github.com/ClickHouse/numactl.git diff --git a/contrib/numactl b/contrib/numactl index 3871b1c42fc..8d13d63a05f 160000 --- a/contrib/numactl +++ b/contrib/numactl @@ -1 +1 @@ -Subproject commit 3871b1c42fc71bceadafd745d2eff5dddfc2d67e +Subproject commit 8d13d63a05f0c3cd88bf777cbb61541202b7da08 diff --git a/docker/test/performance-comparison/run.sh b/docker/test/performance-comparison/run.sh index 7afb5da59b1..6ef781fa4c8 100644 --- a/docker/test/performance-comparison/run.sh +++ b/docker/test/performance-comparison/run.sh @@ -13,6 +13,7 @@ entry="/usr/share/clickhouse-test/performance/scripts/entrypoint.sh" # https://www.kernel.org/doc/Documentation/filesystems/tmpfs.txt # Double-escaped backslashes are a tribute to the engineering wonder of docker -- # it gives '/bin/sh: 1: [bash,: not found' otherwise. +numactl --hardware node=$(( RANDOM % $(numactl --hardware | sed -n 's/^.*available:\(.*\)nodes.*$/\1/p') )); echo Will bind to NUMA node $node; numactl --cpunodebind=$node --membind=$node $entry diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index b9a7c298f00..b818ff1f3a2 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -775,9 +775,16 @@ try LOG_INFO( log, - "ClickHouse is bound to a subset of NUMA nodes. Total memory of all available nodes {}", + "ClickHouse is bound to a subset of NUMA nodes. Total memory of all available nodes: {}", ReadableSize(total_numa_memory)); } + else + { + LOG_TRACE( + log, + "All NUMA nodes are used. Detected NUMA nodes: {}", + numa_num_configured_nodes()); + } numa_bitmask_free(membind); } From 2519f9ed4252020c6a9fb21ef1410c87f4053200 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 26 Jul 2024 12:08:16 +0200 Subject: [PATCH 4/6] Only support archs --- contrib/numactl-cmake/CMakeLists.txt | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/contrib/numactl-cmake/CMakeLists.txt b/contrib/numactl-cmake/CMakeLists.txt index 5d086366c7f..a72ff11e485 100644 --- a/contrib/numactl-cmake/CMakeLists.txt +++ b/contrib/numactl-cmake/CMakeLists.txt @@ -1,4 +1,14 @@ -option (ENABLE_NUMACTL "Enable numactl" ${ENABLE_LIBRARIES}) +if (NOT ( + OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64 OR ARCH_LOONGARCH64)) +) + if (ENABLE_NUMACTL) + message (${RECONFIGURE_MESSAGE_LEVEL} + "numactl is disabled implicitly because the OS or architecture is not supported. Use -DENABLE_NUMACTL=0") + endif () + set (ENABLE_NUMACTL OFF) +else() + option (ENABLE_NUMACTL "Enable numactl" ${ENABLE_LIBRARIES}) +endif() if (NOT ENABLE_NUMACTL) message (STATUS "Not using numactl") From a9b947d9ac422290cc42b4c29ba5cc1b934244b3 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Sun, 28 Jul 2024 11:51:09 +0200 Subject: [PATCH 5/6] Extract common --- base/base/CMakeLists.txt | 1 + base/base/Numa.cpp | 37 +++++++++++++++++++++++++++++++++++ base/base/Numa.h | 12 ++++++++++++ base/base/getMemoryAmount.cpp | 29 +++------------------------ programs/keeper/Keeper.cpp | 7 +++++++ programs/server/Server.cpp | 36 ++++------------------------------ 6 files changed, 64 insertions(+), 58 deletions(-) create mode 100644 base/base/Numa.cpp create mode 100644 base/base/Numa.h diff --git a/base/base/CMakeLists.txt b/base/base/CMakeLists.txt index 451a6eb5e8b..341c92d3042 100644 --- a/base/base/CMakeLists.txt +++ b/base/base/CMakeLists.txt @@ -32,6 +32,7 @@ set (SRCS StringRef.cpp safeExit.cpp throwError.cpp + Numa.cpp ) add_library (common ${SRCS}) diff --git a/base/base/Numa.cpp b/base/base/Numa.cpp new file mode 100644 index 00000000000..0bf56a993b8 --- /dev/null +++ b/base/base/Numa.cpp @@ -0,0 +1,37 @@ +#include + +#include "config.h" + +#if USE_NUMACTL +# include +#endif + +namespace DB +{ + +std::optional getNumaNodesTotalMemory() +{ + std::optional total_memory; +#if USE_NUMACTL + if (numa_available() != -1) + { + auto * membind = numa_get_membind(); + if (!numa_bitmask_equal(membind, numa_all_nodes_ptr)) + { + total_memory.emplace(0); + auto max_node = numa_max_node(); + for (int i = 0; i <= max_node; ++i) + { + if (numa_bitmask_isbitset(membind, i)) + *total_memory += numa_node_size(i, nullptr); + } + } + + numa_bitmask_free(membind); + } + +#endif + return total_memory; +} + +} diff --git a/base/base/Numa.h b/base/base/Numa.h new file mode 100644 index 00000000000..b48ab15766b --- /dev/null +++ b/base/base/Numa.h @@ -0,0 +1,12 @@ +#pragma once + +#include + +namespace DB +{ + +/// return total memory of NUMA nodes the process is bound to +/// if NUMA is not supported or process can use all nodes, std::nullopt is returned +std::optional getNumaNodesTotalMemory(); + +} diff --git a/base/base/getMemoryAmount.cpp b/base/base/getMemoryAmount.cpp index 56cddbfd628..03aab1eac72 100644 --- a/base/base/getMemoryAmount.cpp +++ b/base/base/getMemoryAmount.cpp @@ -2,6 +2,7 @@ #include #include +#include #include @@ -9,13 +10,6 @@ #include #include -#include "config.h" - -#if USE_NUMACTL -#include -#endif - - namespace { @@ -68,25 +62,8 @@ uint64_t getMemoryAmountOrZero() uint64_t memory_amount = num_pages * page_size; -#if USE_NUMACTL - if (numa_available() != -1) - { - auto * membind = numa_get_membind(); - if (!numa_bitmask_equal(membind, numa_all_nodes_ptr)) - { - uint64_t total_numa_memory = 0; - auto max_node = numa_max_node(); - for (int i = 0; i <= max_node; ++i) - { - if (numa_bitmask_isbitset(membind, i)) - total_numa_memory += numa_node_size(i, nullptr); - } - - memory_amount = total_numa_memory; - } - numa_bitmask_free(membind); - } -#endif + if (auto total_numa_memory = DB::getNumaNodesTotalMemory(); total_numa_memory.has_value()) + memory_amount = *total_numa_memory; /// Respect the memory limit set by cgroups v2. auto limit_v2 = getCgroupsV2MemoryLimit(); diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 8cf1a4d1999..a1793b5d603 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -311,6 +312,12 @@ try MainThreadStatus::getInstance(); + if (auto total_numa_memory = getNumaNodesTotalMemory(); total_numa_memory.has_value()) + { + LOG_INFO( + log, "ClickHouse is bound to a subset of NUMA nodes. Total memory of all available nodes: {}", ReadableSize(*total_numa_memory)); + } + #if !defined(NDEBUG) || !defined(__OPTIMIZE__) LOG_WARNING(log, "Keeper was built in debug mode. It will work slowly."); #endif diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index b818ff1f3a2..86ebd3a3225 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -140,10 +141,6 @@ # include #endif -#if USE_NUMACTL -#include -#endif - #include /// A minimal file used when the server is run without installation @@ -759,36 +756,11 @@ try setenv("OPENSSL_CONF", config_dir.c_str(), true); /// NOLINT } -#if USE_NUMACTL - if (numa_available() != -1) + if (auto total_numa_memory = getNumaNodesTotalMemory(); total_numa_memory.has_value()) { - auto * membind = numa_get_membind(); - if (!numa_bitmask_equal(membind, numa_all_nodes_ptr)) - { - uint64_t total_numa_memory = 0; - auto max_node = numa_max_node(); - for (int i = 0; i <= max_node; ++i) - { - if (numa_bitmask_isbitset(membind, i)) - total_numa_memory += numa_node_size(i, nullptr); - } - - LOG_INFO( - log, - "ClickHouse is bound to a subset of NUMA nodes. Total memory of all available nodes: {}", - ReadableSize(total_numa_memory)); - } - else - { - LOG_TRACE( - log, - "All NUMA nodes are used. Detected NUMA nodes: {}", - numa_num_configured_nodes()); - } - - numa_bitmask_free(membind); + LOG_INFO( + log, "ClickHouse is bound to a subset of NUMA nodes. Total memory of all available nodes: {}", ReadableSize(*total_numa_memory)); } -#endif registerInterpreters(); registerFunctions(); From ba1c191331d3824786cfdac15437dc7bd142cf09 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 29 Jul 2024 11:47:57 +0200 Subject: [PATCH 6/6] Update programs/keeper/Keeper.cpp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: János Benjamin Antal --- programs/keeper/Keeper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index a1793b5d603..783f60cb8ff 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -315,7 +315,7 @@ try if (auto total_numa_memory = getNumaNodesTotalMemory(); total_numa_memory.has_value()) { LOG_INFO( - log, "ClickHouse is bound to a subset of NUMA nodes. Total memory of all available nodes: {}", ReadableSize(*total_numa_memory)); + log, "Keeper is bound to a subset of NUMA nodes. Total memory of all available nodes: {}", ReadableSize(*total_numa_memory)); } #if !defined(NDEBUG) || !defined(__OPTIMIZE__)