Merge pull request #37993 from ClickHouse/stripping

Strip less aggressively to make the embedded hash survive
This commit is contained in:
Robert Schulze 2022-06-16 00:08:36 +02:00 committed by GitHub
commit b39b963733
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 42 additions and 34 deletions

View File

@ -244,16 +244,18 @@ endif ()
# Add a section with the hash of the compiled machine code for integrity checks. # Add a section with the hash of the compiled machine code for integrity checks.
# Only for official builds, because adding a section can be time consuming (rewrite of several GB). # Only for official builds, because adding a section can be time consuming (rewrite of several GB).
# And cross compiled binaries are not supported (since you cannot execute clickhouse hash-binary) # And cross compiled binaries are not supported (since you cannot execute clickhouse hash-binary)
if (OBJCOPY_PATH AND CLICKHOUSE_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE OR CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64.cmake$")) if (CLICKHOUSE_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE OR CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64.cmake$"))
message(STATUS "Official build: A checksum hash will be added to the clickhouse executable")
set (USE_BINARY_HASH 1 CACHE STRING "Calculate binary hash and store it in the separate section") set (USE_BINARY_HASH 1 CACHE STRING "Calculate binary hash and store it in the separate section")
else ()
message(STATUS "No official build: A checksum hash will not be added to the clickhouse executable")
endif () endif ()
# Allows to build stripped binary in a separate directory # Optionally split binaries and debug symbols.
if (OBJCOPY_PATH AND STRIP_PATH) option(INSTALL_STRIPPED_BINARIES "Split binaries and debug symbols" OFF)
option(INSTALL_STRIPPED_BINARIES "Build stripped binaries with debug info in separate directory" OFF) if (INSTALL_STRIPPED_BINARIES)
if (INSTALL_STRIPPED_BINARIES) message(STATUS "Will split binaries and debug symbols")
set(STRIPPED_BINARIES_OUTPUT "stripped" CACHE STRING "A separate directory for stripped information") set(STRIPPED_BINARIES_OUTPUT "stripped" CACHE STRING "A separate directory for stripped information")
endif()
endif() endif()
cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd

View File

@ -19,9 +19,12 @@ macro(clickhouse_strip_binary)
COMMAND mkdir -p "${STRIP_DESTINATION_DIR}/lib/debug/bin" COMMAND mkdir -p "${STRIP_DESTINATION_DIR}/lib/debug/bin"
COMMAND mkdir -p "${STRIP_DESTINATION_DIR}/bin" COMMAND mkdir -p "${STRIP_DESTINATION_DIR}/bin"
COMMAND cp "${STRIP_BINARY_PATH}" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" COMMAND cp "${STRIP_BINARY_PATH}" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
# Splits debug symbols into separate file, leaves the binary untouched:
COMMAND "${OBJCOPY_PATH}" --only-keep-debug --compress-debug-sections "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" COMMAND "${OBJCOPY_PATH}" --only-keep-debug --compress-debug-sections "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
COMMAND chmod 0644 "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" COMMAND chmod 0644 "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
COMMAND "${STRIP_PATH}" --remove-section=.comment --remove-section=.note "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" # Strips binary, sections '.note' & '.comment' are removed in line with Debian's stripping policy: www.debian.org/doc/debian-policy/ch-files.html, section '.clickhouse.hash' is needed for integrity check:
COMMAND "${STRIP_PATH}" --remove-section=.comment --remove-section=.note --keep-section=.clickhouse.hash "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
# Associate stripped binary with debug symbols:
COMMAND "${OBJCOPY_PATH}" --add-gnu-debuglink "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" COMMAND "${OBJCOPY_PATH}" --add-gnu-debuglink "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
COMMENT "Stripping clickhouse binary" VERBATIM COMMENT "Stripping clickhouse binary" VERBATIM
) )

View File

@ -508,7 +508,7 @@ else ()
endif() endif()
if (USE_BINARY_HASH) if (USE_BINARY_HASH)
add_custom_command(TARGET clickhouse POST_BUILD COMMAND ./clickhouse hash-binary > hash && ${OBJCOPY_PATH} --add-section .note.ClickHouse.hash=hash clickhouse COMMENT "Adding .note.ClickHouse.hash to clickhouse" VERBATIM) add_custom_command(TARGET clickhouse POST_BUILD COMMAND ./clickhouse hash-binary > hash && ${OBJCOPY_PATH} --add-section .clickhouse.hash=hash clickhouse COMMENT "Adding section '.clickhouse.hash' to clickhouse binary" VERBATIM)
endif() endif()
if (INSTALL_STRIPPED_BINARIES) if (INSTALL_STRIPPED_BINARIES)

View File

@ -82,7 +82,7 @@ int mainEntryClickHouseDisks(int argc, char ** argv);
int mainEntryClickHouseHashBinary(int, char **) int mainEntryClickHouseHashBinary(int, char **)
{ {
/// Intentionally without newline. So you can run: /// Intentionally without newline. So you can run:
/// objcopy --add-section .note.ClickHouse.hash=<(./clickhouse hash-binary) clickhouse /// objcopy --add-section .clickhouse.hash=<(./clickhouse hash-binary) clickhouse
std::cout << getHashOfLoadedBinaryHex(); std::cout << getHashOfLoadedBinaryHex();
return 0; return 0;
} }

View File

@ -744,16 +744,18 @@ int Server::main(const std::vector<std::string> & /*args*/)
/// But there are other sections of the binary (e.g. exception handling tables) /// But there are other sections of the binary (e.g. exception handling tables)
/// that are interpreted (not executed) but can alter the behaviour of the program as well. /// that are interpreted (not executed) but can alter the behaviour of the program as well.
/// Please keep the below log messages in-sync with the ones in daemon/BaseDaemon.cpp
String calculated_binary_hash = getHashOfLoadedBinaryHex(); String calculated_binary_hash = getHashOfLoadedBinaryHex();
if (stored_binary_hash.empty()) if (stored_binary_hash.empty())
{ {
LOG_WARNING(log, "Calculated checksum of the binary: {}." LOG_WARNING(log, "Integrity check of the executable skipped because the reference checksum could not be read."
" There is no information about the reference checksum.", calculated_binary_hash); " (calculated checksum: {})", calculated_binary_hash);
} }
else if (calculated_binary_hash == stored_binary_hash) else if (calculated_binary_hash == stored_binary_hash)
{ {
LOG_INFO(log, "Calculated checksum of the binary: {}, integrity check passed.", calculated_binary_hash); LOG_INFO(log, "Integrity check of the executable successfully passed (checksum: {})", calculated_binary_hash);
} }
else else
{ {
@ -769,14 +771,14 @@ int Server::main(const std::vector<std::string> & /*args*/)
else else
{ {
throw Exception(ErrorCodes::CORRUPTED_DATA, throw Exception(ErrorCodes::CORRUPTED_DATA,
"Calculated checksum of the ClickHouse binary ({0}) does not correspond" "Calculated checksum of the executable ({0}) does not correspond"
" to the reference checksum stored in the binary ({1})." " to the reference checksum stored in the executable ({1})."
" It may indicate one of the following:" " This may indicate one of the following:"
" - the file {2} was changed just after startup;" " - the executable {2} was changed just after startup;"
" - the file {2} is damaged on disk due to faulty hardware;" " - the executable {2} was corrupted on disk due to faulty hardware;"
" - the loaded executable is damaged in memory due to faulty hardware;" " - the loaded executable was corrupted in memory due to faulty hardware;"
" - the file {2} was intentionally modified;" " - the file {2} was intentionally modified;"
" - logical error in code." " - a logical error in the code."
, calculated_binary_hash, stored_binary_hash, executable_path); , calculated_binary_hash, stored_binary_hash, executable_path);
} }
} }

View File

@ -176,9 +176,9 @@ String Elf::getBuildID(const char * nhdr_pos, size_t size)
#endif // OS_SUNOS #endif // OS_SUNOS
String Elf::getBinaryHash() const String Elf::getStoredBinaryHash() const
{ {
if (auto section = findSectionByName(".note.ClickHouse.hash")) if (auto section = findSectionByName(".clickhouse.hash"))
return {section->begin(), section->end()}; return {section->begin(), section->end()};
else else
return {}; return {};

View File

@ -61,7 +61,7 @@ public:
static String getBuildID(const char * nhdr_pos, size_t size); static String getBuildID(const char * nhdr_pos, size_t size);
/// Hash of the binary for integrity checks. /// Hash of the binary for integrity checks.
String getBinaryHash() const; String getStoredBinaryHash() const;
private: private:
MMapReadBufferFromFile in; MMapReadBufferFromFile in;

View File

@ -352,26 +352,27 @@ private:
#if defined(OS_LINUX) #if defined(OS_LINUX)
/// Write information about binary checksum. It can be difficult to calculate, so do it only after printing stack trace. /// Write information about binary checksum. It can be difficult to calculate, so do it only after printing stack trace.
/// Please keep the below log messages in-sync with the ones in programs/server/Server.cpp
String calculated_binary_hash = getHashOfLoadedBinaryHex(); String calculated_binary_hash = getHashOfLoadedBinaryHex();
if (daemon.stored_binary_hash.empty()) if (daemon.stored_binary_hash.empty())
{ {
LOG_FATAL(log, "Calculated checksum of the binary: {}." LOG_FATAL(log, "Integrity check of the executable skipped because the reference checksum could not be read."
" There is no information about the reference checksum.", calculated_binary_hash); " (calculated checksum: {})", calculated_binary_hash);
} }
else if (calculated_binary_hash == daemon.stored_binary_hash) else if (calculated_binary_hash == daemon.stored_binary_hash)
{ {
LOG_FATAL(log, "Checksum of the binary: {}, integrity check passed.", calculated_binary_hash); LOG_FATAL(log, "Integrity check of the executable successfully passed (checksum: {})", calculated_binary_hash);
} }
else else
{ {
LOG_FATAL(log, "Calculated checksum of the ClickHouse binary ({0}) does not correspond" LOG_FATAL(log, "Calculated checksum of the executable ({0}) does not correspond"
" to the reference checksum stored in the binary ({1})." " to the reference checksum stored in the executable ({1})."
" It may indicate one of the following:" " This may indicate one of the following:"
" - the file was changed just after startup;" " - the executable was changed just after startup;"
" - the file is damaged on disk due to faulty hardware;" " - the executable was corrupted on disk due to faulty hardware;"
" - the loaded executable is damaged in memory due to faulty hardware;" " - the loaded executable was corrupted in memory due to faulty hardware;"
" - the file was intentionally modified;" " - the file was intentionally modified;"
" - logical error in code." " - a logical error in the code."
, calculated_binary_hash, daemon.stored_binary_hash); , calculated_binary_hash, daemon.stored_binary_hash);
} }
#endif #endif
@ -872,7 +873,7 @@ void BaseDaemon::initializeTerminationAndSignalProcessing()
std::string executable_path = getExecutablePath(); std::string executable_path = getExecutablePath();
if (!executable_path.empty()) if (!executable_path.empty())
stored_binary_hash = DB::Elf(executable_path).getBinaryHash(); stored_binary_hash = DB::Elf(executable_path).getStoredBinaryHash();
#endif #endif
} }