Merge pull request #37993 from ClickHouse/stripping

Strip less aggressively to make the embedded hash survive
This commit is contained in:
Robert Schulze 2022-06-16 00:08:36 +02:00 committed by GitHub
commit b39b963733
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 42 additions and 34 deletions

View File

@ -244,16 +244,18 @@ endif ()
# Add a section with the hash of the compiled machine code for integrity checks.
# Only for official builds, because adding a section can be time consuming (rewrite of several GB).
# And cross compiled binaries are not supported (since you cannot execute clickhouse hash-binary)
if (OBJCOPY_PATH AND CLICKHOUSE_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE OR CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64.cmake$"))
if (CLICKHOUSE_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE OR CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64.cmake$"))
message(STATUS "Official build: A checksum hash will be added to the clickhouse executable")
set (USE_BINARY_HASH 1 CACHE STRING "Calculate binary hash and store it in the separate section")
else ()
message(STATUS "No official build: A checksum hash will not be added to the clickhouse executable")
endif ()
# Allows to build stripped binary in a separate directory
if (OBJCOPY_PATH AND STRIP_PATH)
option(INSTALL_STRIPPED_BINARIES "Build stripped binaries with debug info in separate directory" OFF)
if (INSTALL_STRIPPED_BINARIES)
set(STRIPPED_BINARIES_OUTPUT "stripped" CACHE STRING "A separate directory for stripped information")
endif()
# Optionally split binaries and debug symbols.
option(INSTALL_STRIPPED_BINARIES "Split binaries and debug symbols" OFF)
if (INSTALL_STRIPPED_BINARIES)
message(STATUS "Will split binaries and debug symbols")
set(STRIPPED_BINARIES_OUTPUT "stripped" CACHE STRING "A separate directory for stripped information")
endif()
cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd

View File

@ -19,9 +19,12 @@ macro(clickhouse_strip_binary)
COMMAND mkdir -p "${STRIP_DESTINATION_DIR}/lib/debug/bin"
COMMAND mkdir -p "${STRIP_DESTINATION_DIR}/bin"
COMMAND cp "${STRIP_BINARY_PATH}" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
# Splits debug symbols into separate file, leaves the binary untouched:
COMMAND "${OBJCOPY_PATH}" --only-keep-debug --compress-debug-sections "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
COMMAND chmod 0644 "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
COMMAND "${STRIP_PATH}" --remove-section=.comment --remove-section=.note "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
# Strips binary, sections '.note' & '.comment' are removed in line with Debian's stripping policy: www.debian.org/doc/debian-policy/ch-files.html, section '.clickhouse.hash' is needed for integrity check:
COMMAND "${STRIP_PATH}" --remove-section=.comment --remove-section=.note --keep-section=.clickhouse.hash "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
# Associate stripped binary with debug symbols:
COMMAND "${OBJCOPY_PATH}" --add-gnu-debuglink "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
COMMENT "Stripping clickhouse binary" VERBATIM
)

View File

@ -508,7 +508,7 @@ else ()
endif()
if (USE_BINARY_HASH)
add_custom_command(TARGET clickhouse POST_BUILD COMMAND ./clickhouse hash-binary > hash && ${OBJCOPY_PATH} --add-section .note.ClickHouse.hash=hash clickhouse COMMENT "Adding .note.ClickHouse.hash to clickhouse" VERBATIM)
add_custom_command(TARGET clickhouse POST_BUILD COMMAND ./clickhouse hash-binary > hash && ${OBJCOPY_PATH} --add-section .clickhouse.hash=hash clickhouse COMMENT "Adding section '.clickhouse.hash' to clickhouse binary" VERBATIM)
endif()
if (INSTALL_STRIPPED_BINARIES)

View File

@ -82,7 +82,7 @@ int mainEntryClickHouseDisks(int argc, char ** argv);
int mainEntryClickHouseHashBinary(int, char **)
{
/// Intentionally without newline. So you can run:
/// objcopy --add-section .note.ClickHouse.hash=<(./clickhouse hash-binary) clickhouse
/// objcopy --add-section .clickhouse.hash=<(./clickhouse hash-binary) clickhouse
std::cout << getHashOfLoadedBinaryHex();
return 0;
}

View File

@ -744,16 +744,18 @@ int Server::main(const std::vector<std::string> & /*args*/)
/// But there are other sections of the binary (e.g. exception handling tables)
/// that are interpreted (not executed) but can alter the behaviour of the program as well.
/// Please keep the below log messages in-sync with the ones in daemon/BaseDaemon.cpp
String calculated_binary_hash = getHashOfLoadedBinaryHex();
if (stored_binary_hash.empty())
{
LOG_WARNING(log, "Calculated checksum of the binary: {}."
" There is no information about the reference checksum.", calculated_binary_hash);
LOG_WARNING(log, "Integrity check of the executable skipped because the reference checksum could not be read."
" (calculated checksum: {})", calculated_binary_hash);
}
else if (calculated_binary_hash == stored_binary_hash)
{
LOG_INFO(log, "Calculated checksum of the binary: {}, integrity check passed.", calculated_binary_hash);
LOG_INFO(log, "Integrity check of the executable successfully passed (checksum: {})", calculated_binary_hash);
}
else
{
@ -769,14 +771,14 @@ int Server::main(const std::vector<std::string> & /*args*/)
else
{
throw Exception(ErrorCodes::CORRUPTED_DATA,
"Calculated checksum of the ClickHouse binary ({0}) does not correspond"
" to the reference checksum stored in the binary ({1})."
" It may indicate one of the following:"
" - the file {2} was changed just after startup;"
" - the file {2} is damaged on disk due to faulty hardware;"
" - the loaded executable is damaged in memory due to faulty hardware;"
"Calculated checksum of the executable ({0}) does not correspond"
" to the reference checksum stored in the executable ({1})."
" This may indicate one of the following:"
" - the executable {2} was changed just after startup;"
" - the executable {2} was corrupted on disk due to faulty hardware;"
" - the loaded executable was corrupted in memory due to faulty hardware;"
" - the file {2} was intentionally modified;"
" - logical error in code."
" - a logical error in the code."
, calculated_binary_hash, stored_binary_hash, executable_path);
}
}

View File

@ -176,9 +176,9 @@ String Elf::getBuildID(const char * nhdr_pos, size_t size)
#endif // OS_SUNOS
String Elf::getBinaryHash() const
String Elf::getStoredBinaryHash() const
{
if (auto section = findSectionByName(".note.ClickHouse.hash"))
if (auto section = findSectionByName(".clickhouse.hash"))
return {section->begin(), section->end()};
else
return {};

View File

@ -61,7 +61,7 @@ public:
static String getBuildID(const char * nhdr_pos, size_t size);
/// Hash of the binary for integrity checks.
String getBinaryHash() const;
String getStoredBinaryHash() const;
private:
MMapReadBufferFromFile in;

View File

@ -352,26 +352,27 @@ private:
#if defined(OS_LINUX)
/// Write information about binary checksum. It can be difficult to calculate, so do it only after printing stack trace.
/// Please keep the below log messages in-sync with the ones in programs/server/Server.cpp
String calculated_binary_hash = getHashOfLoadedBinaryHex();
if (daemon.stored_binary_hash.empty())
{
LOG_FATAL(log, "Calculated checksum of the binary: {}."
" There is no information about the reference checksum.", calculated_binary_hash);
LOG_FATAL(log, "Integrity check of the executable skipped because the reference checksum could not be read."
" (calculated checksum: {})", calculated_binary_hash);
}
else if (calculated_binary_hash == daemon.stored_binary_hash)
{
LOG_FATAL(log, "Checksum of the binary: {}, integrity check passed.", calculated_binary_hash);
LOG_FATAL(log, "Integrity check of the executable successfully passed (checksum: {})", calculated_binary_hash);
}
else
{
LOG_FATAL(log, "Calculated checksum of the ClickHouse binary ({0}) does not correspond"
" to the reference checksum stored in the binary ({1})."
" It may indicate one of the following:"
" - the file was changed just after startup;"
" - the file is damaged on disk due to faulty hardware;"
" - the loaded executable is damaged in memory due to faulty hardware;"
LOG_FATAL(log, "Calculated checksum of the executable ({0}) does not correspond"
" to the reference checksum stored in the executable ({1})."
" This may indicate one of the following:"
" - the executable was changed just after startup;"
" - the executable was corrupted on disk due to faulty hardware;"
" - the loaded executable was corrupted in memory due to faulty hardware;"
" - the file was intentionally modified;"
" - logical error in code."
" - a logical error in the code."
, calculated_binary_hash, daemon.stored_binary_hash);
}
#endif
@ -872,7 +873,7 @@ void BaseDaemon::initializeTerminationAndSignalProcessing()
std::string executable_path = getExecutablePath();
if (!executable_path.empty())
stored_binary_hash = DB::Elf(executable_path).getBinaryHash();
stored_binary_hash = DB::Elf(executable_path).getStoredBinaryHash();
#endif
}