Add integrity checks for ClickHouse binary

This commit is contained in:
Alexey Milovidov 2021-01-07 05:56:57 +03:00
parent 6f75901ffb
commit 6f481d7512
8 changed files with 120 additions and 1 deletions

View File

@ -220,6 +220,12 @@ if (LINKER_NAME MATCHES "lld$")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--build-id=sha1")
endif ()
# Add a section with the hash of the compiled machine code for integrity checks.
# Only for official builds, because adding a section can be time consuming (rewrite of several GB).
# Requires objcopy (OBJCOPY_PATH): the section is attached to the finished binary
# by a post-build step that is enabled when USE_BINARY_HASH is set.
if (OBJCOPY_PATH AND YANDEX_OFFICIAL_BUILD)
set (USE_BINARY_HASH 1)
endif ()
cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd

View File

@ -56,6 +56,9 @@
#include <Common/Config/ConfigProcessor.h>
#include <Common/MemorySanitizer.h>
#include <Common/SymbolIndex.h>
#include <Common/getExecutablePath.h>
#include <Common/getHashOfLoadedBinary.h>
#include <Common/Elf.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config_version.h>
@ -340,6 +343,32 @@ private:
/// Write symbolized stack trace line by line for better grep-ability.
stack_trace.toStringEveryLine([&](const std::string & s) { LOG_FATAL(log, s); });
#if defined(__linux__)
/// Write information about binary checksum. It can be difficult to calculate, so do it only after printing stack trace.
/// The checksum is calculated anew here and compared with the reference value kept in daemon.stored_binary_hash.
String calculated_binary_hash = getHashOfLoadedBinaryHex();
if (daemon.stored_binary_hash.empty())
{
/// No reference value to compare with: report only the calculated checksum.
LOG_FATAL(log, "Calculated checksum of the binary: {}."
" There is no information about the reference checksum.", calculated_binary_hash);
}
else if (calculated_binary_hash == daemon.stored_binary_hash)
{
/// The checksums are equal.
LOG_FATAL(log, "Checksum of the binary: {}, integrity check passed.", calculated_binary_hash);
}
else
{
/// The checksums differ: log both values together with the list of possible reasons.
LOG_FATAL(log, "Calculated checksum of the ClickHouse binary ({0}) does not correspond"
" to the reference checksum stored in the binary ({1})."
" It may indicate one of the following:"
" - the file was changed just after startup;"
" - the file is damaged on disk due to faulty hardware;"
" - the loaded executable is damaged in memory due to faulty hardware;"
" - the file was intentionally modified;"
" - logical error in code."
, calculated_binary_hash, daemon.stored_binary_hash);
}
#endif
/// Write crash to system.crash_log table if available.
if (collectCrashLog)
collectCrashLog(sig, thread_num, query_id, stack_trace);
@ -799,6 +828,13 @@ void BaseDaemon::initializeTerminationAndSignalProcessing()
#else
build_id_info = "no build id";
#endif
#if defined(__linux__)
/// Remember the reference hash recorded in the executable file, to be used by integrity checks.
/// If the path of the executable is not known, stored_binary_hash is left untouched.
std::string executable_path = getExecutablePath();
if (!executable_path.empty())
stored_binary_hash = DB::Elf(executable_path).getBinaryHash();
#endif
}
void BaseDaemon::logRevision() const
@ -1010,3 +1046,9 @@ void BaseDaemon::setupWatchdog()
#endif
}
}
/// Returns a copy of the reference hash kept in stored_binary_hash.
/// The string is empty if no reference hash has been read from the executable.
String BaseDaemon::getStoredBinaryHash() const
{
return stored_binary_hash;
}

View File

@ -121,6 +121,9 @@ public:
/// argv0 is needed to change process name (consequently, it is needed for scripts involving "pgrep", "pidof" to work correctly).
void shouldSetupWatchdog(char * argv0_);
/// Hash of the binary for integrity checks.
/// This is the reference value read from the executable file; it is empty if no such value is available.
String getStoredBinaryHash() const;
protected:
virtual void logRevision() const;
@ -168,6 +171,7 @@ protected:
Poco::Util::AbstractConfiguration * last_configuration = nullptr;
String build_id_info;
String stored_binary_hash;
std::vector<int> handled_signals;

View File

@ -318,6 +318,10 @@ else ()
if (USE_GDB_ADD_INDEX)
add_custom_command(TARGET clickhouse POST_BUILD COMMAND ${GDB_ADD_INDEX_EXE} clickhouse COMMENT "Adding .gdb-index to clickhouse" VERBATIM)
endif()
# After the build, run the fresh binary to print its own hash into the file "hash",
# then attach that file to the binary as the .note.ClickHouse.hash section with objcopy.
if (USE_BINARY_HASH)
add_custom_command(TARGET clickhouse POST_BUILD COMMAND ./clickhouse hash-binary > hash && ${OBJCOPY_PATH} --add-section .note.ClickHouse.hash=hash clickhouse COMMENT "Adding .note.ClickHouse.hash to clickhouse" VERBATIM)
endif()
endif ()
if (ENABLE_TESTS AND USE_GTEST)

View File

@ -18,6 +18,7 @@
#endif
#include <Common/StringUtils/StringUtils.h>
#include <Common/getHashOfLoadedBinary.h>
#include <common/phdr_cache.h>
#include <ext/scope_guard.h>
@ -62,6 +63,14 @@ int mainEntryClickHouseStatus(int argc, char ** argv);
int mainEntryClickHouseRestart(int argc, char ** argv);
#endif
/// Entry point of the "hash-binary" tool: prints the hex hash of the loaded binary to stdout.
/// The command line arguments are ignored. Always returns 0.
int mainEntryClickHouseHashBinary(int, char **)
{
    const auto hash_hex = getHashOfLoadedBinaryHex();

    /// No trailing newline on purpose, so the output can be placed into a section as is:
    /// objcopy --add-section .note.ClickHouse.hash=<(./clickhouse hash-binary) clickhouse
    std::cout << hash_hex;

    return 0;
}
#define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0]))
namespace
@ -110,6 +119,7 @@ std::pair<const char *, MainFunc> clickhouse_applications[] =
{"status", mainEntryClickHouseStatus},
{"restart", mainEntryClickHouseRestart},
#endif
{"hash-binary", mainEntryClickHouseHashBinary},
};

View File

@ -65,6 +65,8 @@
#include <Server/TCPHandlerFactory.h>
#include <Common/SensitiveDataMasker.h>
#include <Common/ThreadFuzzer.h>
#include <Common/getHashOfLoadedBinary.h>
#include <Common/Elf.h>
#include <Server/MySQLHandlerFactory.h>
#include <Server/PostgreSQLHandlerFactory.h>
#include <Server/ProtocolServerAdapter.h>
@ -184,6 +186,7 @@ namespace ErrorCodes
extern const int FAILED_TO_GETPWUID;
extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA;
extern const int NETWORK_ERROR;
extern const int CORRUPTED_DATA;
}
@ -436,7 +439,45 @@ int Server::main(const std::vector<std::string> & /*args*/)
#if defined(OS_LINUX)
/// Path of the running executable; empty if it could not be determined.
std::string executable_path = getExecutablePath();

/// Note: there must be exactly one condition here. An extra inverted `if (executable_path.empty())`
/// in front of it would capture the `else` below and make both branches unreachable.
if (!executable_path.empty())
{
    /// Integrity check based on the checksum of the executable code.
    /// Note: it is not intended to protect from a malicious party,
    /// because the reference checksum can be easily modified as well.
    /// And we don't involve asymmetric encryption with PKI yet.
    /// It's only intended to protect from faulty hardware.
    /// Note: it is only based on machine code.
    /// But there are other sections of the binary (e.g. exception handling tables)
    /// that are interpreted (not executed) but can alter the behaviour of the program as well.

    String calculated_binary_hash = getHashOfLoadedBinaryHex();
    String stored_binary_hash = getStoredBinaryHash();

    if (stored_binary_hash.empty())
    {
        /// Nothing to compare with: only report the calculated value.
        LOG_WARNING(log, "Calculated checksum of the binary: {}."
            " There is no information about the reference checksum.", calculated_binary_hash);
    }
    else if (calculated_binary_hash == stored_binary_hash)
    {
        LOG_INFO(log, "Calculated checksum of the binary: {}, integrity check passed.", calculated_binary_hash);
    }
    else
    {
        /// The checksums differ: refuse to continue with CORRUPTED_DATA.
        throw Exception(ErrorCodes::CORRUPTED_DATA,
            "Calculated checksum of the ClickHouse binary ({0}) does not correspond"
            " to the reference checksum stored in the binary ({1})."
            " It may indicate one of the following:"
            " - the file {2} was changed just after startup;"
            " - the file {2} is damaged on disk due to faulty hardware;"
            " - the loaded executable is damaged in memory due to faulty hardware;"
            " - the file {2} was intentionally modified;"
            " - logical error in code."
            , calculated_binary_hash, stored_binary_hash, executable_path);
    }
}
else
    executable_path = "/usr/bin/clickhouse"; /// It is used for information messages.
/// After full config loaded

View File

@ -151,6 +151,15 @@ String Elf::getBuildID(const char * nhdr_pos, size_t size)
}
/// Returns the contents of the ".note.ClickHouse.hash" section as a string.
/// If the file has no such section, an empty string is returned.
String Elf::getBinaryHash() const
{
    auto hash_section = findSectionByName(".note.ClickHouse.hash");

    if (!hash_section)
        return {};

    return {hash_section->begin(), hash_section->end()};
}
const char * Elf::Section::name() const
{
if (!elf.section_names)

View File

@ -59,6 +59,9 @@ public:
String getBuildID() const;
static String getBuildID(const char * nhdr_pos, size_t size);
/// Hash of the binary for integrity checks.
/// It is the content of the ".note.ClickHouse.hash" section, or an empty string if there is no such section.
String getBinaryHash() const;
private:
MMapReadBufferFromFile in;
size_t elf_size;