From 6f481d75129dfd2c573d865cf04cdad14a35b4dd Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 7 Jan 2021 05:56:57 +0300 Subject: [PATCH] Add integrity checks for ClickHouse binary --- CMakeLists.txt | 6 ++++++ base/daemon/BaseDaemon.cpp | 42 +++++++++++++++++++++++++++++++++++++ base/daemon/BaseDaemon.h | 4 ++++ programs/CMakeLists.txt | 4 ++++ programs/main.cpp | 10 +++++++++ programs/server/Server.cpp | 43 +++++++++++++++++++++++++++++++++++++- src/Common/Elf.cpp | 9 ++++++++ src/Common/Elf.h | 3 +++ 8 files changed, 120 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e77e3b3feb6..95f95464857 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -220,6 +220,12 @@ if (LINKER_NAME MATCHES "lld$") set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--build-id=sha1") endif () +# Add a section with the hash of the compiled machine code for integrity checks. +# Only for official builds, because adding a section can be time consuming (rewrite of several GB). +if (OBJCOPY_PATH AND YANDEX_OFFICIAL_BUILD) + set (USE_BINARY_HASH 1) +endif () + cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp index 1224d9171ea..2c20eccf914 100644 --- a/base/daemon/BaseDaemon.cpp +++ b/base/daemon/BaseDaemon.cpp @@ -56,6 +56,9 @@ #include #include #include +#include +#include +#include #if !defined(ARCADIA_BUILD) # include @@ -340,6 +343,32 @@ private: /// Write symbolized stack trace line by line for better grep-ability. stack_trace.toStringEveryLine([&](const std::string & s) { LOG_FATAL(log, s); }); +#if defined(__linux__) + /// Write information about binary checksum. It can be difficult to calculate, so do it only after printing stack trace. + String calculated_binary_hash = getHashOfLoadedBinaryHex(); + if (daemon.stored_binary_hash.empty()) + { + LOG_FATAL(log, "Calculated checksum of the binary: {}." + " There is no information about the reference checksum.", calculated_binary_hash); + } + else if (calculated_binary_hash == daemon.stored_binary_hash) + { + LOG_FATAL(log, "Checksum of the binary: {}, integrity check passed.", calculated_binary_hash); + } + else + { + LOG_FATAL(log, "Calculated checksum of the ClickHouse binary ({0}) does not correspond" + " to the reference checksum stored in the binary ({1})." + " It may indicate one of the following:" + " - the file was changed just after startup;" + " - the file is damaged on disk due to faulty hardware;" + " - the loaded executable is damaged in memory due to faulty hardware;" + " - the file was intentionally modified;" + " - logical error in code." + , calculated_binary_hash, daemon.stored_binary_hash); + } +#endif + /// Write crash to system.crash_log table if available. if (collectCrashLog) collectCrashLog(sig, thread_num, query_id, stack_trace); @@ -799,6 +828,13 @@ void BaseDaemon::initializeTerminationAndSignalProcessing() #else build_id_info = "no build id"; #endif + +#if defined(__linux__) + std::string executable_path = getExecutablePath(); + + if (!executable_path.empty()) + stored_binary_hash = DB::Elf(executable_path).getBinaryHash(); +#endif } void BaseDaemon::logRevision() const @@ -1010,3 +1046,9 @@ void BaseDaemon::setupWatchdog() #endif } } + + +String BaseDaemon::getStoredBinaryHash() const +{ + return stored_binary_hash; +} diff --git a/base/daemon/BaseDaemon.h b/base/daemon/BaseDaemon.h index 090d4997606..3c065970504 100644 --- a/base/daemon/BaseDaemon.h +++ b/base/daemon/BaseDaemon.h @@ -121,6 +121,9 @@ public: /// argv0 is needed to change process name (consequently, it is needed for scripts involving "pgrep", "pidof" to work correctly). void shouldSetupWatchdog(char * argv0_); + /// Hash of the binary for integrity checks. + String getStoredBinaryHash() const; + protected: virtual void logRevision() const; @@ -168,6 +171,7 @@ protected: Poco::Util::AbstractConfiguration * last_configuration = nullptr; String build_id_info; + String stored_binary_hash; std::vector handled_signals; diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index a1b5467f234..9adca58b55a 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -318,6 +318,10 @@ else () if (USE_GDB_ADD_INDEX) add_custom_command(TARGET clickhouse POST_BUILD COMMAND ${GDB_ADD_INDEX_EXE} clickhouse COMMENT "Adding .gdb-index to clickhouse" VERBATIM) endif() + + if (USE_BINARY_HASH) + add_custom_command(TARGET clickhouse POST_BUILD COMMAND ./clickhouse hash-binary > hash && ${OBJCOPY_PATH} --add-section .note.ClickHouse.hash=hash clickhouse COMMENT "Adding .note.ClickHouse.hash to clickhouse" VERBATIM) + endif() endif () if (ENABLE_TESTS AND USE_GTEST) diff --git a/programs/main.cpp b/programs/main.cpp index dee02c55832..cbb22b7a87b 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -18,6 +18,7 @@ #endif #include +#include #include #include @@ -62,6 +63,14 @@ int mainEntryClickHouseStatus(int argc, char ** argv); int mainEntryClickHouseRestart(int argc, char ** argv); #endif +int mainEntryClickHouseHashBinary(int, char **) +{ + /// Intentionally without newline. So you can run: + /// objcopy --add-section .note.ClickHouse.hash=<(./clickhouse hash-binary) clickhouse + std::cout << getHashOfLoadedBinaryHex(); + return 0; +} + #define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0])) namespace @@ -110,6 +119,7 @@ std::pair clickhouse_applications[] = {"status", mainEntryClickHouseStatus}, {"restart", mainEntryClickHouseRestart}, #endif + {"hash-binary", mainEntryClickHouseHashBinary}, }; diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 76765c0374c..63caec25099 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -65,6 +65,8 @@ #include #include #include +#include +#include #include #include #include @@ -184,6 +186,7 @@ namespace ErrorCodes extern const int FAILED_TO_GETPWUID; extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA; extern const int NETWORK_ERROR; + extern const int CORRUPTED_DATA; } @@ -436,7 +439,45 @@ int Server::main(const std::vector & /*args*/) #if defined(OS_LINUX) std::string executable_path = getExecutablePath(); - if (executable_path.empty()) + + if (!executable_path.empty()) + { + /// Integrity check based on checksum of the executable code. + /// Note: it is not intended to protect from malicious party, + /// because the reference checksum can be easily modified as well. + /// And we don't involve asymmetric encryption with PKI yet. + /// It's only intended to protect from faulty hardware. + /// Note: it is only based on machine code. + /// But there are other sections of the binary (e.g. exception handling tables) + /// that are interpreted (not executed) but can alter the behaviour of the program as well. + + String calculated_binary_hash = getHashOfLoadedBinaryHex(); + String stored_binary_hash = getStoredBinaryHash(); + + if (stored_binary_hash.empty()) + { + LOG_WARNING(log, "Calculated checksum of the binary: {}." + " There is no information about the reference checksum.", calculated_binary_hash); + } + else if (calculated_binary_hash == stored_binary_hash) + { + LOG_INFO(log, "Calculated checksum of the binary: {}, integrity check passed.", calculated_binary_hash); + } + else + { + throw Exception(ErrorCodes::CORRUPTED_DATA, + "Calculated checksum of the ClickHouse binary ({0}) does not correspond" + " to the reference checksum stored in the binary ({1})." + " It may indicate one of the following:" + " - the file {2} was changed just after startup;" + " - the file {2} is damaged on disk due to faulty hardware;" + " - the loaded executable is damaged in memory due to faulty hardware;" + " - the file {2} was intentionally modified;" + " - logical error in code." + , calculated_binary_hash, stored_binary_hash, executable_path); + } + } + else executable_path = "/usr/bin/clickhouse"; /// It is used for information messages. /// After full config loaded diff --git a/src/Common/Elf.cpp b/src/Common/Elf.cpp index 0c2359b3418..ee78c988f69 100644 --- a/src/Common/Elf.cpp +++ b/src/Common/Elf.cpp @@ -151,6 +151,15 @@ String Elf::getBuildID(const char * nhdr_pos, size_t size) } +String Elf::getBinaryHash() const +{ + if (auto section = findSectionByName(".note.ClickHouse.hash")) + return {section->begin(), section->end()}; + else + return {}; +} + + const char * Elf::Section::name() const { if (!elf.section_names) diff --git a/src/Common/Elf.h b/src/Common/Elf.h index 632d7e6f0b1..90783ddc18d 100644 --- a/src/Common/Elf.h +++ b/src/Common/Elf.h @@ -59,6 +59,9 @@ public: String getBuildID() const; static String getBuildID(const char * nhdr_pos, size_t size); + /// Hash of the binary for integrity checks. + String getBinaryHash() const; + private: MMapReadBufferFromFile in; size_t elf_size;