ClickHouse/programs/main.cpp

514 lines
15 KiB
C++
Raw Normal View History

#include <csignal>
#include <csetjmp>
2020-03-26 17:53:57 +00:00
#include <unistd.h>
#include <fcntl.h>
#include <new>
2018-04-05 19:28:07 +00:00
#include <iostream>
#include <vector>
2018-04-05 19:28:07 +00:00
#include <string>
#include <tuple>
#include <string_view>
2018-11-26 00:56:50 +00:00
#include <utility> /// pair
#include <fmt/format.h>
2021-12-11 18:25:23 +00:00
#include "config_tools.h"
#include <Common/StringUtils/StringUtils.h>
#include <Common/getHashOfLoadedBinary.h>
2021-06-02 04:48:31 +00:00
#include <Common/IO.h>
2021-10-02 07:13:14 +00:00
#include <base/phdr_cache.h>
#include <base/coverage.h>
/// Universal executable for various clickhouse applications
int mainEntryClickHouseServer(int argc, char ** argv);
int mainEntryClickHouseClient(int argc, char ** argv);
int mainEntryClickHouseLocal(int argc, char ** argv);
int mainEntryClickHouseBenchmark(int argc, char ** argv);
int mainEntryClickHouseExtractFromConfig(int argc, char ** argv);
int mainEntryClickHouseCompressor(int argc, char ** argv);
int mainEntryClickHouseFormat(int argc, char ** argv);
int mainEntryClickHouseObfuscator(int argc, char ** argv);
2020-09-07 03:22:47 +00:00
int mainEntryClickHouseGitImport(int argc, char ** argv);
int mainEntryClickHouseStaticFilesDiskUploader(int argc, char ** argv);
int mainEntryClickHouseSU(int argc, char ** argv);
int mainEntryClickHouseDisks(int argc, char ** argv);
int mainEntryClickHouseHashBinary(int, char **)
{
/// Intentionally without newline. So you can run:
/// objcopy --add-section .clickhouse.hash=<(./clickhouse hash-binary) clickhouse
std::cout << getHashOfLoadedBinaryHex();
return 0;
}
2021-05-12 10:39:07 +00:00
#if ENABLE_CLICKHOUSE_KEEPER
int mainEntryClickHouseKeeper(int argc, char ** argv);
#endif
2022-04-19 15:44:11 +00:00
#if ENABLE_CLICKHOUSE_KEEPER_CONVERTER
2021-06-17 16:32:50 +00:00
int mainEntryClickHouseKeeperConverter(int argc, char ** argv);
#endif
2023-03-10 02:23:57 +00:00
#if ENABLE_CLICKHOUSE_KEEPER_CLIENT
int mainEntryClickHouseKeeperClient(int argc, char ** argv);
#endif
// install
2020-08-08 18:38:34 +00:00
int mainEntryClickHouseInstall(int argc, char ** argv);
int mainEntryClickHouseStart(int argc, char ** argv);
int mainEntryClickHouseStop(int argc, char ** argv);
int mainEntryClickHouseStatus(int argc, char ** argv);
int mainEntryClickHouseRestart(int argc, char ** argv);
namespace
{
using MainFunc = int (*)(int, char**);
2021-12-06 18:27:06 +00:00
#if !defined(FUZZING_MODE)
/// Add an item here to register new application
std::pair<std::string_view, MainFunc> clickhouse_applications[] =
{
{"local", mainEntryClickHouseLocal},
{"client", mainEntryClickHouseClient},
{"benchmark", mainEntryClickHouseBenchmark},
{"server", mainEntryClickHouseServer},
{"extract-from-config", mainEntryClickHouseExtractFromConfig},
{"compressor", mainEntryClickHouseCompressor},
{"format", mainEntryClickHouseFormat},
{"obfuscator", mainEntryClickHouseObfuscator},
2020-09-07 03:22:47 +00:00
{"git-import", mainEntryClickHouseGitImport},
{"static-files-disk-uploader", mainEntryClickHouseStaticFilesDiskUploader},
{"su", mainEntryClickHouseSU},
{"hash-binary", mainEntryClickHouseHashBinary},
{"disks", mainEntryClickHouseDisks},
// keeper
2021-05-12 10:39:07 +00:00
#if ENABLE_CLICKHOUSE_KEEPER
{"keeper", mainEntryClickHouseKeeper},
#endif
2021-06-17 16:32:50 +00:00
#if ENABLE_CLICKHOUSE_KEEPER_CONVERTER
{"keeper-converter", mainEntryClickHouseKeeperConverter},
#endif
2023-03-10 02:23:57 +00:00
#if ENABLE_CLICKHOUSE_KEEPER_CLIENT
{"keeper-client", mainEntryClickHouseKeeperClient},
#endif
// install
2020-08-08 18:38:34 +00:00
{"install", mainEntryClickHouseInstall},
{"start", mainEntryClickHouseStart},
{"stop", mainEntryClickHouseStop},
{"status", mainEntryClickHouseStatus},
{"restart", mainEntryClickHouseRestart},
};
2017-12-02 02:47:12 +00:00
int printHelp(int, char **)
{
std::cerr << "Use one of the following commands:" << std::endl;
for (auto & application : clickhouse_applications)
std::cerr << "clickhouse " << application.first << " [args] " << std::endl;
return -1;
2018-08-26 01:14:00 +00:00
}
#endif
2024-03-17 15:22:33 +00:00
/// Add an item here to register a new short name
std::pair<std::string_view, std::string_view> clickhouse_short_names[] =
{
{"chl", "local"},
{"chc", "client"},
};
enum class InstructionFail
{
NONE = 0,
SSE3 = 1,
SSSE3 = 2,
SSE4_1 = 3,
SSE4_2 = 4,
2020-05-24 16:39:31 +00:00
POPCNT = 5,
AVX = 6,
AVX2 = 7,
AVX512 = 8
};
2021-06-02 04:48:31 +00:00
auto instructionFailToString(InstructionFail fail)
{
switch (fail)
{
2022-04-16 22:19:36 +00:00
#define ret(x) return std::make_tuple(STDERR_FILENO, x, sizeof(x) - 1)
case InstructionFail::NONE:
ret("NONE");
case InstructionFail::SSE3:
ret("SSE3");
case InstructionFail::SSSE3:
ret("SSSE3");
case InstructionFail::SSE4_1:
ret("SSE4.1");
case InstructionFail::SSE4_2:
ret("SSE4.2");
2020-05-24 16:39:31 +00:00
case InstructionFail::POPCNT:
ret("POPCNT");
case InstructionFail::AVX:
ret("AVX");
case InstructionFail::AVX2:
ret("AVX2");
case InstructionFail::AVX512:
ret("AVX512");
}
UNREACHABLE();
}
sigjmp_buf jmpbuf;
[[noreturn]] void sigIllCheckHandler(int, siginfo_t *, void *)
{
siglongjmp(jmpbuf, 1);
}
2020-03-27 08:14:20 +00:00
/// Check if necessary SSE extensions are available by trying to execute some sse instructions.
/// If instruction is unavailable, SIGILL will be sent by kernel.
void checkRequiredInstructionsImpl(volatile InstructionFail & fail)
{
#if defined(__SSE3__)
fail = InstructionFail::SSE3;
__asm__ volatile ("addsubpd %%xmm0, %%xmm0" : : : "xmm0");
#endif
#if defined(__SSSE3__)
fail = InstructionFail::SSSE3;
__asm__ volatile ("pabsw %%xmm0, %%xmm0" : : : "xmm0");
#endif
#if defined(__SSE4_1__)
fail = InstructionFail::SSE4_1;
__asm__ volatile ("pmaxud %%xmm0, %%xmm0" : : : "xmm0");
#endif
#if defined(__SSE4_2__)
fail = InstructionFail::SSE4_2;
__asm__ volatile ("pcmpgtq %%xmm0, %%xmm0" : : : "xmm0");
#endif
2020-05-24 16:39:31 +00:00
/// Defined by -msse4.2
#if defined(__POPCNT__)
fail = InstructionFail::POPCNT;
{
uint64_t a = 0;
uint64_t b = 0;
2020-05-27 17:23:09 +00:00
__asm__ volatile ("popcnt %1, %0" : "=r"(a) :"r"(b) :);
2020-05-24 16:39:31 +00:00
}
#endif
#if defined(__AVX__)
fail = InstructionFail::AVX;
__asm__ volatile ("vaddpd %%ymm0, %%ymm0, %%ymm0" : : : "ymm0");
#endif
#if defined(__AVX2__)
fail = InstructionFail::AVX2;
__asm__ volatile ("vpabsw %%ymm0, %%ymm0" : : : "ymm0");
#endif
#if defined(__AVX512__)
fail = InstructionFail::AVX512;
__asm__ volatile ("vpabsw %%zmm0, %%zmm0" : : : "zmm0");
#endif
fail = InstructionFail::NONE;
}
/// Macros to avoid using strlen(), since it may fail if SSE is not supported.
2020-10-05 06:28:52 +00:00
#define writeError(data) do \
{ \
static_assert(__builtin_constant_p(data)); \
2022-04-16 22:19:36 +00:00
if (!writeRetry(STDERR_FILENO, data, sizeof(data) - 1)) \
2021-06-02 04:48:31 +00:00
_Exit(1); \
2020-10-05 06:28:52 +00:00
} while (false)
2020-03-26 17:53:57 +00:00
/// Check SSE and others instructions availability. Calls exit on fail.
/// This function must be called as early as possible, even before main, because static initializers may use unavailable instructions.
void checkRequiredInstructions()
{
struct sigaction sa{};
struct sigaction sa_old{};
sa.sa_sigaction = sigIllCheckHandler;
sa.sa_flags = SA_SIGINFO;
auto signal = SIGILL;
if (sigemptyset(&sa.sa_mask) != 0
|| sigaddset(&sa.sa_mask, signal) != 0
|| sigaction(signal, &sa, &sa_old) != 0)
{
2020-03-26 17:53:57 +00:00
/// You may wonder about strlen.
/// Typical implementation of strlen is using SSE4.2 or AVX2.
/// But this is not the case because it's compiler builtin and is executed at compile time.
writeError("Can not set signal handler\n");
2020-03-26 17:53:57 +00:00
_Exit(1);
}
volatile InstructionFail fail = InstructionFail::NONE;
if (sigsetjmp(jmpbuf, 1))
{
writeError("Instruction check fail. The CPU does not support ");
2021-06-02 04:48:31 +00:00
if (!std::apply(writeRetry, instructionFailToString(fail)))
_Exit(1);
writeError(" instruction set.\n");
2020-03-26 17:53:57 +00:00
_Exit(1);
}
checkRequiredInstructionsImpl(fail);
if (sigaction(signal, &sa_old, nullptr))
{
writeError("Can not set signal handler\n");
2020-03-26 17:53:57 +00:00
_Exit(1);
}
}
struct Checker
{
Checker()
{
checkRequiredInstructions();
}
2021-11-25 23:03:04 +00:00
} checker
#ifndef OS_DARWIN
2021-11-25 23:03:04 +00:00
__attribute__((init_priority(101))) /// Run before other static initializers.
#endif
;
2020-03-26 17:53:57 +00:00
2022-04-16 22:09:20 +00:00
2023-08-24 23:21:37 +00:00
#if !defined(FUZZING_MODE) && !defined(USE_MUSL)
/// NOTE: We will migrate to full static linking or our own dynamic loader to make this code obsolete.
void checkHarmfulEnvironmentVariables(char ** argv)
{
std::initializer_list<const char *> harmful_env_variables = {
/// The list is a selection from "man ld-linux".
"LD_PRELOAD",
"LD_LIBRARY_PATH",
"LD_ORIGIN_PATH",
"LD_AUDIT",
"LD_DYNAMIC_WEAK",
/// The list is a selection from "man dyld" (osx).
"DYLD_LIBRARY_PATH",
"DYLD_FALLBACK_LIBRARY_PATH",
"DYLD_VERSIONED_LIBRARY_PATH",
"DYLD_INSERT_LIBRARIES",
};
bool require_reexec = false;
for (const auto * var : harmful_env_variables)
{
if (const char * value = getenv(var); value && value[0]) // NOLINT(concurrency-mt-unsafe)
{
/// NOTE: setenv() is used over unsetenv() since unsetenv() marked as harmful
if (setenv(var, "", true)) // NOLINT(concurrency-mt-unsafe) // this is safe if not called concurrently
{
fmt::print(stderr, "Cannot override {} environment variable", var);
_exit(1);
}
require_reexec = true;
}
}
if (require_reexec)
{
/// Use execvp() over execv() to search in PATH.
///
/// This should be safe, since:
/// - if argv[0] is relative path - it is OK
/// - if argv[0] has only basename, the it will search in PATH, like shell will do.
///
/// Also note, that this (search in PATH) because there is no easy and
/// portable way to get absolute path of argv[0].
/// - on linux there is /proc/self/exec and AT_EXECFN
/// - but on other OSes there is no such thing (especially on OSX).
///
/// And since static linking will be done someday anyway,
/// let's not pollute the code base with special cases.
int error = execvp(argv[0], argv);
_exit(error);
}
}
#endif
2024-01-16 08:44:52 +00:00
#if defined(SANITIZE_COVERAGE)
__attribute__((no_sanitize("coverage")))
void dumpCoverage()
{
/// A user can request to dump the coverage information into files at exit.
/// This is useful for non-server applications such as clickhouse-format or clickhouse-client,
/// that cannot introspect it with SQL functions at runtime.
/// The CLICKHOUSE_WRITE_COVERAGE environment variable defines a prefix for a filename 'prefix.pid'
/// containing the list of addresses of covered .
/// The format is even simpler than Clang's "sancov": an array of 64-bit addresses, native byte order, no header.
if (const char * coverage_filename_prefix = getenv("CLICKHOUSE_WRITE_COVERAGE")) // NOLINT(concurrency-mt-unsafe)
{
auto dump = [](const std::string & name, auto span)
{
/// Write only non-zeros.
std::vector<uintptr_t> data;
data.reserve(span.size());
for (auto addr : span)
if (addr)
data.push_back(addr);
int fd = ::open(name.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0400);
if (-1 == fd)
{
writeError("Cannot open a file to write the coverage data\n");
}
else
{
if (!writeRetry(fd, reinterpret_cast<const char *>(data.data()), data.size() * sizeof(data[0])))
writeError("Cannot write the coverage data to a file\n");
if (0 != ::close(fd))
writeError("Cannot close the file with coverage data\n");
}
};
dump(fmt::format("{}.{}", coverage_filename_prefix, getpid()), getCumulativeCoverage());
}
}
#endif
}
bool isClickhouseApp(std::string_view app_suffix, std::vector<char *> & argv)
{
for (const auto & [alias, name] : clickhouse_short_names)
if (app_suffix == name
&& !argv.empty() && (alias == argv[0] || endsWith(argv[0], "/" + std::string(alias))))
return true;
/// Use app if the first arg 'app' is passed (the arg should be quietly removed)
if (argv.size() >= 2)
{
auto first_arg = argv.begin() + 1;
/// 'clickhouse --client ...' and 'clickhouse client ...' are Ok
if (*first_arg == app_suffix
|| (std::string_view(*first_arg).starts_with("--") && std::string_view(*first_arg).substr(2) == app_suffix))
{
argv.erase(first_arg);
return true;
}
}
/// Use app if clickhouse binary is run through symbolic link with name clickhouse-app
std::string app_name = "clickhouse-" + std::string(app_suffix);
return !argv.empty() && (app_name == argv[0] || endsWith(argv[0], "/" + app_name));
}
2022-09-17 01:55:39 +00:00
/// Don't allow dlopen in the main ClickHouse binary, because it is harmful and insecure.
/// We don't use it. But it can be used by some libraries for implementation of "plugins".
/// We absolutely discourage the ancient technique of loading
/// 3rd-party uncontrolled dangerous libraries into the process address space,
/// because it is insane.
2022-10-01 14:29:41 +00:00
#if !defined(USE_MUSL)
2022-09-17 01:55:39 +00:00
extern "C"
{
void * dlopen(const char *, int)
{
return nullptr;
}
2022-09-17 16:40:22 +00:00
void * dlmopen(long, const char *, int) // NOLINT
2022-09-17 01:55:39 +00:00
{
return nullptr;
}
int dlclose(void *)
{
return 0;
}
const char * dlerror()
{
return "ClickHouse does not allow dynamic library loading";
}
}
2022-10-01 14:29:41 +00:00
#endif
2022-09-17 01:55:39 +00:00
/// This allows to implement assert to forbid initialization of a class in static constructors.
/// Usage:
///
/// extern bool inside_main;
/// class C { C() { assert(inside_main); } };
#ifndef FUZZING_MODE
bool inside_main = false;
#else
bool inside_main = true;
#endif
2021-12-06 18:27:06 +00:00
#if !defined(FUZZING_MODE)
int main(int argc_, char ** argv_)
{
inside_main = true;
SCOPE_EXIT({ inside_main = false; });
2022-05-11 01:16:10 +00:00
/// PHDR cache is required for query profiler to work reliably
/// It also speed up exception handling, but exceptions from dynamically loaded libraries (dlopen)
/// will work only after additional call of this function.
2022-09-17 01:02:34 +00:00
/// Note: we forbid dlopen in our code.
2022-05-11 01:16:10 +00:00
updatePHDRCache();
#if !defined(USE_MUSL)
checkHarmfulEnvironmentVariables(argv_);
#endif
/// This is used for testing. For example,
/// clickhouse-local should be able to run a simple query without throw/catch.
if (getenv("CLICKHOUSE_TERMINATE_ON_ANY_EXCEPTION")) // NOLINT(concurrency-mt-unsafe)
DB::terminate_on_any_exception = true;
/// Reset new handler to default (that throws std::bad_alloc)
/// It is needed because LLVM library clobbers it.
std::set_new_handler(nullptr);
std::vector<char *> argv(argv_, argv_ + argc_);
/// Print a basic help if nothing was matched
MainFunc main_func = printHelp;
for (auto & application : clickhouse_applications)
{
if (isClickhouseApp(application.first, argv))
{
main_func = application.second;
break;
}
}
/// Interpret binary without argument or with arguments starts with dash
/// ('-') as clickhouse-local for better usability:
///
/// clickhouse # dumps help
/// clickhouse -q 'select 1' # use local
/// clickhouse # spawn local
/// clickhouse local # spawn local
///
if (main_func == printHelp && !argv.empty() && (argv.size() == 1 || argv[1][0] == '-'))
main_func = mainEntryClickHouseLocal;
int exit_code = main_func(static_cast<int>(argv.size()), argv.data());
#if defined(SANITIZE_COVERAGE)
2024-01-16 08:44:52 +00:00
dumpCoverage();
#endif
return exit_code;
}
#endif