mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 08:32:02 +00:00
Send MAINPID= notification from the parent (watchdog) process to make systemd handle READY=1 notifiaction from the child
This commit is contained in:
parent
58fcf0fdb0
commit
688e6fe714
@ -11,8 +11,8 @@ Wants=time-sync.target
|
||||
[Service]
|
||||
Type=notify
|
||||
|
||||
# Switching off watchdog is very important for sd_notify to work correctly.
|
||||
Environment=CLICKHOUSE_WATCHDOG_ENABLE=0
|
||||
# NOTE: we leave clickhouse watchdog process enabled to be able to see OOM/SIGKILL traces in clickhouse-server.log files.
|
||||
# If you wish to disable the watchdog and rely on systemd logs just add "Environment=CLICKHOUSE_WATCHDOG_ENABLE=0" line.
|
||||
User=clickhouse
|
||||
Group=clickhouse
|
||||
Restart=always
|
||||
|
@ -1,6 +1,5 @@
|
||||
#include "Server.h"
|
||||
|
||||
#include <cerrno>
|
||||
#include <memory>
|
||||
#include <sys/resource.h>
|
||||
#include <sys/stat.h>
|
||||
@ -288,7 +287,6 @@ namespace ErrorCodes
|
||||
extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA;
|
||||
extern const int NETWORK_ERROR;
|
||||
extern const int CORRUPTED_DATA;
|
||||
extern const int SYSTEM_ERROR;
|
||||
}
|
||||
|
||||
|
||||
@ -662,63 +660,6 @@ static void sanityChecks(Server & server)
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(OS_LINUX)
|
||||
/// Sends notification to systemd, analogous to sd_notify from libsystemd.
|
||||
/// See https://man7.org/linux/man-pages/man3/sd_notify.3.html for more information on the supported notifications.
|
||||
static void systemdNotify(const std::string_view & command)
|
||||
{
|
||||
const char * path = getenv("NOTIFY_SOCKET"); // NOLINT(concurrency-mt-unsafe)
|
||||
|
||||
if (path == nullptr)
|
||||
return; /// not using systemd
|
||||
|
||||
int s = socket(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0);
|
||||
|
||||
if (s == -1)
|
||||
throwFromErrno("Can't create UNIX socket for systemd notify.", ErrorCodes::SYSTEM_ERROR);
|
||||
|
||||
SCOPE_EXIT({ close(s); });
|
||||
|
||||
const size_t len = strlen(path);
|
||||
|
||||
struct sockaddr_un addr;
|
||||
|
||||
addr.sun_family = AF_UNIX;
|
||||
|
||||
if (len < 2 || len > sizeof(addr.sun_path) - 1)
|
||||
throw Exception(ErrorCodes::SYSTEM_ERROR, "NOTIFY_SOCKET env var value \"{}\" is wrong.", path);
|
||||
|
||||
memcpy(addr.sun_path, path, len + 1); /// write last zero as well.
|
||||
|
||||
size_t addrlen = offsetof(struct sockaddr_un, sun_path) + len;
|
||||
|
||||
/// '@' means this is Linux abstract socket, per documentation sun_path[0] must be set to '\0' for it.
|
||||
if (path[0] == '@')
|
||||
addr.sun_path[0] = 0;
|
||||
else if (path[0] == '/')
|
||||
addrlen += 1; /// non-abstract-addresses should be zero terminated.
|
||||
else
|
||||
throw Exception(ErrorCodes::SYSTEM_ERROR, "Wrong UNIX path \"{}\" in NOTIFY_SOCKET env var", path);
|
||||
|
||||
const struct sockaddr *sock_addr = reinterpret_cast <const struct sockaddr *>(&addr);
|
||||
|
||||
size_t sent_bytes_total = 0;
|
||||
while (sent_bytes_total < command.size())
|
||||
{
|
||||
auto sent_bytes = sendto(s, command.data() + sent_bytes_total, command.size() - sent_bytes_total, 0, sock_addr, static_cast<socklen_t>(addrlen));
|
||||
if (sent_bytes == -1)
|
||||
{
|
||||
if (errno == EINTR)
|
||||
continue;
|
||||
else
|
||||
throwFromErrno("Failed to notify systemd, sendto returned error.", ErrorCodes::SYSTEM_ERROR);
|
||||
}
|
||||
else
|
||||
sent_bytes_total += sent_bytes;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
int Server::main(const std::vector<std::string> & /*args*/)
|
||||
try
|
||||
{
|
||||
@ -1857,6 +1798,10 @@ try
|
||||
|
||||
#if defined(OS_LINUX)
|
||||
/// Tell the service manager that service startup is finished.
|
||||
/// NOTE: the parent clickhouse-watchdog process must do systemdNotify("MAINPID={}\n", child_pid); before
|
||||
/// the child process notifies 'READY=1'. So there is a possibility of a race condition but the chances should be low
|
||||
/// because the parent process does 'MAINPID=...' straight away after fork() and the child process does the actual
|
||||
/// initialization and only then sends 'READY=1'.
|
||||
systemdNotify("READY=1\n");
|
||||
#endif
|
||||
|
||||
|
@ -76,6 +76,7 @@ namespace DB
|
||||
{
|
||||
extern const int CANNOT_SET_SIGNAL_HANDLER;
|
||||
extern const int CANNOT_SEND_SIGNAL;
|
||||
extern const int SYSTEM_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1013,6 +1014,15 @@ void BaseDaemon::setupWatchdog()
|
||||
return;
|
||||
}
|
||||
|
||||
#if defined(OS_LINUX)
|
||||
/// Tell the service manager the actual main process is not this one but the forked process
|
||||
/// because it is going to be serving the requests and it is going to send "READY=1" notification
|
||||
/// when it is fully started.
|
||||
/// NOTE: we do this right after fork() to minimize chances that the child process finishes initialization
|
||||
/// and sends "READY=1" before we send "MAINPID=..."
|
||||
systemdNotify(fmt::format("MAINPID={}\n", pid));
|
||||
#endif
|
||||
|
||||
/// Change short thread name and process name.
|
||||
setThreadName("clckhouse-watch"); /// 15 characters
|
||||
|
||||
@ -1131,3 +1141,58 @@ String BaseDaemon::getStoredBinaryHash() const
|
||||
{
|
||||
return stored_binary_hash;
|
||||
}
|
||||
|
||||
#if defined(OS_LINUX)
|
||||
void systemdNotify(const std::string_view & command)
|
||||
{
|
||||
const char * path = getenv("NOTIFY_SOCKET"); // NOLINT(concurrency-mt-unsafe)
|
||||
|
||||
if (path == nullptr)
|
||||
return; /// not using systemd
|
||||
|
||||
int s = socket(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0);
|
||||
|
||||
if (s == -1)
|
||||
DB::throwFromErrno("Can't create UNIX socket for systemd notify.", DB::ErrorCodes::SYSTEM_ERROR);
|
||||
|
||||
SCOPE_EXIT({ close(s); });
|
||||
|
||||
const size_t len = strlen(path);
|
||||
|
||||
struct sockaddr_un addr;
|
||||
|
||||
addr.sun_family = AF_UNIX;
|
||||
|
||||
if (len < 2 || len > sizeof(addr.sun_path) - 1)
|
||||
throw DB::Exception(DB::ErrorCodes::SYSTEM_ERROR, "NOTIFY_SOCKET env var value \"{}\" is wrong.", path);
|
||||
|
||||
memcpy(addr.sun_path, path, len + 1); /// write last zero as well.
|
||||
|
||||
size_t addrlen = offsetof(struct sockaddr_un, sun_path) + len;
|
||||
|
||||
/// '@' means this is Linux abstract socket, per documentation sun_path[0] must be set to '\0' for it.
|
||||
if (path[0] == '@')
|
||||
addr.sun_path[0] = 0;
|
||||
else if (path[0] == '/')
|
||||
addrlen += 1; /// non-abstract-addresses should be zero terminated.
|
||||
else
|
||||
throw DB::Exception(DB::ErrorCodes::SYSTEM_ERROR, "Wrong UNIX path \"{}\" in NOTIFY_SOCKET env var", path);
|
||||
|
||||
const struct sockaddr *sock_addr = reinterpret_cast <const struct sockaddr *>(&addr);
|
||||
|
||||
size_t sent_bytes_total = 0;
|
||||
while (sent_bytes_total < command.size())
|
||||
{
|
||||
auto sent_bytes = sendto(s, command.data() + sent_bytes_total, command.size() - sent_bytes_total, 0, sock_addr, static_cast<socklen_t>(addrlen));
|
||||
if (sent_bytes == -1)
|
||||
{
|
||||
if (errno == EINTR)
|
||||
continue;
|
||||
else
|
||||
DB::throwFromErrno("Failed to notify systemd, sendto returned error.", DB::ErrorCodes::SYSTEM_ERROR);
|
||||
}
|
||||
else
|
||||
sent_bytes_total += sent_bytes;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@ -197,3 +197,9 @@ std::optional<std::reference_wrapper<Daemon>> BaseDaemon::tryGetInstance()
|
||||
else
|
||||
return {};
|
||||
}
|
||||
|
||||
#if defined(OS_LINUX)
|
||||
/// Sends notification (e.g. "server is ready") to systemd, analogous to sd_notify from libsystemd.
|
||||
/// See https://www.freedesktop.org/software/systemd/man/sd_notify.html for more information on the supported notifications.
|
||||
void systemdNotify(const std::string_view & command);
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user