Send MAINPID= notification from the parent (watchdog) process to make systemd handle the READY=1 notification from the child

This commit is contained in:
Alexander Gololobov 2022-12-05 22:20:04 +01:00
parent 58fcf0fdb0
commit 688e6fe714
4 changed files with 77 additions and 61 deletions

View File

@ -11,8 +11,8 @@ Wants=time-sync.target
[Service]
Type=notify
# Switching off watchdog is very important for sd_notify to work correctly.
Environment=CLICKHOUSE_WATCHDOG_ENABLE=0
# NOTE: we leave clickhouse watchdog process enabled to be able to see OOM/SIGKILL traces in clickhouse-server.log files.
# If you wish to disable the watchdog and rely on systemd logs just add "Environment=CLICKHOUSE_WATCHDOG_ENABLE=0" line.
User=clickhouse
Group=clickhouse
Restart=always

View File

@ -1,6 +1,5 @@
#include "Server.h"
#include <cerrno>
#include <memory>
#include <sys/resource.h>
#include <sys/stat.h>
@ -288,7 +287,6 @@ namespace ErrorCodes
extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA;
extern const int NETWORK_ERROR;
extern const int CORRUPTED_DATA;
extern const int SYSTEM_ERROR;
}
@ -662,63 +660,6 @@ static void sanityChecks(Server & server)
}
}
#if defined(OS_LINUX)
/// Sends notification to systemd, analogous to sd_notify from libsystemd.
/// See https://man7.org/linux/man-pages/man3/sd_notify.3.html for more information on the supported notifications.
static void systemdNotify(const std::string_view & command)
{
const char * path = getenv("NOTIFY_SOCKET"); // NOLINT(concurrency-mt-unsafe)
if (path == nullptr)
return; /// not using systemd
int s = socket(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0);
if (s == -1)
throwFromErrno("Can't create UNIX socket for systemd notify.", ErrorCodes::SYSTEM_ERROR);
SCOPE_EXIT({ close(s); });
const size_t len = strlen(path);
struct sockaddr_un addr;
addr.sun_family = AF_UNIX;
if (len < 2 || len > sizeof(addr.sun_path) - 1)
throw Exception(ErrorCodes::SYSTEM_ERROR, "NOTIFY_SOCKET env var value \"{}\" is wrong.", path);
memcpy(addr.sun_path, path, len + 1); /// write last zero as well.
size_t addrlen = offsetof(struct sockaddr_un, sun_path) + len;
/// '@' means this is Linux abstract socket, per documentation sun_path[0] must be set to '\0' for it.
if (path[0] == '@')
addr.sun_path[0] = 0;
else if (path[0] == '/')
addrlen += 1; /// non-abstract-addresses should be zero terminated.
else
throw Exception(ErrorCodes::SYSTEM_ERROR, "Wrong UNIX path \"{}\" in NOTIFY_SOCKET env var", path);
const struct sockaddr *sock_addr = reinterpret_cast <const struct sockaddr *>(&addr);
size_t sent_bytes_total = 0;
while (sent_bytes_total < command.size())
{
auto sent_bytes = sendto(s, command.data() + sent_bytes_total, command.size() - sent_bytes_total, 0, sock_addr, static_cast<socklen_t>(addrlen));
if (sent_bytes == -1)
{
if (errno == EINTR)
continue;
else
throwFromErrno("Failed to notify systemd, sendto returned error.", ErrorCodes::SYSTEM_ERROR);
}
else
sent_bytes_total += sent_bytes;
}
}
#endif
int Server::main(const std::vector<std::string> & /*args*/)
try
{
@ -1857,6 +1798,10 @@ try
#if defined(OS_LINUX)
/// Tell the service manager that service startup is finished.
/// NOTE: the parent clickhouse-watchdog process must do systemdNotify("MAINPID={}\n", child_pid); before
/// the child process notifies 'READY=1'. So there is a possibility of a race condition but the chances should be low
/// because the parent process does 'MAINPID=...' straight away after fork() and the child process does the actual
/// initialization and only then sends 'READY=1'.
systemdNotify("READY=1\n");
#endif

View File

@ -76,6 +76,7 @@ namespace DB
{
extern const int CANNOT_SET_SIGNAL_HANDLER;
extern const int CANNOT_SEND_SIGNAL;
extern const int SYSTEM_ERROR;
}
}
@ -1013,6 +1014,15 @@ void BaseDaemon::setupWatchdog()
return;
}
#if defined(OS_LINUX)
/// Tell the service manager the actual main process is not this one but the forked process
/// because it is going to be serving the requests and it is going to send "READY=1" notification
/// when it is fully started.
/// NOTE: we do this right after fork() to minimize chances that the child process finishes initialization
/// and sends "READY=1" before we send "MAINPID=..."
systemdNotify(fmt::format("MAINPID={}\n", pid));
#endif
/// Change short thread name and process name.
setThreadName("clckhouse-watch"); /// 15 characters
@ -1131,3 +1141,58 @@ String BaseDaemon::getStoredBinaryHash() const
{
return stored_binary_hash;
}
#if defined(OS_LINUX)
void systemdNotify(const std::string_view & command)
{
const char * path = getenv("NOTIFY_SOCKET"); // NOLINT(concurrency-mt-unsafe)
if (path == nullptr)
return; /// not using systemd
int s = socket(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0);
if (s == -1)
DB::throwFromErrno("Can't create UNIX socket for systemd notify.", DB::ErrorCodes::SYSTEM_ERROR);
SCOPE_EXIT({ close(s); });
const size_t len = strlen(path);
struct sockaddr_un addr;
addr.sun_family = AF_UNIX;
if (len < 2 || len > sizeof(addr.sun_path) - 1)
throw DB::Exception(DB::ErrorCodes::SYSTEM_ERROR, "NOTIFY_SOCKET env var value \"{}\" is wrong.", path);
memcpy(addr.sun_path, path, len + 1); /// write last zero as well.
size_t addrlen = offsetof(struct sockaddr_un, sun_path) + len;
/// '@' means this is Linux abstract socket, per documentation sun_path[0] must be set to '\0' for it.
if (path[0] == '@')
addr.sun_path[0] = 0;
else if (path[0] == '/')
addrlen += 1; /// non-abstract-addresses should be zero terminated.
else
throw DB::Exception(DB::ErrorCodes::SYSTEM_ERROR, "Wrong UNIX path \"{}\" in NOTIFY_SOCKET env var", path);
const struct sockaddr *sock_addr = reinterpret_cast <const struct sockaddr *>(&addr);
size_t sent_bytes_total = 0;
while (sent_bytes_total < command.size())
{
auto sent_bytes = sendto(s, command.data() + sent_bytes_total, command.size() - sent_bytes_total, 0, sock_addr, static_cast<socklen_t>(addrlen));
if (sent_bytes == -1)
{
if (errno == EINTR)
continue;
else
DB::throwFromErrno("Failed to notify systemd, sendto returned error.", DB::ErrorCodes::SYSTEM_ERROR);
}
else
sent_bytes_total += sent_bytes;
}
}
#endif

View File

@ -197,3 +197,9 @@ std::optional<std::reference_wrapper<Daemon>> BaseDaemon::tryGetInstance()
else
return {};
}
#if defined(OS_LINUX)
/// Sends a notification (e.g. "server is ready") to systemd, analogous to sd_notify from libsystemd.
/// See https://www.freedesktop.org/software/systemd/man/sd_notify.html for more information on the supported notifications.
/// `command` is the notification text, e.g. "READY=1\n" or "MAINPID=<pid>\n".
/// Does nothing if the NOTIFY_SOCKET environment variable is not set (i.e. systemd is not used).
/// Throws an exception with the SYSTEM_ERROR code if the NOTIFY_SOCKET value is not a usable UNIX socket path,
/// or if creating the socket or sending the notification fails.
void systemdNotify(const std::string_view & command);
#endif