Merge pull request #43597 from ClickHouse/sd_notify_followup_fixes

Followup fixes for systemd notification (#43400)
This commit is contained in:
Alexey Milovidov 2022-12-17 04:35:05 +03:00 committed by GitHub
commit 30fd9b8317
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 99 additions and 49 deletions

View File

@ -11,8 +11,8 @@ Wants=time-sync.target
[Service]
Type=notify
# Switching off watchdog is very important for sd_notify to work correctly.
Environment=CLICKHOUSE_WATCHDOG_ENABLE=0
# NOTE: we leave clickhouse watchdog process enabled to be able to see OOM/SIGKILL traces in clickhouse-server.log files.
# If you wish to disable the watchdog and rely on systemd logs, just add the line "Environment=CLICKHOUSE_WATCHDOG_ENABLE=0".
User=clickhouse
Group=clickhouse
Restart=always

View File

@ -287,7 +287,6 @@ namespace ErrorCodes
extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA;
extern const int NETWORK_ERROR;
extern const int CORRUPTED_DATA;
extern const int SYSTEM_ERROR;
}
@ -661,51 +660,6 @@ static void sanityChecks(Server & server)
}
}
#if defined(OS_LINUX)
/// Sends notification to systemd, analogous to sd_notify from libsystemd
static void systemdNotify(const std::string_view & command)
{
const char * path = getenv("NOTIFY_SOCKET"); // NOLINT(concurrency-mt-unsafe)
if (path == nullptr)
return; /// not using systemd
int s = socket(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0);
if (s == -1)
throwFromErrno("Can't create UNIX socket for systemd notify.", ErrorCodes::SYSTEM_ERROR);
SCOPE_EXIT({ close(s); });
const size_t len = strlen(path);
struct sockaddr_un addr;
addr.sun_family = AF_UNIX;
if (len < 2 || len > sizeof(addr.sun_path) - 1)
throw Exception(ErrorCodes::SYSTEM_ERROR, "NOTIFY_SOCKET env var value \"{}\" is wrong.", path);
memcpy(addr.sun_path, path, len + 1); /// write last zero as well.
size_t addrlen = offsetof(struct sockaddr_un, sun_path) + len;
/// '@' meass this is Linux abstract socket, per documentation it must be sun_path[0] must be set to '\0' for it.
if (path[0] == '@')
addr.sun_path[0] = 0;
else if (path[0] == '/')
addrlen += 1; /// non-abstract-addresses should be zero terminated.
else
throw Exception(ErrorCodes::SYSTEM_ERROR, "Wrong UNIX path \"{}\" in NOTIFY_SOCKET env var", path);
const struct sockaddr *sock_addr = reinterpret_cast <const struct sockaddr *>(&addr);
if (sendto(s, command.data(), command.size(), 0, sock_addr, static_cast <socklen_t>(addrlen)) != static_cast <ssize_t>(command.size()))
throw Exception("Failed to notify systemd.", ErrorCodes::SYSTEM_ERROR);
}
#endif
int Server::main(const std::vector<std::string> & /*args*/)
try
{
@ -1861,6 +1815,9 @@ try
}
#if defined(OS_LINUX)
/// Tell the service manager that service startup is finished.
/// NOTE: the parent clickhouse-watchdog process must do systemdNotify("MAINPID={}\n", child_pid); before
/// the child process notifies 'READY=1'.
systemdNotify("READY=1\n");
#endif

View File

@ -30,6 +30,7 @@
#include <Poco/Util/Application.h>
#include <Poco/Exception.h>
#include <Poco/ErrorHandler.h>
#include <Poco/Pipe.h>
#include <Common/ErrorHandlers.h>
#include <base/argsToConfig.h>
@ -76,6 +77,7 @@ namespace DB
{
extern const int CANNOT_SET_SIGNAL_HANDLER;
extern const int CANNOT_SEND_SIGNAL;
extern const int SYSTEM_ERROR;
}
}
@ -1007,11 +1009,15 @@ void BaseDaemon::setupWatchdog()
while (true)
{
/// This pipe is used to synchronize notifications to the service manager from the child process
/// to be sent after the notifications from the parent process.
Poco::Pipe notify_sync;
static pid_t pid = -1;
pid = fork();
if (-1 == pid)
throw Poco::Exception("Cannot fork");
DB::throwFromErrno("Cannot fork", DB::ErrorCodes::SYSTEM_ERROR);
if (0 == pid)
{
@ -1019,10 +1025,36 @@ void BaseDaemon::setupWatchdog()
#if defined(OS_LINUX)
if (0 != prctl(PR_SET_PDEATHSIG, SIGKILL))
logger().warning("Cannot do prctl to ask termination with parent.");
if (getppid() == 1)
throw Poco::Exception("Parent watchdog process has exited.");
#endif
{
notify_sync.close(Poco::Pipe::CLOSE_WRITE);
/// Read from the pipe will block until the pipe is closed.
/// This way we synchronize with the parent process.
char buf[1];
if (0 != notify_sync.readBytes(buf, sizeof(buf)))
throw Poco::Exception("Unexpected result while waiting for watchdog synchronization pipe to close.");
}
return;
}
#if defined(OS_LINUX)
/// Tell the service manager the actual main process is not this one but the forked process
/// because it is going to be serving the requests and it is going to send "READY=1" notification
/// when it is fully started.
/// NOTE: we do this right after fork() and then notify the child process to "unblock" so that it finishes initialization
/// and sends "READY=1" after we have sent "MAINPID=..."
systemdNotify(fmt::format("MAINPID={}\n", pid));
#endif
/// Close the pipe after notifying the service manager.
/// The child process is waiting for the pipe to be closed.
notify_sync.close();
/// Change short thread name and process name.
setThreadName("clckhouse-watch"); /// 15 characters
@ -1141,3 +1173,58 @@ String BaseDaemon::getStoredBinaryHash() const
{
return stored_binary_hash;
}
#if defined(OS_LINUX)
void systemdNotify(const std::string_view & command)
{
const char * path = getenv("NOTIFY_SOCKET"); // NOLINT(concurrency-mt-unsafe)
if (path == nullptr)
return; /// not using systemd
int s = socket(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0);
if (s == -1)
DB::throwFromErrno("Can't create UNIX socket for systemd notify.", DB::ErrorCodes::SYSTEM_ERROR);
SCOPE_EXIT({ close(s); });
const size_t len = strlen(path);
struct sockaddr_un addr;
addr.sun_family = AF_UNIX;
if (len < 2 || len > sizeof(addr.sun_path) - 1)
throw DB::Exception(DB::ErrorCodes::SYSTEM_ERROR, "NOTIFY_SOCKET env var value \"{}\" is wrong.", path);
memcpy(addr.sun_path, path, len + 1); /// write last zero as well.
size_t addrlen = offsetof(struct sockaddr_un, sun_path) + len;
/// '@' means this is Linux abstract socket, per documentation sun_path[0] must be set to '\0' for it.
if (path[0] == '@')
addr.sun_path[0] = 0;
else if (path[0] == '/')
addrlen += 1; /// non-abstract-addresses should be zero terminated.
else
throw DB::Exception(DB::ErrorCodes::SYSTEM_ERROR, "Wrong UNIX path \"{}\" in NOTIFY_SOCKET env var", path);
const struct sockaddr *sock_addr = reinterpret_cast <const struct sockaddr *>(&addr);
size_t sent_bytes_total = 0;
while (sent_bytes_total < command.size())
{
auto sent_bytes = sendto(s, command.data() + sent_bytes_total, command.size() - sent_bytes_total, 0, sock_addr, static_cast<socklen_t>(addrlen));
if (sent_bytes == -1)
{
if (errno == EINTR)
continue;
else
DB::throwFromErrno("Failed to notify systemd, sendto returned error.", DB::ErrorCodes::SYSTEM_ERROR);
}
else
sent_bytes_total += sent_bytes;
}
}
#endif

View File

@ -197,3 +197,9 @@ std::optional<std::reference_wrapper<Daemon>> BaseDaemon::tryGetInstance()
else
return {};
}
#if defined(OS_LINUX)
/// Sends notification (e.g. "server is ready") to systemd, analogous to sd_notify from libsystemd.
/// See https://www.freedesktop.org/software/systemd/man/sd_notify.html for more information on the supported notifications.
/// Does nothing if the NOTIFY_SOCKET environment variable is not set.
/// Throws an exception with the SYSTEM_ERROR code if NOTIFY_SOCKET holds an unusable path
/// or if the notification cannot be sent.
void systemdNotify(const std::string_view & command);
#endif