From 3fdff1abea94fc24d4c22e4ce9573fbf64606a68 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 22 Aug 2018 00:05:30 +0300 Subject: [PATCH] TaskStats: better code #2482 --- dbms/programs/server/Server.cpp | 2 +- dbms/src/Common/TaskStatsInfoGetter.cpp | 49 ++++++++++++++----------- dbms/src/Common/TaskStatsInfoGetter.h | 2 +- dbms/src/Common/ThreadStatus.cpp | 2 +- 4 files changed, 30 insertions(+), 25 deletions(-) diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index f7d66269aea..b22ab82559d 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -366,7 +366,7 @@ int Server::main(const std::vector & /*args*/) dns_cache_updater = std::make_unique(*global_context); } - if (!TaskStatsInfoGetter::checkProcessHasRequiredPermissions()) + if (!TaskStatsInfoGetter::checkPermissions()) { LOG_INFO(log, "It looks like the process has no CAP_NET_ADMIN capability, some performance statistics will be disabled." " It could happen due to incorrect ClickHouse package installation." diff --git a/dbms/src/Common/TaskStatsInfoGetter.cpp b/dbms/src/Common/TaskStatsInfoGetter.cpp index 339016a2bcc..2662a60cae3 100644 --- a/dbms/src/Common/TaskStatsInfoGetter.cpp +++ b/dbms/src/Common/TaskStatsInfoGetter.cpp @@ -14,6 +14,7 @@ #include #include + /// Basic idea is motivated by "iotop" tool. /// More info: https://www.kernel.org/doc/Documentation/accounting/taskstats.txt @@ -46,7 +47,7 @@ struct NetlinkMessage }; -int sendCommand( +void sendCommand( int sock_fd, UInt16 nlmsg_type, UInt32 nlmsg_pid, @@ -91,10 +92,8 @@ int sendCommand( buflen -= r; } else if (errno != EAGAIN) - return -1; + throwFromErrno("Can't send a Netlink command", ErrorCodes::NETLINK_ERROR); } - - return 0; } @@ -109,16 +108,19 @@ UInt16 getFamilyId(int nl_sock_fd) noexcept static char name[] = TASKSTATS_GENL_NAME; - if (sendCommand( + sendCommand( nl_sock_fd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY, CTRL_ATTR_FAMILY_NAME, (void *) name, - strlen(TASKSTATS_GENL_NAME) + 1)) - return 0; + strlen(TASKSTATS_GENL_NAME) + 1); UInt16 id = 0; ssize_t rep_len = ::recv(nl_sock_fd, &answer, sizeof(answer), 0); - if (answer.header.nlmsg_type == NLMSG_ERROR || (rep_len < 0) || !NLMSG_OK((&answer.header), rep_len)) - return 0; + if (rep_len < 0) + throwFromErrno("Cannot get the family id for " + std::string(TASKSTATS_GENL_NAME) + " from the Netlink socket", ErrorCodes::NETLINK_ERROR); + + if (answer.header.nlmsg_type == NLMSG_ERROR ||!NLMSG_OK((&answer.header), rep_len)) + throw Exception("Received an error instead of the family id for " + std::string(TASKSTATS_GENL_NAME) + + " from the Netlink socket", ErrorCodes::NETLINK_ERROR); const ::nlattr * attr; attr = static_cast(GENLMSG_DATA(&answer)); @@ -134,19 +136,20 @@ UInt16 getFamilyId(int nl_sock_fd) noexcept TaskStatsInfoGetter::TaskStatsInfoGetter() = default; + void TaskStatsInfoGetter::init() { if (netlink_socket_fd >= 0) return; - struct timeval tv; - tv.tv_sec = 0; - tv.tv_usec = 50000; - netlink_socket_fd = ::socket(PF_NETLINK, SOCK_RAW, NETLINK_GENERIC); if (netlink_socket_fd < 0) throwFromErrno("Can't create PF_NETLINK socket", ErrorCodes::NETLINK_ERROR); + struct timeval tv; + tv.tv_sec = 0; + tv.tv_usec = 50000; + if (0 != ::setsockopt(netlink_socket_fd, SOL_SOCKET, SO_RCVTIMEO, reinterpret_cast(&tv), sizeof(tv))) throwFromErrno("Can't set timeout on PF_NETLINK socket", ErrorCodes::NETLINK_ERROR); @@ -159,12 +162,12 @@ void TaskStatsInfoGetter::init() netlink_family_id = getFamilyId(netlink_socket_fd); } + bool TaskStatsInfoGetter::getStatImpl(int tid, ::taskstats & out_stats, bool throw_on_error) { init(); - if (sendCommand(netlink_socket_fd, netlink_family_id, tid, TASKSTATS_CMD_GET, TASKSTATS_CMD_ATTR_PID, &tid, sizeof(pid_t))) - throwFromErrno("Can't send a Netlink command", ErrorCodes::NETLINK_ERROR); + sendCommand(netlink_socket_fd, netlink_family_id, tid, TASKSTATS_CMD_GET, TASKSTATS_CMD_ATTR_PID, &tid, sizeof(pid_t)); NetlinkMessage msg; ssize_t rv = ::recv(netlink_socket_fd, &msg, sizeof(msg), 0); @@ -212,6 +215,7 @@ bool TaskStatsInfoGetter::getStatImpl(int tid, ::taskstats & out_stats, bool thr return true; } + void TaskStatsInfoGetter::getStat(::taskstats & stat, int tid) { tid = tid < 0 ? getDefaultTID() : tid; @@ -224,12 +228,6 @@ bool TaskStatsInfoGetter::tryGetStat(::taskstats & stat, int tid) return getStatImpl(tid, stat, false); } -TaskStatsInfoGetter::~TaskStatsInfoGetter() -{ - if (netlink_socket_fd >= 0) - close(netlink_socket_fd); -} - int TaskStatsInfoGetter::getCurrentTID() { /// This call is always successful. - man gettid @@ -251,11 +249,18 @@ static bool tryGetTaskStats() return getter.tryGetStat(stat); } -bool TaskStatsInfoGetter::checkProcessHasRequiredPermissions() +bool TaskStatsInfoGetter::checkPermissions() { /// It is thread- and exception- safe since C++11 static bool res = tryGetTaskStats(); return res; } + +TaskStatsInfoGetter::~TaskStatsInfoGetter() +{ + if (netlink_socket_fd >= 0) + close(netlink_socket_fd); +} + } diff --git a/dbms/src/Common/TaskStatsInfoGetter.h b/dbms/src/Common/TaskStatsInfoGetter.h index c89194cf88a..467422817f9 100644 --- a/dbms/src/Common/TaskStatsInfoGetter.h +++ b/dbms/src/Common/TaskStatsInfoGetter.h @@ -26,7 +26,7 @@ public: static int getCurrentTID(); /// Whether the current process has permissions (sudo or cap_net_admin capabilties) to get taskstats info - static bool checkProcessHasRequiredPermissions(); + static bool checkPermissions(); private: /// Caches current thread tid to avoid extra sys calls diff --git a/dbms/src/Common/ThreadStatus.cpp b/dbms/src/Common/ThreadStatus.cpp index d4cca1b326c..d4a47403a2b 100644 --- a/dbms/src/Common/ThreadStatus.cpp +++ b/dbms/src/Common/ThreadStatus.cpp @@ -70,7 +70,7 @@ void ThreadStatus::initPerformanceCounters() ++queries_started; *last_rusage = RUsageCounters::current(query_start_time_nanoseconds); - has_permissions_for_taskstats = TaskStatsInfoGetter::checkProcessHasRequiredPermissions(); + has_permissions_for_taskstats = TaskStatsInfoGetter::checkPermissions(); if (has_permissions_for_taskstats) *last_taskstats = TasksStatsCounters::current(); }