Merge pull request #65239 from ClickHouse/tracing-try-2

Initialize global profiler for Poco::ThreadPool (needed for keeper, etc)
Kseniia Sumarokova 2024-06-19 09:00:46 +00:00 committed by GitHub
commit 784f66cf2f
16 changed files with 309 additions and 43 deletions
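
In short: the vendored Poco::ThreadPool gains two optional constructor arguments carrying the global profiler sampling periods, and each PooledThread creates a DB::ThreadStatus and starts the profiler (when a non-zero period is given) before running any task, so threads of servers built on Poco's pool, Keeper in particular, show up in system.trace_log. A minimal usage sketch against the patched pool (not part of this PR; the task class and the one-second periods are illustrative, matching the test configs below):

#include <Poco/Runnable.h>
#include <Poco/ThreadPool.h>

/// Illustrative task; the real server schedules protocol handlers instead.
class ExampleTask : public Poco::Runnable
{
public:
    void run() override { /* work done on a pooled (and now profiled) thread */ }
};

int main()
{
    /// 1000000000 ns = one sample per second for both wall-clock and CPU profilers;
    /// passing 0 (the default) leaves the profiler off, preserving old behaviour.
    Poco::ThreadPool pool(
        /* minCapacity */ 3,
        /* maxCapacity */ 16,
        /* idleTime */ 60,
        /* stackSize */ POCO_THREAD_STACK_SIZE,
        /* global_profiler_real_time_period_ns_ */ 1000000000,
        /* global_profiler_cpu_time_period_ns_ */ 1000000000);

    ExampleTask task;
    pool.start(task);   /// run() executes on a PooledThread that created a DB::ThreadStatus first
    pool.joinAll();
    return 0;
}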

@@ -213,6 +213,7 @@ target_compile_definitions (_poco_foundation
 )
 target_include_directories (_poco_foundation SYSTEM PUBLIC "include")
+target_link_libraries (_poco_foundation PRIVATE clickhouse_common_io)
 target_link_libraries (_poco_foundation
     PRIVATE

@@ -48,7 +48,13 @@ class Foundation_API ThreadPool
	/// from the pool.
 {
 public:
-	ThreadPool(int minCapacity = 2, int maxCapacity = 16, int idleTime = 60, int stackSize = POCO_THREAD_STACK_SIZE);
+	explicit ThreadPool(
+		int minCapacity = 2,
+		int maxCapacity = 16,
+		int idleTime = 60,
+		int stackSize = POCO_THREAD_STACK_SIZE,
+		size_t global_profiler_real_time_period_ns_ = 0,
+		size_t global_profiler_cpu_time_period_ns_ = 0);
		/// Creates a thread pool with minCapacity threads.
		/// If required, up to maxCapacity threads are created
		/// a NoThreadAvailableException exception is thrown.
@@ -56,8 +62,14 @@
		/// and more than minCapacity threads are running, the thread
		/// is killed. Threads are created with given stack size.
-	ThreadPool(
-		const std::string & name, int minCapacity = 2, int maxCapacity = 16, int idleTime = 60, int stackSize = POCO_THREAD_STACK_SIZE);
+	explicit ThreadPool(
+		const std::string & name,
+		int minCapacity = 2,
+		int maxCapacity = 16,
+		int idleTime = 60,
+		int stackSize = POCO_THREAD_STACK_SIZE,
+		size_t global_profiler_real_time_period_ns_ = 0,
+		size_t global_profiler_cpu_time_period_ns_ = 0);
		/// Creates a thread pool with the given name and minCapacity threads.
		/// If required, up to maxCapacity threads are created
		/// a NoThreadAvailableException exception is thrown.
@@ -171,6 +183,8 @@ private:
	int _serial;
	int _age;
	int _stackSize;
+	size_t _globalProfilerRealTimePeriodNs;
+	size_t _globalProfilerCPUTimePeriodNs;
	ThreadVec _threads;
	mutable FastMutex _mutex;
 };

@@ -20,6 +20,7 @@
 #include "Poco/ErrorHandler.h"
 #include <sstream>
 #include <ctime>
+#include <Common/ThreadPool.h>
 namespace Poco {
@@ -28,7 +29,11 @@ namespace Poco {
 class PooledThread: public Runnable
 {
 public:
-	PooledThread(const std::string& name, int stackSize = POCO_THREAD_STACK_SIZE);
+	explicit PooledThread(
+		const std::string& name,
+		int stackSize = POCO_THREAD_STACK_SIZE,
+		size_t globalProfilerRealTimePeriodNs_ = 0,
+		size_t globalProfilerCPUTimePeriodNs_ = 0);
	~PooledThread();
	void start();
@@ -51,16 +56,24 @@ private:
	Event _targetCompleted;
	Event _started;
	FastMutex _mutex;
+	size_t _globalProfilerRealTimePeriodNs;
+	size_t _globalProfilerCPUTimePeriodNs;
 };
-PooledThread::PooledThread(const std::string& name, int stackSize):
-	_idle(true),
-	_idleTime(0),
-	_pTarget(0),
-	_name(name),
+PooledThread::PooledThread(
+	const std::string& name,
+	int stackSize,
+	size_t globalProfilerRealTimePeriodNs_,
+	size_t globalProfilerCPUTimePeriodNs_) :
+	_idle(true),
+	_idleTime(0),
+	_pTarget(0),
+	_name(name),
	_thread(name),
-	_targetCompleted(false)
+	_targetCompleted(false),
+	_globalProfilerRealTimePeriodNs(globalProfilerRealTimePeriodNs_),
+	_globalProfilerCPUTimePeriodNs(globalProfilerCPUTimePeriodNs_)
 {
	poco_assert_dbg (stackSize >= 0);
	_thread.setStackSize(stackSize);
@@ -83,7 +96,7 @@ void PooledThread::start()
 void PooledThread::start(Thread::Priority priority, Runnable& target)
 {
	FastMutex::ScopedLock lock(_mutex);
	poco_assert (_pTarget == 0);
	_pTarget = &target;
@@ -109,7 +122,7 @@ void PooledThread::start(Thread::Priority priority, Runnable& target, const std:
	}
	_thread.setName(fullName);
	_thread.setPriority(priority);
	poco_assert (_pTarget == 0);
	_pTarget = &target;
@@ -145,7 +158,7 @@ void PooledThread::join()
 void PooledThread::activate()
 {
	FastMutex::ScopedLock lock(_mutex);
	poco_assert (_idle);
	_idle = false;
	_targetCompleted.reset();
@@ -155,7 +168,7 @@ void PooledThread::activate()
 void PooledThread::release()
 {
	const long JOIN_TIMEOUT = 10000;
	_mutex.lock();
	_pTarget = 0;
	_mutex.unlock();
@@ -174,6 +187,10 @@ void PooledThread::release()
 void PooledThread::run()
 {
+	DB::ThreadStatus thread_status;
+	if (unlikely(_globalProfilerRealTimePeriodNs != 0 || _globalProfilerCPUTimePeriodNs != 0))
+		thread_status.initGlobalProfiler(_globalProfilerRealTimePeriodNs, _globalProfilerCPUTimePeriodNs);
	_started.set();
	for (;;)
	{
@@ -220,13 +237,17 @@
 ThreadPool::ThreadPool(int minCapacity,
	int maxCapacity,
	int idleTime,
-	int stackSize):
-	_minCapacity(minCapacity),
-	_maxCapacity(maxCapacity),
+	int stackSize,
+	size_t globalProfilerRealTimePeriodNs_,
+	size_t globalProfilerCPUTimePeriodNs_) :
+	_minCapacity(minCapacity),
+	_maxCapacity(maxCapacity),
	_idleTime(idleTime),
	_serial(0),
	_age(0),
-	_stackSize(stackSize)
+	_stackSize(stackSize),
+	_globalProfilerRealTimePeriodNs(globalProfilerRealTimePeriodNs_),
+	_globalProfilerCPUTimePeriodNs(globalProfilerCPUTimePeriodNs_)
 {
	poco_assert (minCapacity >= 1 && maxCapacity >= minCapacity && idleTime > 0);
@@ -243,14 +264,18 @@ ThreadPool::ThreadPool(const std::string& name,
	int minCapacity,
	int maxCapacity,
	int idleTime,
-	int stackSize):
+	int stackSize,
+	size_t globalProfilerRealTimePeriodNs_,
+	size_t globalProfilerCPUTimePeriodNs_) :
	_name(name),
-	_minCapacity(minCapacity),
-	_maxCapacity(maxCapacity),
+	_minCapacity(minCapacity),
+	_maxCapacity(maxCapacity),
	_idleTime(idleTime),
	_serial(0),
	_age(0),
-	_stackSize(stackSize)
+	_stackSize(stackSize),
+	_globalProfilerRealTimePeriodNs(globalProfilerRealTimePeriodNs_),
+	_globalProfilerCPUTimePeriodNs(globalProfilerCPUTimePeriodNs_)
 {
	poco_assert (minCapacity >= 1 && maxCapacity >= minCapacity && idleTime > 0);
@@ -393,15 +418,15 @@ void ThreadPool::housekeep()
	ThreadVec activeThreads;
	idleThreads.reserve(_threads.size());
	activeThreads.reserve(_threads.size());
	for (ThreadVec::iterator it = _threads.begin(); it != _threads.end(); ++it)
	{
		if ((*it)->idle())
		{
			if ((*it)->idleTime() < _idleTime)
				idleThreads.push_back(*it);
-			else
-				expiredThreads.push_back(*it);
+			else
+				expiredThreads.push_back(*it);
		}
		else activeThreads.push_back(*it);
	}
@@ -463,7 +488,7 @@ PooledThread* ThreadPool::createThread()
 {
	std::ostringstream name;
	name << _name << "[#" << ++_serial << "]";
-	return new PooledThread(name.str(), _stackSize);
+	return new PooledThread(name.str(), _stackSize, _globalProfilerRealTimePeriodNs, _globalProfilerCPUTimePeriodNs);
 }
@@ -481,7 +506,7 @@ public:
	ThreadPool* pool()
	{
		FastMutex::ScopedLock lock(_mutex);
		if (!_pPool)
		{
			_pPool = new ThreadPool("default");
@@ -490,7 +515,7 @@ public:
		}
		return _pPool;
	}
 private:
	ThreadPool* _pPool;
	FastMutex _mutex;
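
The per-handler ThreadStatus declarations removed in the files further below become redundant with this change: the pooled thread that runs the handler now owns a DB::ThreadStatus for its whole lifetime. The pattern the pool applies in PooledThread::run() is roughly the following, sketched here on a plain std::thread (a hedged sketch assuming the DB::ThreadStatus and initGlobalProfiler API exactly as used in the diff above; the worker function and period values are illustrative):

#include <thread>
#include <Common/ThreadStatus.h>

/// What every PooledThread now does at the top of run(), shown standalone.
void profiledWorker(size_t real_period_ns, size_t cpu_period_ns)
{
    DB::ThreadStatus thread_status;   /// attach this thread so its samples can be attributed to it
    if (real_period_ns != 0 || cpu_period_ns != 0)
        thread_status.initGlobalProfiler(real_period_ns, cpu_period_ns);   /// arm periodic sampling into system.trace_log

    /// ... actual work; stacks sampled here end up in system.trace_log ...
}

int main()
{
    std::thread t(profiledWorker, /* real */ 1000000000, /* cpu */ 1000000000);
    t.join();
    return 0;
}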

@@ -10,6 +10,7 @@
 #include <Poco/Net/NetException.h>
 #include <Poco/Util/HelpFormatter.h>
 #include <Poco/Environment.h>
+#include <Poco/Config.h>
 #include <Common/scope_guard_safe.h>
 #include <Common/logger_useful.h>
 #include <base/phdr_cache.h>
@@ -721,11 +722,6 @@ try
     CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::getVersionRevision());
     CurrentMetrics::set(CurrentMetrics::VersionInteger, ClickHouseRevision::getVersionInteger());
-    Poco::ThreadPool server_pool(3, server_settings.max_connections);
-    std::mutex servers_lock;
-    std::vector<ProtocolServerAdapter> servers;
-    std::vector<ProtocolServerAdapter> servers_to_start_before_tables;
     /** Context contains all that query execution is dependent:
       *  settings, available functions, data types, aggregate functions, databases, ...
       */
@@ -823,6 +819,18 @@ try
         total_memory_tracker.setSampleMaxAllocationSize(server_settings.total_memory_profiler_sample_max_allocation_size);
     }
+    Poco::ThreadPool server_pool(
+        /* minCapacity */3,
+        /* maxCapacity */server_settings.max_connections,
+        /* idleTime */60,
+        /* stackSize */POCO_THREAD_STACK_SIZE,
+        server_settings.global_profiler_real_time_period_ns,
+        server_settings.global_profiler_cpu_time_period_ns);
+    std::mutex servers_lock;
+    std::vector<ProtocolServerAdapter> servers;
+    std::vector<ProtocolServerAdapter> servers_to_start_before_tables;
     /// Wait for all threads to avoid possible use-after-free (for example logging objects can be already destroyed).
     SCOPE_EXIT({
         Stopwatch watch;
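
In Server.cpp the pool construction is moved further down in startup and now receives the two profiler periods from server_settings (the same global_profiler_real_time_period_ns / global_profiler_cpu_time_period_ns keys that the Keeper test configs below set to one second). A hedged sketch of that wiring, reading the keys directly from a Poco configuration rather than through ServerSettings (the helper name and defaults are illustrative):

#include <memory>
#include <Poco/ThreadPool.h>
#include <Poco/Util/AbstractConfiguration.h>

/// Illustrative helper: build the server's Poco::ThreadPool with profiler periods
/// taken straight from the XML config; 0 means "profiler off" and is the default.
std::unique_ptr<Poco::ThreadPool> makeServerPool(const Poco::Util::AbstractConfiguration & config, int max_connections)
{
    const size_t real_ns = config.getUInt64("global_profiler_real_time_period_ns", 0);
    const size_t cpu_ns = config.getUInt64("global_profiler_cpu_time_period_ns", 0);
    return std::make_unique<Poco::ThreadPool>(3, max_connections, 60, POCO_THREAD_STACK_SIZE, real_ns, cpu_ns);
}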

@@ -1063,7 +1063,6 @@ void HTTPHandler::formatExceptionForClient(int exception_code, HTTPServerRequest
 void HTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event)
 {
     setThreadName("HTTPHandler");
-    ThreadStatus thread_status;
     session = std::make_unique<Session>(server.context(), ClientInfo::Interface::HTTP, request.isSecure());
     SCOPE_EXIT({ session.reset(); });

@@ -8,7 +8,6 @@
 #include <Interpreters/InterserverIOHandler.h>
 #include <Server/HTTP/HTMLForm.h>
 #include <Server/HTTP/WriteBufferFromHTTPServerResponse.h>
-#include <Common/ThreadStatus.h>
 #include <Common/logger_useful.h>
 #include <Common/setThreadName.h>
@@ -81,7 +80,6 @@ void InterserverIOHTTPHandler::processQuery(HTTPServerRequest & request, HTTPSer
 void InterserverIOHTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event)
 {
     setThreadName("IntersrvHandler");
-    ThreadStatus thread_status;
     /// In order to work keep-alive.
     if (request.getVersion() == HTTPServerRequest::HTTP_1_1)

@@ -309,7 +309,6 @@ Poco::Timespan KeeperTCPHandler::receiveHandshake(int32_t handshake_length, bool
 void KeeperTCPHandler::runImpl()
 {
     setThreadName("KeeperHandler");
-    ThreadStatus thread_status;
     socket().setReceiveTimeout(receive_timeout);
     socket().setSendTimeout(send_timeout);

@@ -24,7 +24,6 @@
 #include <Common/CurrentThread.h>
 #include <Common/NetException.h>
 #include <Common/OpenSSLHelpers.h>
-#include <Common/ThreadStatus.h>
 #include <Common/config_version.h>
 #include <Common/logger_useful.h>
 #include <Common/re2.h>
@@ -199,7 +198,6 @@ MySQLHandler::~MySQLHandler() = default;
 void MySQLHandler::run()
 {
     setThreadName("MySQLHandler");
-    ThreadStatus thread_status;
     session = std::make_unique<Session>(server.context(), ClientInfo::Interface::MYSQL);
     SCOPE_EXIT({ session.reset(); });

@@ -10,7 +10,6 @@
 #include <base/scope_guard.h>
 #include <pcg_random.hpp>
 #include <Common/CurrentThread.h>
-#include <Common/ThreadStatus.h>
 #include <Common/config_version.h>
 #include <Common/randomSeed.h>
 #include <Common/setThreadName.h>
@@ -59,7 +58,6 @@ void PostgreSQLHandler::changeIO(Poco::Net::StreamSocket & socket)
 void PostgreSQLHandler::run()
 {
     setThreadName("PostgresHandler");
-    ThreadStatus thread_status;
     session = std::make_unique<Session>(server.context(), ClientInfo::Interface::POSTGRESQL);
     SCOPE_EXIT({ session.reset(); });

@@ -246,7 +246,6 @@ TCPHandler::~TCPHandler()
 void TCPHandler::runImpl()
 {
     setThreadName("TCPHandler");
-    ThreadStatus thread_status;
     extractConnectionSettingsFromContext(server.context());

@@ -7,7 +7,6 @@
 #include <Common/ProfileEvents.h>
 #include <Common/CurrentMetrics.h>
 #include <Common/Stopwatch.h>
-#include <Common/ThreadStatus.h>
 #include <Core/Protocol.h>
 #include <Core/QueryProcessingStage.h>
 #include <IO/Progress.h>

@@ -0,0 +1,44 @@
<clickhouse>
    <keeper_server>
        <s3_snapshot>
            <endpoint>http://minio1:9001/snapshots/</endpoint>
            <access_key_id>minio</access_key_id>
            <secret_access_key>minio123</secret_access_key>
        </s3_snapshot>
        <tcp_port>9181</tcp_port>
        <server_id>1</server_id>
        <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
        <snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
        <four_letter_word_white_list>*</four_letter_word_white_list>
        <coordination_settings>
            <operation_timeout_ms>5000</operation_timeout_ms>
            <session_timeout_ms>10000</session_timeout_ms>
            <min_session_timeout_ms>5000</min_session_timeout_ms>
            <snapshot_distance>50</snapshot_distance>
            <raft_logs_level>trace</raft_logs_level>
        </coordination_settings>
        <raft_configuration>
            <server>
                <id>1</id>
                <hostname>node1</hostname>
                <port>9234</port>
            </server>
            <server>
                <id>2</id>
                <hostname>node2</hostname>
                <port>9234</port>
                <start_as_follower>true</start_as_follower>
            </server>
            <server>
                <id>3</id>
                <hostname>node3</hostname>
                <port>9234</port>
                <start_as_follower>true</start_as_follower>
            </server>
        </raft_configuration>
    </keeper_server>
    <global_profiler_real_time_period_ns>1000000000</global_profiler_real_time_period_ns>
    <global_profiler_cpu_time_period_ns>1000000000</global_profiler_cpu_time_period_ns>
</clickhouse>

@@ -0,0 +1,44 @@
<clickhouse>
    <keeper_server>
        <s3_snapshot>
            <endpoint>http://minio1:9001/snapshots/</endpoint>
            <access_key_id>minio</access_key_id>
            <secret_access_key>minio123</secret_access_key>
        </s3_snapshot>
        <tcp_port>9181</tcp_port>
        <server_id>2</server_id>
        <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
        <snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
        <four_letter_word_white_list>*</four_letter_word_white_list>
        <coordination_settings>
            <operation_timeout_ms>5000</operation_timeout_ms>
            <session_timeout_ms>10000</session_timeout_ms>
            <min_session_timeout_ms>5000</min_session_timeout_ms>
            <snapshot_distance>75</snapshot_distance>
            <raft_logs_level>trace</raft_logs_level>
        </coordination_settings>
        <raft_configuration>
            <server>
                <id>1</id>
                <hostname>node1</hostname>
                <port>9234</port>
            </server>
            <server>
                <id>2</id>
                <hostname>node2</hostname>
                <port>9234</port>
                <start_as_follower>true</start_as_follower>
            </server>
            <server>
                <id>3</id>
                <hostname>node3</hostname>
                <port>9234</port>
                <start_as_follower>true</start_as_follower>
            </server>
        </raft_configuration>
    </keeper_server>
    <global_profiler_real_time_period_ns>1000000000</global_profiler_real_time_period_ns>
    <global_profiler_cpu_time_period_ns>1000000000</global_profiler_cpu_time_period_ns>
</clickhouse>

@@ -0,0 +1,44 @@
<clickhouse>
    <keeper_server>
        <s3_snapshot>
            <endpoint>http://minio1:9001/snapshots/</endpoint>
            <access_key_id>minio</access_key_id>
            <secret_access_key>minio123</secret_access_key>
        </s3_snapshot>
        <tcp_port>9181</tcp_port>
        <server_id>3</server_id>
        <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
        <snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
        <four_letter_word_white_list>*</four_letter_word_white_list>
        <coordination_settings>
            <operation_timeout_ms>5000</operation_timeout_ms>
            <session_timeout_ms>10000</session_timeout_ms>
            <min_session_timeout_ms>5000</min_session_timeout_ms>
            <snapshot_distance>75</snapshot_distance>
            <raft_logs_level>trace</raft_logs_level>
        </coordination_settings>
        <raft_configuration>
            <server>
                <id>1</id>
                <hostname>node1</hostname>
                <port>9234</port>
            </server>
            <server>
                <id>2</id>
                <hostname>node2</hostname>
                <port>9234</port>
                <start_as_follower>true</start_as_follower>
            </server>
            <server>
                <id>3</id>
                <hostname>node3</hostname>
                <port>9234</port>
                <start_as_follower>true</start_as_follower>
            </server>
        </raft_configuration>
    </keeper_server>
    <global_profiler_real_time_period_ns>1000000000</global_profiler_real_time_period_ns>
    <global_profiler_cpu_time_period_ns>1000000000</global_profiler_cpu_time_period_ns>
</clickhouse>

@@ -0,0 +1,96 @@
import pytest
from helpers.cluster import ClickHouseCluster
from helpers.test_tools import TSV
from helpers.keeper_utils import KeeperClient, KeeperException

cluster = ClickHouseCluster(__file__)
node = cluster.add_instance(
    "node1",
    main_configs=["configs/keeper_config1.xml"],
    stay_alive=True,
)
node2 = cluster.add_instance(
    "node2",
    main_configs=["configs/keeper_config2.xml"],
    stay_alive=True,
    with_minio=True,
)
node3 = cluster.add_instance(
    "node3",
    main_configs=["configs/keeper_config3.xml"],
    stay_alive=True,
    with_minio=True,
)


@pytest.fixture(scope="module", autouse=True)
def started_cluster():
    try:
        cluster.start()
        yield cluster
    finally:
        cluster.shutdown()


def test_profiler(started_cluster):
    node = cluster.instances["node1"]
    if node.is_built_with_sanitizer():
        return

    node.query(
        "CREATE TABLE t (key UInt32, value String) Engine = ReplicatedMergeTree('/clickhouse-tables/test1', 'r1') ORDER BY key"
    )

    for _ in range(100):
        node.query("INSERT INTO t SELECT number, toString(number) from numbers(100)")

    node.query("system flush logs")
    assert int(node.query("exists system.trace_log"))

    result = node.query(
        """
set allow_introspection_functions=1;
system flush logs;
select cnt from (
select count() as cnt, formatReadableSize(sum(size)),
arrayStringConcat(
arrayMap(x, y -> concat(x, ': ', y), arrayMap(x -> addressToLine(x), trace), arrayMap(x -> demangle(addressToSymbol(x)), trace)),
'\n') as trace
from system.trace_log where trace_type = Real and (trace ilike '%KeeperTCPHandler%' or trace ilike '%KeeperDispatcher%') group by trace order by cnt desc) limit 1;
        """
    )

    if len(result) == 0:
        assert 0 < int(
            node.query(
                """
set allow_introspection_functions=1;
system flush logs;
select sum(cnt) from (
select count() as cnt, formatReadableSize(sum(size)),
arrayStringConcat(
arrayMap(x, y -> concat(x, ': ', y), arrayMap(x -> addressToLine(x), trace), arrayMap(x -> demangle(addressToSymbol(x)), trace)),
'\n') as trace
from system.trace_log where trace_type = Real group by trace);
                """
            )
        )
        result = node.query(
            """
set allow_introspection_functions=1;
system flush logs;
select * from (
select count() as cnt, formatReadableSize(sum(size)),
arrayStringConcat(
arrayMap(x, y -> concat(x, ': ', y), arrayMap(x -> addressToLine(x), trace), arrayMap(x -> demangle(addressToSymbol(x)), trace)),
'\n') as trace
from system.trace_log where trace_type = Real group by trace);
            """
        )
        print(result)
        assert False

    assert 1 < int(result)