From 64b44be513607d59e4b89dd9fb518b8cf4acba21 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 16 Sep 2022 12:52:38 +0200 Subject: [PATCH] Account any IO that is done from ThreadPoolReader In case of some issues with the disk it is possible that preadv2(RWF_NOWAIT)==EAGAIN can take a lot of time, so let's measure even this case. I've verified with a simple (almost) loop of preadv2(RWF_NOWAIT), and got this: EAGAIN: 8565670 (out of 17148317) EAGAIN: 8565670 (out of 17148317) EAGAIN: 8565670 (out of 17148317) too long at offset=8691694, ret=18446744073709551615 (14.5016) too long at offset=8694766, ret=18446744073709551615 (14.4338) too long at offset=8694766, ret=18446744073709551615 (16.2144) So here you can see that preadv2(RWF_NOWAIT) took 14-16 seconds in case of failure. Complete code can be found here [1]. [1]: https://gist.github.com/azat/f4a6b0750316c42bec021064e0a7627c I came up with this patch because I saw some stranginess in ProfileEvents, that does not show all the time that was spend in IO when pread_threadpool method was used. Also there was some stranginess with trace_log too. Signed-off-by: Azat Khuzhin --- src/Disks/IO/ThreadPoolReader.cpp | 32 ++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/src/Disks/IO/ThreadPoolReader.cpp b/src/Disks/IO/ThreadPoolReader.cpp index 9b38607c204..d2b3bcbaa5e 100644 --- a/src/Disks/IO/ThreadPoolReader.cpp +++ b/src/Disks/IO/ThreadPoolReader.cpp @@ -108,8 +108,19 @@ std::future ThreadPoolReader::submit(Request reques if (has_pread_nowait_support.load(std::memory_order_relaxed)) { + /// It reports real time spent including the time spent while thread was preempted doing nothing. + /// And it is Ok for the purpose of this watch (it is used to lower the number of threads to read from tables). + /// Sometimes it is better to use taskstats::blkio_delay_total, but it is quite expensive to get it + /// (TaskStatsInfoGetter has about 500K RPS). Stopwatch watch(CLOCK_MONOTONIC); + SCOPE_EXIT({ + watch.stop(); + + ProfileEvents::increment(ProfileEvents::ThreadPoolReaderPageCacheHitElapsedMicroseconds, watch.elapsedMicroseconds()); + ProfileEvents::increment(ProfileEvents::DiskReadElapsedMicroseconds, watch.elapsedMicroseconds()); + }); + std::promise promise; std::future future = promise.get_future(); @@ -135,11 +146,6 @@ std::future ThreadPoolReader::submit(Request reques { /// The file has ended. promise.set_value({0, 0}); - - watch.stop(); - ProfileEvents::increment(ProfileEvents::ThreadPoolReaderPageCacheHitElapsedMicroseconds, watch.elapsedMicroseconds()); - ProfileEvents::increment(ProfileEvents::DiskReadElapsedMicroseconds, watch.elapsedMicroseconds()); - return future; } @@ -179,18 +185,10 @@ std::future ThreadPoolReader::submit(Request reques if (bytes_read) { - /// It reports real time spent including the time spent while thread was preempted doing nothing. - /// And it is Ok for the purpose of this watch (it is used to lower the number of threads to read from tables). - /// Sometimes it is better to use taskstats::blkio_delay_total, but it is quite expensive to get it - /// (TaskStatsInfoGetter has about 500K RPS). - watch.stop(); - /// Read successfully from page cache. ProfileEvents::increment(ProfileEvents::ThreadPoolReaderPageCacheHit); ProfileEvents::increment(ProfileEvents::ThreadPoolReaderPageCacheHitBytes, bytes_read); ProfileEvents::increment(ProfileEvents::ReadBufferFromFileDescriptorReadBytes, bytes_read); - ProfileEvents::increment(ProfileEvents::ThreadPoolReaderPageCacheHitElapsedMicroseconds, watch.elapsedMicroseconds()); - ProfileEvents::increment(ProfileEvents::DiskReadElapsedMicroseconds, watch.elapsedMicroseconds()); promise.set_value({bytes_read, request.ignore}); return future; @@ -226,6 +224,12 @@ std::future ThreadPoolReader::submit(Request reques setThreadName("ThreadPoolRead"); Stopwatch watch(CLOCK_MONOTONIC); + SCOPE_EXIT({ + watch.stop(); + + ProfileEvents::increment(ProfileEvents::ThreadPoolReaderPageCacheMissElapsedMicroseconds, watch.elapsedMicroseconds()); + ProfileEvents::increment(ProfileEvents::DiskReadElapsedMicroseconds, watch.elapsedMicroseconds()); + }); size_t bytes_read = 0; while (!bytes_read) @@ -254,8 +258,6 @@ std::future ThreadPoolReader::submit(Request reques ProfileEvents::increment(ProfileEvents::ThreadPoolReaderPageCacheMissBytes, bytes_read); ProfileEvents::increment(ProfileEvents::ReadBufferFromFileDescriptorReadBytes, bytes_read); - ProfileEvents::increment(ProfileEvents::ThreadPoolReaderPageCacheMissElapsedMicroseconds, watch.elapsedMicroseconds()); - ProfileEvents::increment(ProfileEvents::DiskReadElapsedMicroseconds, watch.elapsedMicroseconds()); return Result{ .size = bytes_read, .offset = request.ignore }; });