mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-09 17:14:47 +00:00
Merge pull request #66732 from ClickHouse/more_debug_info_for_cgroup_observer
Dump all memory stats in CgroupsMemoryUsageObserver on hitting the limit
This commit is contained in:
commit
5e41c29a90
@ -11,6 +11,7 @@
|
||||
#include <base/cgroupsv2.h>
|
||||
#include <base/getMemoryAmount.h>
|
||||
#include <base/sleep.h>
|
||||
#include <fmt/ranges.h>
|
||||
|
||||
#include <cstdint>
|
||||
#include <filesystem>
|
||||
@ -45,26 +46,33 @@ namespace
|
||||
/// kernel 5
|
||||
/// rss 15
|
||||
/// [...]
|
||||
uint64_t readMetricFromStatFile(ReadBufferFromFile & buf, const std::string & key)
|
||||
using Metrics = std::map<std::string, uint64_t>;
|
||||
|
||||
Metrics readAllMetricsFromStatFile(ReadBufferFromFile & buf)
|
||||
{
|
||||
Metrics metrics;
|
||||
while (!buf.eof())
|
||||
{
|
||||
std::string current_key;
|
||||
readStringUntilWhitespace(current_key, buf);
|
||||
if (current_key != key)
|
||||
{
|
||||
std::string dummy;
|
||||
readStringUntilNewlineInto(dummy, buf);
|
||||
buf.ignore();
|
||||
continue;
|
||||
}
|
||||
|
||||
assertChar(' ', buf);
|
||||
|
||||
uint64_t value = 0;
|
||||
readIntText(value, buf);
|
||||
return value;
|
||||
}
|
||||
assertChar('\n', buf);
|
||||
|
||||
auto [_, inserted] = metrics.emplace(std::move(current_key), value);
|
||||
chassert(inserted, "Duplicate keys in stat file");
|
||||
}
|
||||
return metrics;
|
||||
}
|
||||
|
||||
/// Returns the value stored under `key` in the stat file behind `buf`.
/// The whole file is parsed through readAllMetricsFromStatFile() and the key is then looked up in the result.
/// Throws Exception with INCORRECT_DATA when the file has no entry for `key`.
uint64_t readMetricFromStatFile(ReadBufferFromFile & buf, const std::string & key)
{
    const auto parsed_metrics = readAllMetricsFromStatFile(buf);
    const auto found = parsed_metrics.find(key);

    if (found == parsed_metrics.end())
        throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot find '{}' in '{}'", key, buf.getFileName());

    return found->second;
}
|
||||
|
||||
@ -79,6 +87,13 @@ struct CgroupsV1Reader : ICgroupsReader
|
||||
return readMetricFromStatFile(buf, "rss");
|
||||
}
|
||||
|
||||
std::string dumpAllStats() override
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
buf.rewind();
|
||||
return fmt::format("{}", readAllMetricsFromStatFile(buf));
|
||||
}
|
||||
|
||||
private:
|
||||
std::mutex mutex;
|
||||
ReadBufferFromFile buf TSA_GUARDED_BY(mutex);
|
||||
@ -106,6 +121,13 @@ struct CgroupsV2Reader : ICgroupsReader
|
||||
return mem_usage;
|
||||
}
|
||||
|
||||
std::string dumpAllStats() override
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
stat_buf.rewind();
|
||||
return fmt::format("{}", readAllMetricsFromStatFile(stat_buf));
|
||||
}
|
||||
|
||||
private:
|
||||
std::mutex mutex;
|
||||
ReadBufferFromFile current_buf TSA_GUARDED_BY(mutex);
|
||||
@ -178,10 +200,7 @@ CgroupsMemoryUsageObserver::CgroupsMemoryUsageObserver(std::chrono::seconds wait
|
||||
{
|
||||
const auto [cgroup_path, version] = getCgroupsPath();
|
||||
|
||||
if (version == CgroupsVersion::V2)
|
||||
cgroup_reader = std::make_unique<CgroupsV2Reader>(cgroup_path);
|
||||
else
|
||||
cgroup_reader = std::make_unique<CgroupsV1Reader>(cgroup_path);
|
||||
cgroup_reader = createCgroupsReader(version, cgroup_path);
|
||||
|
||||
LOG_INFO(
|
||||
log,
|
||||
@ -234,7 +253,12 @@ void CgroupsMemoryUsageObserver::setMemoryUsageLimits(uint64_t hard_limit_, uint
|
||||
# endif
|
||||
/// Reset current usage in memory tracker. Expect zero for free_memory_in_allocator_arenas as we just purged them.
|
||||
uint64_t memory_usage = cgroup_reader->readMemoryUsage();
|
||||
LOG_TRACE(log, "Read current memory usage {} bytes ({}) from cgroups", memory_usage, ReadableSize(memory_usage));
|
||||
LOG_TRACE(
|
||||
log,
|
||||
"Read current memory usage {} bytes ({}) from cgroups, full available stats: {}",
|
||||
memory_usage,
|
||||
ReadableSize(memory_usage),
|
||||
cgroup_reader->dumpAllStats());
|
||||
MemoryTracker::setRSS(memory_usage, 0);
|
||||
|
||||
LOG_INFO(log, "Purged jemalloc arenas. Current memory usage is {}", ReadableSize(memory_usage));
|
||||
@ -338,6 +362,13 @@ void CgroupsMemoryUsageObserver::runThread()
|
||||
}
|
||||
}
|
||||
|
||||
/// Factory for the cgroups reader matching `version`.
/// V2 yields a CgroupsV2Reader; any other value yields a CgroupsV1Reader.
/// `cgroup_path` is forwarded to the reader's constructor.
std::unique_ptr<ICgroupsReader> createCgroupsReader(CgroupsMemoryUsageObserver::CgroupsVersion version, const fs::path & cgroup_path)
{
    const bool is_v2 = (version == CgroupsMemoryUsageObserver::CgroupsVersion::V2);

    if (!is_v2)
        return std::make_unique<CgroupsV1Reader>(cgroup_path);

    return std::make_unique<CgroupsV2Reader>(cgroup_path);
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -14,6 +14,8 @@ struct ICgroupsReader
|
||||
virtual ~ICgroupsReader() = default;
|
||||
|
||||
virtual uint64_t readMemoryUsage() = 0;
|
||||
|
||||
virtual std::string dumpAllStats() = 0;
|
||||
};
|
||||
|
||||
/// Does two things:
|
||||
@ -81,6 +83,9 @@ private:
|
||||
bool quit = false;
|
||||
};
|
||||
|
||||
/// Creates the ICgroupsReader implementation for the given cgroups version, rooted at `cgroup_path`.
/// Declared in the header so that code outside the observer (e.g. unit tests) can construct a reader directly.
std::unique_ptr<ICgroupsReader>
|
||||
createCgroupsReader(CgroupsMemoryUsageObserver::CgroupsVersion version, const std::filesystem::path & cgroup_path);
|
||||
|
||||
#else
|
||||
class CgroupsMemoryUsageObserver
|
||||
{
|
||||
|
178
src/Common/tests/gtest_cgroups_reader.cpp
Normal file
178
src/Common/tests/gtest_cgroups_reader.cpp
Normal file
@ -0,0 +1,178 @@
|
||||
#if defined(OS_LINUX)
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <cstdint>
|
||||
#include <filesystem>
|
||||
|
||||
#include <IO/WriteBufferFromFile.h>
|
||||
#include <Common/CgroupsMemoryUsageObserver.h>
|
||||
#include <Common/filesystemHelpers.h>
|
||||
|
||||
using namespace DB;
|
||||
|
||||
|
||||
const std::string SAMPLE_FILE[2] = {
|
||||
R"(cache 4673703936
|
||||
rss 2232029184
|
||||
rss_huge 0
|
||||
shmem 0
|
||||
mapped_file 344678400
|
||||
dirty 4730880
|
||||
writeback 135168
|
||||
swap 0
|
||||
pgpgin 2038569918
|
||||
pgpgout 2036883790
|
||||
pgfault 2055373287
|
||||
pgmajfault 0
|
||||
inactive_anon 2156335104
|
||||
active_anon 0
|
||||
inactive_file 2841305088
|
||||
active_file 1653915648
|
||||
unevictable 256008192
|
||||
hierarchical_memory_limit 8589934592
|
||||
hierarchical_memsw_limit 8589934592
|
||||
total_cache 4673703936
|
||||
total_rss 2232029184
|
||||
total_rss_huge 0
|
||||
total_shmem 0
|
||||
total_mapped_file 344678400
|
||||
total_dirty 4730880
|
||||
total_writeback 135168
|
||||
total_swap 0
|
||||
total_pgpgin 2038569918
|
||||
total_pgpgout 2036883790
|
||||
total_pgfault 2055373287
|
||||
total_pgmajfault 0
|
||||
total_inactive_anon 2156335104
|
||||
total_active_anon 0
|
||||
total_inactive_file 2841305088
|
||||
total_active_file 1653915648
|
||||
total_unevictable 256008192
|
||||
)",
|
||||
R"(anon 10429399040
|
||||
file 17410793472
|
||||
kernel 1537789952
|
||||
kernel_stack 3833856
|
||||
pagetables 65441792
|
||||
sec_pagetables 0
|
||||
percpu 15232
|
||||
sock 0
|
||||
vmalloc 0
|
||||
shmem 0
|
||||
zswap 0
|
||||
zswapped 0
|
||||
file_mapped 344010752
|
||||
file_dirty 2060857344
|
||||
file_writeback 0
|
||||
swapcached 0
|
||||
anon_thp 0
|
||||
file_thp 0
|
||||
shmem_thp 0
|
||||
inactive_anon 0
|
||||
active_anon 10429370368
|
||||
inactive_file 8693084160
|
||||
active_file 8717561856
|
||||
unevictable 0
|
||||
slab_reclaimable 1460982504
|
||||
slab_unreclaimable 5152864
|
||||
slab 1466135368
|
||||
workingset_refault_anon 0
|
||||
workingset_refault_file 0
|
||||
workingset_activate_anon 0
|
||||
workingset_activate_file 0
|
||||
workingset_restore_anon 0
|
||||
workingset_restore_file 0
|
||||
workingset_nodereclaim 0
|
||||
pgscan 0
|
||||
pgsteal 0
|
||||
pgscan_kswapd 0
|
||||
pgscan_direct 0
|
||||
pgscan_khugepaged 0
|
||||
pgsteal_kswapd 0
|
||||
pgsteal_direct 0
|
||||
pgsteal_khugepaged 0
|
||||
pgfault 43026352
|
||||
pgmajfault 36762
|
||||
pgrefill 0
|
||||
pgactivate 0
|
||||
pgdeactivate 0
|
||||
pglazyfree 259
|
||||
pglazyfreed 0
|
||||
zswpin 0
|
||||
zswpout 0
|
||||
thp_fault_alloc 0
|
||||
thp_collapse_alloc 0
|
||||
)"};
|
||||
|
||||
const std::string EXPECTED[2]
|
||||
= {"{\"active_anon\": 0, \"active_file\": 1653915648, \"cache\": 4673703936, \"dirty\": 4730880, \"hierarchical_memory_limit\": "
|
||||
"8589934592, \"hierarchical_memsw_limit\": 8589934592, \"inactive_anon\": 2156335104, \"inactive_file\": 2841305088, "
|
||||
"\"mapped_file\": 344678400, \"pgfault\": 2055373287, \"pgmajfault\": 0, \"pgpgin\": 2038569918, \"pgpgout\": 2036883790, \"rss\": "
|
||||
"2232029184, \"rss_huge\": 0, \"shmem\": 0, \"swap\": 0, \"total_active_anon\": 0, \"total_active_file\": 1653915648, "
|
||||
"\"total_cache\": 4673703936, \"total_dirty\": 4730880, \"total_inactive_anon\": 2156335104, \"total_inactive_file\": 2841305088, "
|
||||
"\"total_mapped_file\": 344678400, \"total_pgfault\": 2055373287, \"total_pgmajfault\": 0, \"total_pgpgin\": 2038569918, "
|
||||
"\"total_pgpgout\": 2036883790, \"total_rss\": 2232029184, \"total_rss_huge\": 0, \"total_shmem\": 0, \"total_swap\": 0, "
|
||||
"\"total_unevictable\": 256008192, \"total_writeback\": 135168, \"unevictable\": 256008192, \"writeback\": 135168}",
|
||||
"{\"active_anon\": 10429370368, \"active_file\": 8717561856, \"anon\": 10429399040, \"anon_thp\": 0, \"file\": 17410793472, "
|
||||
"\"file_dirty\": 2060857344, \"file_mapped\": 344010752, \"file_thp\": 0, \"file_writeback\": 0, \"inactive_anon\": 0, "
|
||||
"\"inactive_file\": 8693084160, \"kernel\": 1537789952, \"kernel_stack\": 3833856, \"pagetables\": 65441792, \"percpu\": 15232, "
|
||||
"\"pgactivate\": 0, \"pgdeactivate\": 0, \"pgfault\": 43026352, \"pglazyfree\": 259, \"pglazyfreed\": 0, \"pgmajfault\": 36762, "
|
||||
"\"pgrefill\": 0, \"pgscan\": 0, \"pgscan_direct\": 0, \"pgscan_khugepaged\": 0, \"pgscan_kswapd\": 0, \"pgsteal\": 0, "
|
||||
"\"pgsteal_direct\": 0, \"pgsteal_khugepaged\": 0, \"pgsteal_kswapd\": 0, \"sec_pagetables\": 0, \"shmem\": 0, \"shmem_thp\": 0, "
|
||||
"\"slab\": 1466135368, \"slab_reclaimable\": 1460982504, \"slab_unreclaimable\": 5152864, \"sock\": 0, \"swapcached\": 0, "
|
||||
"\"thp_collapse_alloc\": 0, \"thp_fault_alloc\": 0, \"unevictable\": 0, \"vmalloc\": 0, \"workingset_activate_anon\": 0, "
|
||||
"\"workingset_activate_file\": 0, \"workingset_nodereclaim\": 0, \"workingset_refault_anon\": 0, \"workingset_refault_file\": 0, "
|
||||
"\"workingset_restore_anon\": 0, \"workingset_restore_file\": 0, \"zswap\": 0, \"zswapped\": 0, \"zswpin\": 0, \"zswpout\": 0}"};
|
||||
|
||||
|
||||
/// Parametrised fixture that builds a fake cgroup directory for the cgroups version under test.
///
/// SetUp() creates `./test_cgroups_<version name>` and writes a `memory.stat` file with the sample
/// contents for that version; for V2 it additionally writes a `memory.current` file.
/// TearDown() removes the directory again so that test runs do not leave files behind.
class CgroupsMemoryUsageObserverFixture : public ::testing::TestWithParam<CgroupsMemoryUsageObserver::CgroupsVersion>
{
    void SetUp() override
    {
        /// SAMPLE_FILE is indexed by the numeric value of the version enum.
        const uint8_t version = static_cast<uint8_t>(GetParam());
        tmp_dir = fmt::format("./test_cgroups_{}", magic_enum::enum_name(GetParam()));
        fs::create_directories(tmp_dir);

        auto stat_file = WriteBufferFromFile(tmp_dir + "/memory.stat");
        stat_file.write(SAMPLE_FILE[version].data(), SAMPLE_FILE[version].size());
        stat_file.sync();

        if (GetParam() == CgroupsMemoryUsageObserver::CgroupsVersion::V2)
        {
            /// The V2 test expects memory usage to be derived from this value (see ReadMemoryUsageTest).
            auto current_file = WriteBufferFromFile(tmp_dir + "/memory.current");
            current_file.write("29645422592", 11);
            current_file.sync();
        }
    }

    void TearDown() override
    {
        /// Best-effort cleanup: use the error_code overload so a failed removal never throws out of TearDown.
        if (tmp_dir.empty())
            return;

        std::error_code ec;
        fs::remove_all(tmp_dir, ec);
    }

protected:
    std::string tmp_dir;
};
|
||||
|
||||
|
||||
/// Checks the memory usage reported by the reader against the sample files written by the fixture.
TEST_P(CgroupsMemoryUsageObserverFixture, ReadMemoryUsageTest)
{
    const auto version = GetParam();
    const bool is_v1 = (version == CgroupsMemoryUsageObserver::CgroupsVersion::V1);

    auto cgroups_reader = createCgroupsReader(version, tmp_dir);

    ASSERT_EQ(
        cgroups_reader->readMemoryUsage(),
        is_v1 ? /* rss from memory.stat */ 2232029184
              : /* value from memory.current - inactive_file */ 20952338432);
}
|
||||
|
||||
|
||||
/// Checks that dumpAllStats() renders the sample stat file exactly as the EXPECTED string for this version.
TEST_P(CgroupsMemoryUsageObserverFixture, DumpAllStatsTest)
{
    const auto version = GetParam();
    const auto expected_index = static_cast<uint8_t>(version);

    auto cgroups_reader = createCgroupsReader(version, tmp_dir);

    ASSERT_EQ(cgroups_reader->dumpAllStats(), EXPECTED[expected_index]);
}
|
||||
|
||||
|
||||
/// Instantiate every CgroupsMemoryUsageObserverFixture test once for CgroupsVersion::V1 and once for CgroupsVersion::V2.
INSTANTIATE_TEST_SUITE_P(
|
||||
CgroupsMemoryUsageObserverTests,
|
||||
CgroupsMemoryUsageObserverFixture,
|
||||
::testing::Values(CgroupsMemoryUsageObserver::CgroupsVersion::V1, CgroupsMemoryUsageObserver::CgroupsVersion::V2));
|
||||
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user