Merge pull request #44824 from ClickHouse/stress-test-less-oom

Less OOM in stress test
This commit is contained in:
Alexey Milovidov 2023-01-03 22:12:34 +03:00 committed by GitHub
commit cb8e7c8059
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 60 additions and 34 deletions

View File

@ -53,6 +53,7 @@ function configure()
local total_mem
total_mem=$(awk '/MemTotal/ { print $(NF-1) }' /proc/meminfo) # KiB
total_mem=$(( total_mem*1024 )) # bytes
# Set maximum memory usage as half of total memory (less chance of OOM).
#
# But not via max_server_memory_usage but via max_memory_usage_for_user,
@ -65,16 +66,17 @@ function configure()
# max_server_memory_usage will be hard limit, and queries that should be
# executed regardless of memory limits will use max_memory_usage_for_user=0,
# instead of relying on max_untracked_memory
local max_server_mem
max_server_mem=$((total_mem*75/100)) # 75%
echo "Setting max_server_memory_usage=$max_server_mem"
max_server_memory_usage_to_ram_ratio=0.5
echo "Setting max_server_memory_usage_to_ram_ratio to ${max_server_memory_usage_to_ram_ratio}"
cat > /etc/clickhouse-server/config.d/max_server_memory_usage.xml <<EOL
<clickhouse>
<max_server_memory_usage>${max_server_mem}</max_server_memory_usage>
<max_server_memory_usage_to_ram_ratio>${max_server_memory_usage_to_ram_ratio}</max_server_memory_usage_to_ram_ratio>
</clickhouse>
EOL
local max_users_mem
max_users_mem=$((total_mem*50/100)) # 50%
max_users_mem=$((total_mem*30/100)) # 30%
echo "Setting max_memory_usage_for_user=$max_users_mem"
cat > /etc/clickhouse-server/users.d/max_memory_usage_for_user.xml <<EOL
<clickhouse>
@ -97,6 +99,13 @@ EOL
-->
<core_path>$PWD</core_path>
</clickhouse>
EOL
# Let OOM killer terminate other processes before clickhouse-server:
cat > /etc/clickhouse-server/config.d/oom_score.xml <<EOL
<clickhouse>
<oom_score>-1000</oom_score>
</clickhouse>
EOL
# Analyzer is not yet ready for testing

View File

@ -420,6 +420,33 @@ void Server::createServer(
}
}
#if defined(OS_LINUX)
namespace
{
void setOOMScore(int value, Poco::Logger * log)
{
try
{
std::string value_string = std::to_string(value);
DB::WriteBufferFromFile buf("/proc/self/oom_score_adj");
buf.write(value_string.c_str(), value_string.size());
buf.next();
buf.close();
}
catch (const Poco::Exception & e)
{
LOG_WARNING(log, "Failed to adjust OOM score: '{}'.", e.displayText());
return;
}
LOG_INFO(log, "Set OOM score adjustment to {}", value);
}
}
#endif
void Server::uninitialize()
{
logger().information("shutting down");
@ -881,6 +908,21 @@ try
}
}
}
int default_oom_score = 0;
#if !defined(NDEBUG)
/// In debug builds on Linux, increase the OOM score so that clickhouse is killed
/// first, instead of some other service. Use a carefully chosen random score of 555:
/// the maximum is 1000, and chromium uses 300 for its tab processes. Any errors
/// that occur are ignored, because this is just a debugging aid and we don't
/// care if it breaks.
default_oom_score = 555;
#endif
int oom_score = config().getInt("oom_score", default_oom_score);
if (oom_score)
setOOMScore(oom_score, log);
#endif
global_context->setRemoteHostFilter(config());

View File

@ -1464,4 +1464,8 @@
I don't recommend to change this setting.
<show_addresses_in_stack_traces>false</show_addresses_in_stack_traces>
-->
<!-- On Linux systems this can control the behavior of the OOM killer.
<oom_score>-1000</oom_score>
-->
</clickhouse>

View File

@ -602,34 +602,6 @@ void BaseDaemon::closeFDs()
}
}
namespace
{

/// Debugging aid for debug builds on Linux: raise the OOM score adjustment of
/// this process so that clickhouse is killed first, instead of some other
/// service. The score 555 is a carefully chosen random value: the maximum is
/// 1000, and chromium uses 300 for its tab processes. Failures are only logged
/// as a warning, because it is just a debugging aid and we don't care if it
/// breaks.
#if !defined(OS_LINUX) || defined(NDEBUG)

/// Outside of Linux debug builds there is nothing to adjust.
void debugIncreaseOOMScore() {}

#else

void debugIncreaseOOMScore()
{
    const std::string score_adjustment = "555";

    try
    {
        DB::WriteBufferFromFile oom_adj_file("/proc/self/oom_score_adj");
        oom_adj_file.write(score_adjustment.data(), score_adjustment.size());
        oom_adj_file.close();
    }
    catch (const Poco::Exception & e)
    {
        LOG_WARNING(&Poco::Logger::root(), "Failed to adjust OOM score: '{}'.", e.displayText());
        return;
    }

    LOG_INFO(&Poco::Logger::root(), "Set OOM score adjustment to {}", score_adjustment);
}

#endif

}
void BaseDaemon::initialize(Application & self)
{
@ -796,7 +768,6 @@ void BaseDaemon::initialize(Application & self)
initializeTerminationAndSignalProcessing();
logRevision();
debugIncreaseOOMScore();
for (const auto & key : DB::getMultipleKeysFromConfig(config(), "", "graphite"))
{