Merge pull request #15144 from ClickHouse/aku/numa-perf

Perf test: bind server to one NUMA node
commit 4476117ac6
Alexander Kuzmenkov, 2020-10-23 14:10:35 +03:00, committed by GitHub
6 changed files with 46 additions and 10 deletions


@@ -9,6 +9,7 @@ RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \
bash \
curl \
dmidecode \
g++ \
gdb \
git \
@@ -37,7 +38,18 @@ RUN apt-get update \
COPY * /
CMD /entrypoint.sh
# Bind everything to one NUMA node, if there is more than one. Theoretically,
# node #0 should be less stable because it handles system interrupts. We bind
# to node 1 or 0 at random to gather some statistics on that. We have to bind
# both the servers and the tmpfs on which the database is stored. How to do
# this through the Yandex Sandbox API is unclear, but by default tmpfs uses the
# 'process allocation policy' (presumably of the process that writes to it), so
# we just bind the downloader script as well. We could also try to remount it
# with the proper options in the Sandbox task.
# https://www.kernel.org/doc/Documentation/filesystems/tmpfs.txt
# Double-escaped backslashes are a tribute to the engineering wonder of docker --
# it gives '/bin/sh: 1: [bash,: not found' otherwise.
CMD ["bash", "-c", "node=$((RANDOM % $(numactl --hardware | sed -n 's/^.*available:\\(.*\\)nodes.*$/\\1/p'))); echo Will bind to NUMA node $node; numactl --cpunodebind=$node --membind=$node /entrypoint.sh"]
# docker run --network=host --volume <workspace>:/workspace --volume=<output>:/output -e PR_TO_TEST=<> -e SHA_TO_TEST=<> yandex/clickhouse-performance-comparison
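For reference, here is the node-selection logic from that CMD, unescaped and written out as a plain script. This is only a sketch: it assumes numactl is installed, and /entrypoint.sh is the container's entrypoint as above.

    #!/bin/bash
    # Count the NUMA nodes reported by numactl, e.g. "available: 2 nodes (0-1)".
    nodes=$(numactl --hardware | sed -n 's/^.*available:\(.*\)nodes.*$/\1/p')
    # Pick node 0 or 1 (or higher) at random, so runs are spread across nodes.
    node=$((RANDOM % nodes))
    echo "Will bind to NUMA node $node"
    # Bind both CPU scheduling and memory allocation to the chosen node.
    exec numactl --cpunodebind=$node --membind=$node /entrypoint.sh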


@@ -77,20 +77,33 @@ function restart
while killall clickhouse-server; do echo . ; sleep 1 ; done
echo all killed
set -m # Spawn servers in their own process groups
# Disable percpu arenas because they segfault when the process is bound to
# a particular NUMA node: https://github.com/jemalloc/jemalloc/pull/1939
#
# About the jemalloc settings:
# https://github.com/jemalloc/jemalloc/wiki/Getting-Started
export MALLOC_CONF="percpu_arena:disabled,confirm_conf:true"
left/clickhouse-server --config-file=left/config/config.xml -- --path left/db --user_files_path left/db/user_files &>> left-server-log.log &
set -m # Spawn servers in their own process groups
left/clickhouse-server --config-file=left/config/config.xml \
-- --path left/db --user_files_path left/db/user_files \
&>> left-server-log.log &
left_pid=$!
kill -0 $left_pid
disown $left_pid
right/clickhouse-server --config-file=right/config/config.xml -- --path right/db --user_files_path right/db/user_files &>> right-server-log.log &
right/clickhouse-server --config-file=right/config/config.xml \
-- --path right/db --user_files_path right/db/user_files \
&>> right-server-log.log &
right_pid=$!
kill -0 $right_pid
disown $right_pid
set +m
unset MALLOC_CONF
wait_for_server 9001 $left_pid
echo left ok
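As an aside, the launch pattern used for both servers can be read as a small template: 'set -m' turns on job control so each background server becomes its own process group leader, 'kill -0' only checks that the process came up, and 'disown' detaches it from the shell's job control. A minimal sketch with a stand-in command (the sleep is purely illustrative, not part of this commit):

    set -m           # job control on: each background child gets its own process group
    sleep 300 &      # stand-in for a clickhouse-server invocation
    pid=$!
    kill -0 "$pid"   # check that the process is actually alive
    disown "$pid"    # detach it from the shell's job table
    set +m           # job control off again for the rest of the script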
@@ -449,7 +462,12 @@ wait
unset IFS
)
parallel --joblog analyze/parallel-log.txt --null < analyze/commands.txt 2>> analyze/errors.log
# The comparison script might be bound to one NUMA node for better test
# stability, which can make this calculation run out of memory. Use all
# nodes.
numactl --show
numactl --cpunodebind=all --membind=all numactl --show
numactl --cpunodebind=all --membind=all parallel --joblog analyze/parallel-log.txt --null < analyze/commands.txt 2>> analyze/errors.log
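A note on the pattern above: the inner numactl --cpunodebind=all --membind=all overrides the binding inherited from the container's CMD, and running numactl --show both outside and inside the override records the before/after policy in the log. Stand-alone, with a placeholder workload name that is not from this commit:

    numactl --show                                            # policy inherited from the environment
    numactl --cpunodebind=all --membind=all numactl --show    # policy after the override
    numactl --cpunodebind=all --membind=all memory-hungry-workload   # placeholder command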
clickhouse-local --query "
-- Join the metric names back to the metric statistics we've calculated, and make
@@ -1070,8 +1088,10 @@ case "$stage" in
time configure
;&
"restart")
numactl --show ||:
numactl --hardware ||:
lscpu ||:
dmidecode -t 4 ||:
time restart
;&
"run_tests")


@@ -14,6 +14,9 @@
we might also add a time check to the perf.py script.
-->
<max_execution_time>300</max_execution_time>
<!-- One NUMA node w/o hyperthreading -->
<max_threads>20</max_threads>
</default>
</profiles>
</yandex>
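The max_threads value of 20 matches the CI machines: one NUMA node's worth of physical cores, with hyperthreading not counted. A hedged sketch of deriving such a number on a given host; the lscpu field names are assumptions about its usual output, not something taken from this commit:

    # physical cores per NUMA node = cores per socket * sockets / NUMA nodes
    cores_per_socket=$(lscpu | awk -F: '/^Core\(s\) per socket/ {gsub(/ /, ""); print $2}')
    sockets=$(lscpu | awk -F: '/^Socket\(s\)/ {gsub(/ /, ""); print $2}')
    numa_nodes=$(lscpu | awk -F: '/^NUMA node\(s\)/ {gsub(/ /, ""); print $2}')
    echo $(( cores_per_socket * sockets / numa_nodes ))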


@@ -5,6 +5,7 @@
<preconditions>
<table_exists>hits_100m_single</table_exists>
<table_exists>hits_10m_single</table_exists>
</preconditions>
@@ -36,7 +37,7 @@
<query><![CDATA[SELECT count() FROM hits_100m_single WHERE multiMatchAny(URL, ['about/address', 'for_woman', '^https?://lm-company.ruy/$', 'ultimateguitar.com'])]]></query>
<query><![CDATA[SELECT count() FROM hits_100m_single WHERE match(URL, 'about/address|for_woman|^https?://lm-company.ruy/$|ultimateguitar.com')]]></query>
<query><![CDATA[SELECT count() FROM hits_100m_single WHERE match(URL, 'chelyabinsk.74.ru|doctor.74.ru|transport.74.ru|m.74.ru|//74.ru/|chel.74.ru|afisha.74.ru|diplom.74.ru|chelfin.ru|//chel.ru|chelyabinsk.ru|cheldoctor.ru|//mychel.ru|cheldiplom.ru|74.ru/video|market|poll|mail|conference|consult|contest|tags|feedback|pages|text')]]></query>
<query><![CDATA[SELECT count() FROM hits_10m_single WHERE match(URL, 'chelyabinsk.74.ru|doctor.74.ru|transport.74.ru|m.74.ru|//74.ru/|chel.74.ru|afisha.74.ru|diplom.74.ru|chelfin.ru|//chel.ru|chelyabinsk.ru|cheldoctor.ru|//mychel.ru|cheldiplom.ru|74.ru/video|market|poll|mail|conference|consult|contest|tags|feedback|pages|text')]]></query>
<query><![CDATA[SELECT count() FROM hits_100m_single WHERE multiMatchAny(URL, ['chelyabinsk.74.ru', 'doctor.74.ru', 'transport.74.ru', 'm.74.ru', '//74.ru/', 'chel.74.ru', 'afisha.74.ru', 'diplom.74.ru', 'chelfin.ru', '//chel.ru', 'chelyabinsk.ru', 'cheldoctor.ru', '//mychel.ru', 'cheldiplom.ru', '74.ru/video', 'market', 'poll', 'mail', 'conference', 'consult', 'contest', 'tags', 'feedback', 'pages', 'text'])]]></query>
<query><![CDATA[SELECT count() FROM hits_100m_single WHERE multiMatchAny(URL, ['chelyabinsk\\.74\\.ru', 'doctor\\.74\\.ru', 'transport\\.74\\.ru', 'm\\.74\\.ru', '//74\\.ru/', 'chel\\.74\\.ru', 'afisha\\.74\\.ru', 'diplom\\.74\\.ru', 'chelfin\\.ru', '//chel\\.ru', 'chelyabinsk\\.ru', 'cheldoctor\\.ru', '//mychel\\.ru', 'cheldiplom\\.ru', '74\\.ru/video', 'market', 'poll', 'mail', 'conference', 'consult', 'contest', 'tags', 'feedback', 'pages', 'text'])]]></query>


@@ -20,5 +20,5 @@
</substitution>
</substitutions>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore({func}URL))</query>
<query>SELECT ignore({func}URL)) FROM hits_100m_single LIMIT 50000000 FORMAT Null</query>
</test>


@@ -37,7 +37,7 @@
<query>SELECT UserID, count() FROM {table} GROUP BY UserID ORDER BY count() DESC LIMIT 10</query>
<query>SELECT UserID, SearchPhrase, count() FROM {table} GROUP BY UserID, SearchPhrase ORDER BY count() DESC LIMIT 10</query>
<query>SELECT UserID, SearchPhrase, count() FROM {table} GROUP BY UserID, SearchPhrase LIMIT 10</query>
<query>SELECT UserID, toMinute(EventTime) AS m, SearchPhrase, count() FROM {table} GROUP BY UserID, m, SearchPhrase ORDER BY count() DESC LIMIT 10</query>
<query>SELECT UserID, toMinute(EventTime) AS m, SearchPhrase, count() FROM hits_10m_single GROUP BY UserID, m, SearchPhrase ORDER BY count() DESC LIMIT 10</query>
<query short="1">SELECT count() FROM hits_100m_single WHERE UserID = 12345678901234567890</query>
<query>SELECT count() FROM hits_100m_single WHERE URL LIKE '%metrika%'</query>
<query>SELECT SearchPhrase, any(URL), count() AS c FROM hits_100m_single WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10</query>
@@ -52,8 +52,8 @@
<query>SELECT SearchEngineID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_100m_single WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10</query>
<query>SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10</query>
<query>SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m_single GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10</query>
<query>SELECT URL, count() AS c FROM {table} GROUP BY URL ORDER BY c DESC LIMIT 10</query>
<query>SELECT 1, URL, count() AS c FROM {table} GROUP BY 1, URL ORDER BY c DESC LIMIT 10</query>
<query>SELECT URL, count() AS c FROM hits_10m_single GROUP BY URL ORDER BY c DESC LIMIT 10</query>
<query>SELECT 1, URL, count() AS c FROM hits_10m_single GROUP BY 1, URL ORDER BY c DESC LIMIT 10</query>
<query>SELECT ClientIP AS x, x - 1, x - 2, x - 3, count() AS c FROM hits_100m_single GROUP BY x, x - 1, x - 2, x - 3 ORDER BY c DESC LIMIT 10</query>
<query>SELECT URL, count() AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= '2013-07-01' AND EventDate &lt;= '2013-07-31' AND NOT DontCountHits AND NOT Refresh AND notEmpty(URL) GROUP BY URL ORDER BY PageViews DESC LIMIT 10 SETTINGS max_threads = 1</query>
<query>SELECT Title, count() AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= '2013-07-01' AND EventDate &lt;= '2013-07-31' AND NOT DontCountHits AND NOT Refresh AND notEmpty(Title) GROUP BY Title ORDER BY PageViews DESC LIMIT 10</query>