diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index abdf8130c14..912a8bd12cd 100644 --- a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -12,6 +12,8 @@ RUN apt-get update \ g++ \ gdb \ git \ + gnuplot \ + imagemagick \ libc6-dbg \ moreutils \ ncdu \ diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 0a205796c9d..f39b8b593ab 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -36,18 +36,14 @@ function configure while killall clickhouse-server; do echo . ; sleep 1 ; done echo all killed - # Remove logs etc, because they will be updated, and sharing them between - # servers with hardlink might cause unpredictable behavior. - rm db0/data/system/* -rf ||: - rm db0/metadata/system/* -rf ||: - # Make copies of the original db for both servers. Use hardlinks instead - # of copying. Be careful to remove preprocessed configs and system tables,or - # it can lead to weird effects. + # of copying to save space. Before that, remove preprocessed configs and + # system tables, because sharing them between servers with hardlinks may + # lead to weird effects. rm -r left/db ||: rm -r right/db ||: rm -r db0/preprocessed_configs ||: - rm -r db/{data,metadata}/system ||: + rm -r db0/{data,metadata}/system ||: cp -al db0/ left/db/ cp -al db0/ right/db/ } @@ -143,15 +139,18 @@ function run_tests touch "$x" done + # Randomize test order. + test_files=$(for f in $test_files; do echo "$f"; done | sort -R) + # Run the tests. test_name="" for test in $test_files do - # Check that both servers are alive, to fail faster if they die. + # Check that both servers are alive, and restart them if they die. 
clickhouse-client --port 9001 --query "select 1 format Null" \ - || { echo $test_name >> left-server-died.log ; restart ; continue ; } + || { echo $test_name >> left-server-died.log ; restart ; } clickhouse-client --port 9002 --query "select 1 format Null" \ - || { echo $test_name >> right-server-died.log ; restart ; continue ; } + || { echo $test_name >> right-server-died.log ; restart ; } test_name=$(basename "$test" ".xml") echo test "$test_name" @@ -217,10 +216,13 @@ function get_profiles # Collect the profiles clickhouse-client --port 9001 --query "set query_profiler_cpu_time_period_ns = 0" clickhouse-client --port 9001 --query "set query_profiler_real_time_period_ns = 0" - clickhouse-client --port 9001 --query "set query_profiler_cpu_time_period_ns = 0" - clickhouse-client --port 9001 --query "set query_profiler_real_time_period_ns = 0" - clickhouse-client --port 9001 --query "system flush logs" - clickhouse-client --port 9002 --query "system flush logs" + clickhouse-client --port 9001 --query "system flush logs" & + + clickhouse-client --port 9002 --query "set query_profiler_cpu_time_period_ns = 0" + clickhouse-client --port 9002 --query "set query_profiler_real_time_period_ns = 0" + clickhouse-client --port 9002 --query "system flush logs" & + + wait clickhouse-client --port 9001 --query "select * from system.query_log where type = 2 format TSVWithNamesAndTypes" > left-query-log.tsv ||: & clickhouse-client --port 9001 --query "select * from system.query_thread_log format TSVWithNamesAndTypes" > left-query-thread-log.tsv ||: & @@ -292,25 +294,54 @@ create view right_query_log as select * from file('right-query-log.tsv', TSVWithNamesAndTypes, '$(cat "right-query-log.tsv.columns")'); -create table query_metrics engine File(TSV, -- do not add header -- will parse with grep - 'analyze/query-run-metrics.tsv') - as select - test, query_index, 0 run, version, - [ - -- server-reported time - query_duration_ms / toFloat64(1000) - , toFloat64(memory_usage) - -- 
client-reported time - , query_runs.time - ] metrics - from ( - select query_duration_ms, memory_usage, query_id, 0 version from left_query_log - union all - select query_duration_ms, memory_usage, query_id, 1 version from right_query_log - ) query_logs +create view query_logs as + select *, 0 version from left_query_log + union all + select *, 1 version from right_query_log + ; + +create table query_run_metrics_full engine File(TSV, 'analyze/query-run-metrics-full.tsv') + as + with ( + -- sumMapState with the list of all keys with '-0.' values. Negative zero is because + -- sumMap removes keys with positive zeros. + with (select groupUniqArrayArray(ProfileEvents.Names) from query_logs) as all_names + select arrayReduce('sumMapState', [(all_names, arrayMap(x->-0., all_names))]) + ) as all_metrics + select test, query_index, version, query_id, + (finalizeAggregation( + arrayReduce('sumMapMergeState', + [ + all_metrics, + arrayReduce('sumMapState', + [(ProfileEvents.Names, + arrayMap(x->toFloat64(x), ProfileEvents.Values))] + ), + arrayReduce('sumMapState', [( + ['client_time', 'server_time'], + arrayMap(x->if(x != 0., x, -0.), [ + toFloat64(query_runs.time), + toFloat64(query_duration_ms / 1000.)]))]) + ] + )) as metrics_tuple).1 metric_names, + metrics_tuple.2 metric_values + from query_logs right join query_runs - using (query_id, version) - order by test, query_index + on query_logs.query_id = query_runs.query_id + and query_logs.version = query_runs.version + ; + +create table query_run_metrics engine File( + TSV, -- do not add header -- will parse with grep + 'analyze/query-run-metrics.tsv') + as select test, query_index, 0 run, version, metric_values + from query_run_metrics_full + where test = 'arithmetic' + order by test, query_index, run, version + ; + +create table query_run_metric_names engine File(TSV, 'analyze/query-run-metric-names.tsv') + as select metric_names from query_run_metrics_full limit 1 ; " @@ -320,7 +351,6 @@ create table query_metrics 
engine File(TSV, -- do not add header -- will parse w # query. We also don't have lateral joins. So I just put all runs of each # query into a separate file, and then compute randomization distribution # for each file. I do this in parallel using GNU parallel. -query_index=1 IFS=$'\n' for prefix in $(cut -f1,2 "analyze/query-run-metrics.tsv" | sort | uniq) do @@ -360,20 +390,34 @@ create view query_display_names as select * from 'test text, query_index int, query_display_name text') ; -create table query_metric_stats engine File(TSVWithNamesAndTypes, - 'report/query-metric-stats.tsv') as +-- WITH, ARRAY JOIN and CROSS JOIN do not like each other: +-- https://github.com/ClickHouse/ClickHouse/issues/11868 +-- https://github.com/ClickHouse/ClickHouse/issues/11757 +-- Because of this, we make a view with arrays first, and then apply all the +-- array joins. + +create view query_metric_stat_arrays as + with (select * from file('analyze/query-run-metric-names.tsv', + TSV, 'n Array(String)')) as metric_name select metric_name, left, right, diff, stat_threshold, test, query_index, query_display_name from file ('analyze/query-reports.tsv', TSV, 'left Array(float), right Array(float), diff Array(float), stat_threshold Array(float), test text, query_index int') reports - left array join ['server_time', 'memory', 'client_time'] as metric_name, - left, right, diff, stat_threshold left join query_display_names on reports.test = query_display_names.test and reports.query_index = query_display_names.query_index ; +create table query_metric_stats engine File(TSVWithNamesAndTypes, + 'report/query-metric-stats.tsv') + as + select metric_name, left, right, diff, stat_threshold, test, query_index, + query_display_name + from query_metric_stat_arrays + left array join metric_name, left, right, diff, stat_threshold + ; + -- Main statistics for queries -- query time as reported in query log. 
create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv') as select @@ -392,7 +436,8 @@ create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv') not short and not changed_show and stat_threshold > report_threshold - 0.05 as unstable_show, left, right, diff, stat_threshold, - if(report_threshold > 0, report_threshold, 0.10) as report_threshold, + --if(report_threshold > 0, report_threshold, 0.10) as report_threshold, + 0.10 as report_threshold, test, query_index, query_display_name from query_metric_stats left join file('analyze/report-thresholds.tsv', TSV, @@ -728,6 +773,64 @@ unset IFS grep -H -m2 -i '\(Exception\|Error\):[^:]' ./*-err.log | sed 's/:/\t/' >> run-errors.tsv ||: } +function report_metrics +{ +rm -rf metrics ||: +mkdir metrics + +clickhouse-local --stacktrace --verbose --query " +create view right_async_metric_log as + select * from file('right-async-metric-log.tsv', TSVWithNamesAndTypes, + 'event_date Date, event_time DateTime, name String, value Float64') + ; + +-- Use the right log as time reference because it may have higher precision. 
+create table metrics engine File(TSV, 'metrics/metrics.tsv') as + with (select min(event_time) from right_async_metric_log) as min_time + select name metric, r.event_time - min_time event_time, l.value as left, r.value as right + from right_async_metric_log r + asof join file('left-async-metric-log.tsv', TSVWithNamesAndTypes, + 'event_date Date, event_time DateTime, name String, value Float64') l + on l.name = r.name and r.event_time <= l.event_time + order by metric, event_time + ; + +-- Show metrics that have changed +create table changes engine File(TSV, 'metrics/changes.tsv') as + select metric, median(left) as left, median(right) as right, + floor((right - left) / left, 3) diff, + floor(if(left > right, left / right, right / left), 3) times_diff + from metrics + group by metric + having abs(diff) > 0.05 and isFinite(diff) + order by diff desc + ; +" + +IFS=$'\n' +for prefix in $(cut -f1 "metrics/metrics.tsv" | sort | uniq) +do + file="metrics/$prefix.tsv" + grep "^$prefix " "metrics/metrics.tsv" | cut -f2- > "$file" + + gnuplot -e " + set datafile separator '\t'; + set terminal png size 960,540; + set xtics time format '%tH:%tM'; + set title '$prefix' noenhanced offset 0,-3; + set key left top; + plot + '$file' using 1:2 with lines title 'Left' + , '$file' using 1:3 with lines title 'Right' + ; + " \ + | convert - -filter point -resize "200%" "metrics/$prefix.png" & + +done +wait +unset IFS +} + # Check that local and client are in PATH clickhouse-local --version > /dev/null clickhouse-client --version > /dev/null @@ -749,12 +852,24 @@ case "$stage" in time run_benchmark 2> >(tee -a run-errors.tsv 1>&2) ||: ;& "get_profiles") - # Getting profiles inexplicably hangs sometimes, so try to save some logs if - # this happens again. Give the servers some time to collect all info, then - # trace and kill. Start in a subshell, so that both function don't interfere - # with each other's jobs through `wait`. 
Also make the subshell have its own - # process group, so that we can then kill it with all its child processes. - # Somehow it doesn't kill the children by itself when dying. + # Check for huge pages. + cat /sys/kernel/mm/transparent_hugepage/enabled > thp-enabled.txt ||: + cat /proc/meminfo > meminfo.txt ||: + for pid in $(pgrep -f clickhouse-server) + do + cat "/proc/$pid/smaps" > "$pid-smaps.txt" ||: + done + + # Sleep for five minutes to see how the servers enter a quiescent state (e.g. + # how fast the memory usage drops). + sleep 300 + + # We had a bug where getting profiles froze sometimes, so try to save some + # logs if this happens again. Give the servers some time to collect all info, + # then trace and kill. Start in a subshell, so that both functions don't + # interfere with each other's jobs through `wait`. Also make the subshell + # have its own process group, so that we can then kill it with all its child + # processes. Somehow it doesn't kill the children by itself when dying. set -m ( get_profiles_watchdog ) & watchdog_pid=$!
@@ -781,7 +896,11 @@ case "$stage" in ;& "report") time report ||: - + ;& +"report_metrics") + time report_metrics ||: + ;& +"report_html") time "$script_dir/report.py" --report=all-queries > all-queries.html 2> >(tee -a report/errors.log 1>&2) ||: time "$script_dir/report.py" > report.html ;& diff --git a/docker/test/performance-comparison/entrypoint.sh b/docker/test/performance-comparison/entrypoint.sh index 1a9438cd60f..f5bc04b4441 100755 --- a/docker/test/performance-comparison/entrypoint.sh +++ b/docker/test/performance-comparison/entrypoint.sh @@ -133,6 +133,6 @@ dmesg -T > dmesg.log 7z a '-x!*/tmp' /output/output.7z ./*.{log,tsv,html,txt,rep,svg,columns} \ {right,left}/{performance,scripts} {{right,left}/db,db0}/preprocessed_configs \ - report analyze benchmark + report analyze benchmark metrics cp compare.log /output diff --git a/docker/test/performance-comparison/report.py b/docker/test/performance-comparison/report.py index 3af710606c2..227722a8bea 100755 --- a/docker/test/performance-comparison/report.py +++ b/docker/test/performance-comparison/report.py @@ -191,7 +191,7 @@ if args.report == 'main': slow_on_client_rows = tsvRows('report/slow-on-client.tsv') error_tests += len(slow_on_client_rows) printSimpleTable('Slow on client', - ['Client time, s', 'Server time, s', 'Ratio', 'Test', 'Query'], + ['Client time, s', 'Server time, s', 'Ratio', 'Test', 'Query'], slow_on_client_rows) def print_changes(): @@ -203,8 +203,8 @@ if args.report == 'main': print(tableStart('Changes in performance')) columns = [ - 'Old, s', # 0 - 'New, s', # 1 + 'Old, s', # 0 + 'New, s', # 1 'Relative difference (new − old) / old', # 2 'p < 0.001 threshold', # 3 # Failed # 4 @@ -245,8 +245,8 @@ if args.report == 'main': unstable_queries += len(unstable_rows) columns = [ - 'Old, s', #0 - 'New, s', #1 + 'Old, s', #0 + 'New, s', #1 'Relative difference (new - old)/old', #2 'p < 0.001 threshold', #3 # Failed #4 @@ -288,13 +288,13 @@ if args.report == 'main': columns = [ 'Test', #0 - 
'Wall clock time, s', #1 - 'Total client time, s', #2 + 'Wall clock time, s', #1 + 'Total client time, s', #2 'Total queries', #3 'Ignored short queries', #4 - 'Longest query
(sum for all runs), s', #5 - 'Avg wall clock time
(sum for all runs), s', #6 - 'Shortest query
(sum for all runs), s', #7 + 'Longest query
(sum for all runs), s', #5 + 'Avg wall clock time
(sum for all runs), s', #6 + 'Shortest query
(sum for all runs), s', #7 ] print(tableStart('Test times')) @@ -384,6 +384,11 @@ if args.report == 'main': *sys.exc_info()[:2])[-1]) pass + printSimpleTable('Metric changes', + ['Metric', 'Old median value', 'New median value', + 'Relative difference', 'Times difference'], + tsvRows('metrics/changes.tsv')) + print_report_errors() print(""" @@ -450,8 +455,8 @@ elif args.report == 'all-queries': columns = [ # Changed #0 # Unstable #1 - 'Old, s', #2 - 'New, s', #3 + 'Old, s', #2 + 'New, s', #3 'Relative difference (new − old) / old', #4 'Times speedup / slowdown', #5 'p < 0.001 threshold', #6