diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh
index 2680464c54a..3e5b6111288 100755
--- a/docker/test/performance-comparison/compare.sh
+++ b/docker/test/performance-comparison/compare.sh
@@ -369,7 +369,7 @@ create table metric_devation engine File(TSVWithNamesAndTypes, 'metric-deviation
         quantilesExact(0, 0.5, 1)(value) q, metric, query
     from (select * from unstable_run_metrics
         union all select * from unstable_run_traces
-        union all select * from unstable_run_metrics_2)
+        union all select * from unstable_run_metrics_2) mm
     join queries using query
     group by query, metric
     having d > 0.5
@@ -406,7 +406,7 @@ unset IFS
 
 # Remember that grep sets error code when nothing is found, hence the bayan
 # operator
-grep Exception:[^:] *-err.log > run-errors.log ||:
+grep -m2 Exception:[^:] *-err.log | sed 's/:/\t/' > run-errors.tsv ||:
 
 $script_dir/report.py > report.html
 }
diff --git a/docker/test/performance-comparison/entrypoint.sh b/docker/test/performance-comparison/entrypoint.sh
index 3fc79f7a110..36e6a393b28 100755
--- a/docker/test/performance-comparison/entrypoint.sh
+++ b/docker/test/performance-comparison/entrypoint.sh
@@ -59,6 +59,17 @@ set -m
 time ../compare.sh 0 $ref_sha $PR_TO_TEST $SHA_TO_TEST 2>&1 | ts "$(printf '%%Y-%%m-%%d %%H:%%M:%%S\t')" | tee compare.log
 set +m
 
+# Stop the servers to free memory. Normally they are restarted before getting
+# the profile info, so they shouldn't use much, but if the comparison script
+# fails in the middle, this might not be the case.
+for i in {1..30}
+do
+    if ! killall clickhouse
+    then
+        break
+    fi
+done
+
 dmesg -T > dmesg.log
 
 7z a /output/output.7z *.log *.tsv *.html *.txt *.rep *.svg {right,left}/db/preprocessed_configs
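Note: the shutdown loop added to entrypoint.sh above relies on killall's exit
status: killall returns non-zero once no matching process is left, which ends
the loop early; otherwise it gives up after 30 attempts. A rough Python sketch
of the same idea, for illustration only (the stop_clickhouse helper and the
one-second pause between attempts are assumptions, not part of this patch;
the shell loop retries immediately):

    import subprocess
    import time

    def stop_clickhouse(max_attempts=30):
        # Keep signalling until nothing matches. killall exits with a
        # non-zero code when it finds no process to kill, which is exactly
        # the "everything has stopped" condition.
        for _ in range(max_attempts):
            if subprocess.run(['killall', 'clickhouse']).returncode != 0:
                break
            time.sleep(1)  # illustrative pause between attempts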
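Note: in the compare.sh hunk above, grep -m2 caps the output at two
'Exception:' matches per log, and, because grep prefixes each match with its
file name when given several files, sed 's/:/\t/' rewrites that first colon
into a tab. The result is the two-column run-errors.tsv that the reworked
report.py below renders as the 'Run errors' table. A hypothetical Python
equivalent of the pipeline, a sketch under those assumptions:

    import glob
    import re

    # Approximates `grep -m2 Exception:[^:] *-err.log | sed 's/:/\t/'`.
    exception = re.compile(r'Exception:[^:]')
    with open('run-errors.tsv', 'w') as out:
        for name in sorted(glob.glob('*-err.log')):
            with open(name, errors='replace') as log:
                matches = [line for line in log if exception.search(line)][:2]
            for line in matches:
                # grep emits 'file:match'; replacing the first ':' with a
                # tab yields the (Test, Error) columns the report expects.
                out.write(name + '\t' + line)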
diff --git a/docker/test/performance-comparison/report.py b/docker/test/performance-comparison/report.py
index a051d175cf3..1a0de462a89 100755
--- a/docker/test/performance-comparison/report.py
+++ b/docker/test/performance-comparison/report.py
@@ -2,6 +2,7 @@
 
 import collections
 import csv
+import itertools
 import os
 import sys
 import traceback
@@ -9,6 +10,8 @@ import traceback
 
 report_errors = []
 status = 'success'
 message = 'See the report'
+long_tests = 0
+run_errors = 0
 
 print("""
@@ -37,6 +40,7 @@ a:hover, a:active {{ color: #F40; text-decoration: underline; }}
 table {{ border: 0; }}
 .main {{ margin-left: 10%; }}
 p.links a {{ padding: 5px; margin: 3px; background: #FFF; line-height: 2; white-space: nowrap; box-shadow: 0 0 0 1px rgba(0, 0, 0, 0.05), 0 8px 25px -5px rgba(0, 0, 0, 0.1); }}
+
 .cancela,.cancela:link,.cancela:visited,.cancela:hover,.cancela:focus,.cancela:active{{
     color: inherit;
     text-decoration: none;
@@ -51,15 +55,6 @@ tr:nth-child(odd) td {{filter: brightness(95%);}}
 <div class="main">
 <h1>ClickHouse performance comparison</h1>
 
 """.format())
-
-table_template = """
-<h2 id="{anchor}"><a class="cancela" href="#{anchor}">{caption}</a></h2>
-<table>
-{header}
-{rows}
-</table>
-"""
-
 table_anchor = 0
 row_anchor = 0
@@ -79,16 +74,19 @@ def tr(x):
     #return '<tr onclick="location.href=\'#{a}\'" id={a}>{x}</tr>'.format(a=a, x=str(x))
     return '<tr id={a}>{x}</tr>'.format(a=a, x=str(x))
 
-def td(value, cell_class = ''):
-    return '<td class="{cell_class}">{value}</td>'.format(
-        cell_class = cell_class,
+def td(value, cell_attributes = ''):
+    return '<td {cell_attributes}>{value}</td>'.format(
+        cell_attributes = cell_attributes,
         value = value)
 
 def th(x):
     return '<th>' + str(x) + '</th>'
 
-def tableRow(r):
-    return tr(''.join([td(f) for f in r]))
+def tableRow(cell_values, cell_attributes = []):
+    return tr(''.join([td(v, a)
+        for v, a in itertools.zip_longest(
+            cell_values, cell_attributes,
+            fillvalue = '')]))
 
 def tableHeader(r):
     return tr(''.join([th(f) for f in r]))
@@ -103,7 +101,7 @@ def tableStart(title):
 
 def tableEnd():
     return '</table>'
 
-def tsvRowsRaw(n):
+def tsvRows(n):
     result = []
     try:
         with open(n, encoding='utf-8') as fd:
@@ -115,64 +113,81 @@
         pass
     return []
 
-def tsvTableRows(n):
-    rawRows = tsvRowsRaw(n)
+def htmlRows(n):
+    rawRows = tsvRows(n)
     result = ''
     for row in rawRows:
         result += tableRow(row)
     return result
 
-print(table_template.format(
-    anchor = nextTableAnchor(),
-    caption = 'Tested commits',
-    header = tableHeader(['Old', 'New']),
-    rows = tableRow([open('left-commit.txt').read(), open('right-commit.txt').read()])))
-
-print(table_template.format(
-    anchor = nextTableAnchor(),
-    caption = 'Changes in performance',
-    header = tableHeader(['Old, s', 'New, s',
-        'Relative difference (new - old)/old',
-        'Randomization distribution quantiles [5%, 50%, 95%]',
-        'Test', 'Query']),
-    rows = tsvTableRows('changed-perf.tsv')))
-
-print(table_template.format(
-    anchor = nextTableAnchor(),
-    caption = 'Slow on client',
-    header = tableHeader(['Client time, s', 'Server time, s', 'Ratio', 'Query']),
-    rows = tsvTableRows('slow-on-client.tsv')))
-
-print(table_template.format(
-    anchor = nextTableAnchor(),
-    caption = 'Unstable queries',
-    header = tableHeader(['Old, s', 'New, s',
-        'Relative difference (new - old)/old',
-        'Randomization distribution quantiles [5%, 50%, 95%]',
-        'Test', 'Query']),
-    rows = tsvTableRows('unstable-queries.tsv')))
-
-print(table_template.format(
-    anchor = nextTableAnchor(),
-    caption = 'Run errors',
-    header = tableHeader(['A', 'B']),
-    rows = tsvTableRows('run-errors.log')))
-
-print(table_template.format(
-    anchor = nextTableAnchor(),
-    caption = 'Tests with most unstable queries',
-    header = tableHeader(['Test', 'Unstable', 'Changed perf', 'Total not OK']),
-    rows = tsvTableRows('bad-tests.tsv')))
-
-def print_test_times():
-    rows = tsvRowsRaw('test-times.tsv')
+def printSimpleTable(caption, columns, rows):
     if not rows:
         return
-    print(rows, file=sys.stderr)
+    print(tableStart(caption))
+    print(tableHeader(columns))
+    for row in rows:
+        print(tableRow(row))
+    print(tableEnd())
 
-    print(tableStart('Test times'))
-    print(tableHeader([
+printSimpleTable('Tested commits', ['Old', 'New'],
+    [[open('left-commit.txt').read(), open('right-commit.txt').read()]])
+
+def print_changes():
+    rows = tsvRows('changed-perf.tsv')
+    if not rows:
+        return
+
+    print(tableStart('Changes in performance'))
+    columns = [
+        'Old, s',                                               # 0
+        'New, s',                                               # 1
+        'Relative difference (new - old)/old',                  # 2
+        'Randomization distribution quantiles [5%, 50%, 95%]',  # 3
+        'Test',                                                 # 4
+        'Query',                                                # 5
+        ]
+
+    print(tableHeader(columns))
+
+    attrs = ['' for c in columns]
+    for row in rows:
+        if float(row[2]) > 0.:
+            attrs[2] = 'style="background: #adbdff"'
+        else:
+            attrs[2] = 'style="background: #ffb0a0"'
+
+        print(tableRow(row, attrs))
+
+    print(tableEnd())
+
+print_changes()
+
+printSimpleTable('Slow on client',
+    ['Client time, s', 'Server time, s', 'Ratio', 'Query'],
+    tsvRows('slow-on-client.tsv'))
+
+printSimpleTable('Unstable queries',
+    [
+        'Old, s', 'New, s', 'Relative difference (new - old)/old',
+        'Randomization distribution quantiles [5%, 50%, 95%]',
+        'Test', 'Query'
+    ],
+    tsvRows('unstable-queries.tsv'))
+
+printSimpleTable('Run errors', ['Test', 'Error'], tsvRows('run-errors.tsv'))
+
+printSimpleTable('Tests with most unstable queries',
+    ['Test', 'Unstable', 'Changed perf', 'Total not OK'],
+    tsvRows('bad-tests.tsv'))
+
+def print_test_times():
+    global long_tests
+    rows = tsvRows('test-times.tsv')
+    if not rows:
+        return
+
+    columns = [
         'Test',                                          #0
         'Wall clock time, s',                            #1
         'Total client time, s',                          #2
@@ -181,10 +196,20 @@ def print_test_times():
         'Total queries',                                 #3
         'Ignored short queries',                         #4
         'Longest query<br>(sum for all runs), s',        #5
         'Avg wall clock time<br>(sum for all runs), s',  #6
         'Shortest query<br>(sum for all runs), s',       #7
-        ]))
+        ]
+    print(tableStart('Test times'))
+    print(tableHeader(columns))
+
+    attrs = ['' for c in columns]
     for r in rows:
-        print(tableRow(r))
+        if float(r[6]) > 10:
+            long_tests += 1
+            attrs[6] = 'style="background: #ffb0a0"'
+        else:
+            attrs[6] = ''
+
+        print(tableRow(r, attrs))
 
     print(tableEnd())
@@ -207,7 +232,7 @@ print("""
 """)
 
 if report_errors:
-    status = 'error'
+    status = 'failure'
     message = 'Errors while building the report.'
 
 print("""
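Note: the new tableRow() signature is the heart of the report rework: cell
values are zipped with optional per-cell attribute strings via
itertools.zip_longest, padding with '' so existing callers that pass no
attributes keep working. A self-contained sketch of that mechanism (helpers
simplified for illustration; the real tr() in report.py also emits row
anchors):

    import itertools

    def td(value, cell_attributes=''):
        return '<td {}>{}</td>'.format(cell_attributes, value)

    def table_row(cell_values, cell_attributes=[]):
        # zip_longest pads the shorter list with '', so a row with three
        # cells and one attribute string still renders all three cells.
        return '<tr>' + ''.join(
            td(v, a) for v, a in itertools.zip_longest(
                cell_values, cell_attributes, fillvalue='')) + '</tr>'

    # Highlight only the relative-difference column, as print_changes() does.
    attrs = ['', '', 'style="background: #ffb0a0"']
    print(table_row(['0.123', '0.130', '+0.057'], attrs))
    # <tr><td >0.123</td><td >0.130</td>
    #     <td style="background: #ffb0a0">+0.057</td></tr>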