From af9e5ff290f66ddd0e3f22ff2350f3f71b138b4b Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Wed, 26 Feb 2020 17:42:03 +0300 Subject: [PATCH 1/3] fix sed with binary results (sed: RE error: illegal byte sequence) --- dbms/tests/clickhouse-test | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/tests/clickhouse-test b/dbms/tests/clickhouse-test index 7d3d65defc6..aafb71a2ffa 100755 --- a/dbms/tests/clickhouse-test +++ b/dbms/tests/clickhouse-test @@ -76,8 +76,8 @@ def run_single_test(args, ext, server_logs_level, client_options, case_file, std total_time = (datetime.now() - start_time).total_seconds() # Normalize randomized database names in stdout, stderr files. - os.system("sed -i -e 's/{test_db}/default/g' {file}".format(test_db=args.database, file=stdout_file)) - os.system("sed -i -e 's/{test_db}/default/g' {file}".format(test_db=args.database, file=stderr_file)) + os.system("LC_ALL=C LC_CTYPE=C LANG=C sed -i -e 's/{test_db}/default/g' {file}".format(test_db=args.database, file=stdout_file)) + os.system("LC_ALL=C LC_CTYPE=C LANG=C sed -i -e 's/{test_db}/default/g' {file}".format(test_db=args.database, file=stderr_file)) stdout = open(stdout_file, 'r').read() if os.path.exists(stdout_file) else '' stdout = unicode(stdout, errors='replace', encoding='utf-8') From a26092865808498a10cf9a15701569651761091b Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Wed, 26 Feb 2020 17:53:19 +0300 Subject: [PATCH 2/3] better --- dbms/tests/clickhouse-test | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/tests/clickhouse-test b/dbms/tests/clickhouse-test index aafb71a2ffa..5f987890aaa 100755 --- a/dbms/tests/clickhouse-test +++ b/dbms/tests/clickhouse-test @@ -76,8 +76,8 @@ def run_single_test(args, ext, server_logs_level, client_options, case_file, std total_time = (datetime.now() - start_time).total_seconds() # Normalize randomized database names in stdout, stderr files. - os.system("LC_ALL=C LC_CTYPE=C LANG=C sed -i -e 's/{test_db}/default/g' {file}".format(test_db=args.database, file=stdout_file)) - os.system("LC_ALL=C LC_CTYPE=C LANG=C sed -i -e 's/{test_db}/default/g' {file}".format(test_db=args.database, file=stderr_file)) + os.system("LC_ALL=C sed -i -e 's/{test_db}/default/g' {file}".format(test_db=args.database, file=stdout_file)) + os.system("LC_ALL=C sed -i -e 's/{test_db}/default/g' {file}".format(test_db=args.database, file=stderr_file)) stdout = open(stdout_file, 'r').read() if os.path.exists(stdout_file) else '' stdout = unicode(stdout, errors='replace', encoding='utf-8') From 9ed1c7ea22dbc286b5b5e0a3a6b8a198d4085e51 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Wed, 26 Feb 2020 19:18:58 +0300 Subject: [PATCH 3/3] performance comparison --- docker/test/performance-comparison/report.py | 205 ++++++++++++------- 1 file changed, 132 insertions(+), 73 deletions(-) diff --git a/docker/test/performance-comparison/report.py b/docker/test/performance-comparison/report.py index f8801defb32..a051d175cf3 100755 --- a/docker/test/performance-comparison/report.py +++ b/docker/test/performance-comparison/report.py @@ -4,8 +4,13 @@ import collections import csv import os import sys +import traceback -doc_template = """ +report_errors = [] +status = 'success' +message = 'See the report' + +print(""" - {header} + Clickhouse performance comparison
-

{header}

-{test_part} - - - -""" +

ClickHouse performance comparison

+""".format()) + table_template = """

{caption}

@@ -84,74 +79,138 @@ def tr(x): #return '{x}'.format(a=a, x=str(x)) return '{x}'.format(a=a, x=str(x)) -def td(x): - return '' + str(x) + '' +def td(value, cell_class = ''): + return '{value}'.format( + cell_class = cell_class, + value = value) def th(x): return '' + str(x) + '' -def table_row(r): +def tableRow(r): return tr(''.join([td(f) for f in r])) -def table_header(r): +def tableHeader(r): return tr(''.join([th(f) for f in r])) -def tsv_rows(n): +def tableStart(title): + return """ +

{title}

+""".format( + anchor = nextTableAnchor(), + title = title) + +def tableEnd(): + return '
' + +def tsvRowsRaw(n): + result = [] + try: + with open(n, encoding='utf-8') as fd: + return [row for row in csv.reader(fd, delimiter="\t", quotechar='"')] + except: + report_errors.append( + traceback.format_exception_only( + *sys.exc_info()[:2])[-1]) + pass + return [] + +def tsvTableRows(n): + rawRows = tsvRowsRaw(n) result = '' - with open(n, encoding='utf-8') as fd: - for row in csv.reader(fd, delimiter="\t", quotechar='"'): - result += table_row(row) + for row in rawRows: + result += tableRow(row) return result -params = collections.defaultdict(str) -params['header'] = "ClickHouse Performance Comparison" -params['test_part'] = ( - table_template.format( +print(table_template.format( anchor = nextTableAnchor(), caption = 'Tested commits', - header = table_header(['Old', 'New']), - rows = table_row([open('left-commit.txt').read(), open('right-commit.txt').read()]) - ) + - table_template.format( - anchor = nextTableAnchor(), - caption = 'Changes in performance', - header = table_header(['Old, s', 'New, s', - 'Relative difference (new - old)/old', - 'Randomization distribution quantiles [5%, 50%, 95%]', - 'Test', 'Query']), - rows = tsv_rows('changed-perf.tsv')) + - table_template.format( - anchor = nextTableAnchor(), - caption = 'Slow on client', - header = table_header(['Client time, s', 'Server time, s', 'Ratio', 'Query']), - rows = tsv_rows('slow-on-client.tsv')) + - table_template.format( - anchor = nextTableAnchor(), - caption = 'Unstable queries', - header = table_header(['Old, s', 'New, s', - 'Relative difference (new - old)/old', - 'Randomization distribution quantiles [5%, 50%, 95%]', - 'Test', 'Query']), - rows = tsv_rows('unstable-queries.tsv')) + - table_template.format( - anchor = nextTableAnchor(), - caption = 'Run errors', - header = table_header(['A', 'B']), - rows = tsv_rows('run-errors.log')) + - table_template.format( - anchor = nextTableAnchor(), - caption = 'Tests with most unstable queries', - header = table_header(['Test', 'Unstable', 'Changed perf', 'Total not OK']), - rows = tsv_rows('bad-tests.tsv')) + - table_template.format( - anchor = nextTableAnchor(), - caption = 'Tests times', - header = table_header(['Test', 'Wall clock time, s', 'Total client time, s', - 'Total queries', - 'Ignored short queries', - 'Longest query
(sum for all runs), s', - 'Avg wall clock time
(sum for all runs), s', - 'Shortest query
(sum for all runs), s']), - rows = tsv_rows('test-times.tsv')) -) -print(doc_template.format_map(params)) + header = tableHeader(['Old', 'New']), + rows = tableRow([open('left-commit.txt').read(), open('right-commit.txt').read()]))) + +print(table_template.format( + anchor = nextTableAnchor(), + caption = 'Changes in performance', + header = tableHeader(['Old, s', 'New, s', + 'Relative difference (new - old)/old', + 'Randomization distribution quantiles [5%, 50%, 95%]', + 'Test', 'Query']), + rows = tsvTableRows('changed-perf.tsv'))) + +print(table_template.format( + anchor = nextTableAnchor(), + caption = 'Slow on client', + header = tableHeader(['Client time, s', 'Server time, s', 'Ratio', 'Query']), + rows = tsvTableRows('slow-on-client.tsv'))) + +print(table_template.format( + anchor = nextTableAnchor(), + caption = 'Unstable queries', + header = tableHeader(['Old, s', 'New, s', + 'Relative difference (new - old)/old', + 'Randomization distribution quantiles [5%, 50%, 95%]', + 'Test', 'Query']), + rows = tsvTableRows('unstable-queries.tsv'))) + +print(table_template.format( + anchor = nextTableAnchor(), + caption = 'Run errors', + header = tableHeader(['A', 'B']), + rows = tsvTableRows('run-errors.log'))) + +print(table_template.format( + anchor = nextTableAnchor(), + caption = 'Tests with most unstable queries', + header = tableHeader(['Test', 'Unstable', 'Changed perf', 'Total not OK']), + rows = tsvTableRows('bad-tests.tsv'))) + +def print_test_times(): + rows = tsvRowsRaw('test-times.tsv') + if not rows: + return + + print(rows, file=sys.stderr) + + print(tableStart('Test times')) + print(tableHeader([ + 'Test', #0 + 'Wall clock time, s', #1 + 'Total client time, s', #2 + 'Total queries', #3 + 'Ignored short queries', #4 + 'Longest query
(sum for all runs), s', #5 + 'Avg wall clock time
(sum for all runs), s', #6 + 'Shortest query
(sum for all runs), s', #7 + ])) + + for r in rows: + print(tableRow(r)) + + print(tableEnd()) + +print_test_times() + +if len(report_errors): + print(tableStart('Errors while building the report')) + print(tableHeader(['Error'])) + for x in report_errors: + print(tableRow([x])) + print(tableEnd()) + + +print(""" + + + +""") + +if report_errors: + status = 'error' + message = 'Errors while building the report.' + +print(""" + + +""".format(status=status, message=message))