ClickHouse performance comparison

""" table_anchor = 0 row_anchor = 0 def nextTableAnchor(): global table_anchor table_anchor += 1 return str(table_anchor) def nextRowAnchor(): global row_anchor global table_anchor row_anchor += 1 return str(table_anchor) + "." + str(row_anchor) def tr(x): a = nextRowAnchor() #return '{x}'.format(a=a, x=str(x)) return '{x}'.format(a=a, x=str(x)) def td(value, cell_attributes = ''): return '{value}'.format( cell_attributes = cell_attributes, value = value) def th(x): return '' + str(x) + '' def tableRow(cell_values, cell_attributes = []): return tr(''.join([td(v, a) for v, a in itertools.zip_longest( cell_values, cell_attributes, fillvalue = '')])) def tableHeader(r): return tr(''.join([th(f) for f in r])) def tableStart(title): return """

{title}

""".format( anchor = nextTableAnchor(), title = title) def tableEnd(): return '

' def tsvRows(n): result = [] try: with open(n, encoding='utf-8') as fd: return [row for row in csv.reader(fd, delimiter="\t", quotechar='"')] except: report_errors.append( traceback.format_exception_only( *sys.exc_info()[:2])[-1]) pass return [] def htmlRows(n): rawRows = tsvRows(n) result = '' for row in rawRows: result += tableRow(row) return result def printSimpleTable(caption, columns, rows): if not rows: return print(tableStart(caption)) print(tableHeader(columns)) for row in rows: print(tableRow(row)) print(tableEnd()) if args.report == 'main': print(header_template.format()) printSimpleTable('Tested commits', ['Old', 'New'], [['

{}

'.format(x) for x in [open('left-commit.txt').read(), open('right-commit.txt').read()]]]) def print_changes(): rows = tsvRows('changed-perf.tsv') if not rows: return global faster_queries, slower_queries print(tableStart('Changes in performance')) columns = [ 'Old, s', # 0 'New, s', # 1 'Relative difference (new - old)/old', # 2 'Randomization distribution quantiles \ [5%, 50%, 95%, 99%]', # 3 'Test', # 4 'Query', # 5 ] print(tableHeader(columns)) attrs = ['' for c in columns] for row in rows: attrs[2] = '' if abs(float(row[2])) > 0.10: if float(row[2]) < 0.: faster_queries += 1 attrs[2] = 'style="background: #adbdff"' else: slower_queries += 1 attrs[2] = 'style="background: #ffb0a0"' print(tableRow(row, attrs)) print(tableEnd()) print_changes() slow_on_client_rows = tsvRows('slow-on-client.tsv') error_tests += len(slow_on_client_rows) printSimpleTable('Slow on client', ['Client time, s', 'Server time, s', 'Ratio', 'Query'], slow_on_client_rows) def print_unstable_queries(): global unstable_queries global very_unstable_queries unstable_rows = tsvRows('unstable-queries.tsv') if not unstable_rows: return unstable_queries += len(unstable_rows) columns = [ 'Old, s', #0 'New, s', #1 'Relative difference (new - old)/old', #2 'Randomization distribution quantiles [5%, 50%, 95%, 99%]', #3 'Test', #4 'Query' #5 ] print(tableStart('Unstable queries')) print(tableHeader(columns)) attrs = ['' for c in columns] for r in unstable_rows: rd = ast.literal_eval(r[3]) # Note the zero-based array index, this is rd[3] in SQL. if rd[2] > 0.2: very_unstable_queries += 1 attrs[3] = 'style="background: #ffb0a0"' else: attrs[3] = '' print(tableRow(r, attrs)) print(tableEnd()) print_unstable_queries() run_error_rows = tsvRows('run-errors.tsv') error_tests += len(run_error_rows) printSimpleTable('Run errors', ['Test', 'Error'], run_error_rows) skipped_tests_rows = tsvRows('skipped-tests.tsv') printSimpleTable('Skipped tests', ['Test', 'Reason'], skipped_tests_rows) printSimpleTable('Tests with most unstable queries', ['Test', 'Unstable', 'Changed perf', 'Total not OK'], tsvRows('bad-tests.tsv')) def print_test_times(): global slow_average_tests rows = tsvRows('test-times.tsv') if not rows: return columns = [ 'Test', #0 'Wall clock time, s', #1 'Total client time, s', #2 'Total queries', #3 'Ignored short queries', #4 'Longest query
(sum for all runs), s', #5 'Avg wall clock time
(sum for all runs), s', #6 'Shortest query
(sum for all runs), s', #7 ] print(tableStart('Test times')) print(tableHeader(columns)) runs = 13 # FIXME pass this as an argument attrs = ['' for c in columns] for r in rows: if float(r[6]) > 3 * runs: # FIXME should be 15s max -- investigate parallel_insert slow_average_tests += 1 attrs[6] = 'style="background: #ffb0a0"' else: attrs[6] = '' if float(r[5]) > 4 * runs: slow_average_tests += 1 attrs[5] = 'style="background: #ffb0a0"' else: attrs[5] = '' print(tableRow(r, attrs)) print(tableEnd()) print_test_times() # Add the errors reported by various steps of comparison script report_errors += [l.strip() for l in open('report-errors.rep')] if len(report_errors): print(tableStart('Errors while building the report')) print(tableHeader(['Error'])) for x in report_errors: print(tableRow([x])) print(tableEnd()) print("""

Test output All queries Log

""") status = 'success' message = 'See the report' message_array = [] if slow_average_tests: status = 'failure' message_array.append(str(slow_average_tests) + ' too long') if faster_queries: message_array.append(str(faster_queries) + ' faster') if slower_queries: status = 'failure' message_array.append(str(slower_queries) + ' slower') if unstable_queries: message_array.append(str(unstable_queries) + ' unstable') if very_unstable_queries: status = 'failure' error_tests += slow_average_tests if error_tests: status = 'failure' message_array.append(str(error_tests) + ' errors') if message_array: message = ', '.join(message_array) if report_errors: status = 'failure' message = 'Errors while building the report.' print(""" """.format(status=status, message=message)) elif args.report == 'all-queries': print(header_template.format()) printSimpleTable('Tested commits', ['Old', 'New'], [['

{}

'.format(x) for x in [open('left-commit.txt').read(), open('right-commit.txt').read()]]]) def print_all_queries(): rows = tsvRows('all-queries.tsv') if not rows: return columns = [ 'Old, s', #0 'New, s', #1 'Relative difference (new - old)/old', #2 'Times speedup/slowdown', #3 'Randomization distribution quantiles \ [5%, 50%, 95%, 99%]', #4 'Test', #5 'Query', #6 ] print(tableStart('All query times')) print(tableHeader(columns)) attrs = ['' for c in columns] for r in rows: rd = ast.literal_eval(r[4]) # Note the zero-based array index, this is rd[4] in SQL. threshold = rd[3] if threshold > 0.2: attrs[4] = 'style="background: #ffb0a0"' else: attrs[4] = '' diff = float(r[2]) if abs(diff) > threshold and threshold >= 0.05: if diff > 0.: attrs[3] = 'style="background: #ffb0a0"' else: attrs[3] = 'style="background: #adbdff"' else: attrs[3] = '' print(tableRow(r, attrs)) print(tableEnd()) print_all_queries() print("""

Test output Main report Log

""")