#!/usr/bin/python3
"""Create an HTML performance-test comparison report.

Reads TSV result files produced by earlier steps of the comparison
pipeline (run errors, changed/unstable queries, test times, metric
changes, ...) and prints a report to stdout.  Two report flavours are
supported, selected with --report: 'main' (summary + status line) and
'all-queries' (one big table of every query's timings).

NOTE(review): the HTML tags appear to have been stripped from the
string templates in this copy of the script -- the emitted "markup"
(header_template, tr/td/th, tableStart/tableEnd) is plain text.
Confirm against the original script before relying on the output.
"""

import argparse
import ast
import collections
import csv
import itertools
import json
import os
import os.path
import pprint
import sys
import traceback

parser = argparse.ArgumentParser(description='Create performance test report')
parser.add_argument('--report', default='main', choices=['main', 'all-queries'],
                    help='Which report to build')
args = parser.parse_args()

# Accumulated report state (mutated by the add_* helpers below).
tables = []             # rendered table fragments, printed at the end
errors_explained = []   # one-row explanations for the Error Summary table
report_errors = []      # errors hit while building the report itself
error_tests = 0
slow_average_tests = 0
faster_queries = 0
slower_queries = 0
unstable_queries = 0
very_unstable_queries = 0
unstable_partial_queries = 0

# max seconds to run one query by itself, not counting preparation
allowed_single_run_time = 2

color_bad = '#ffb0c0'
color_good = '#b0d050'

header_template = """ Clickhouse performance comparison

ClickHouse performance comparison

"""

# Anchors give each table/row a stable id so errors can link to them.
table_anchor = 0
row_anchor = 0

def currentTableAnchor():
    global table_anchor
    return f'{table_anchor}'

def newTableAnchor():
    global table_anchor
    table_anchor += 1
    return currentTableAnchor()

def currentRowAnchor():
    global row_anchor
    global table_anchor
    return f'{table_anchor}.{row_anchor}'

def nextRowAnchor():
    global row_anchor
    global table_anchor
    return f'{table_anchor}.{row_anchor + 1}'

def advanceRowAnchor():
    global row_anchor
    global table_anchor
    row_anchor += 1
    return currentRowAnchor()

def tr(x, anchor=None):
    """Render one table row; advances the row anchor when none is given."""
    #return '{x}'.format(a=a, x=str(x))
    anchor = anchor if anchor else advanceRowAnchor()
    return f'{x}'

def td(value, cell_attributes=''):
    """Render one table cell.

    NOTE(review): cell_attributes is accepted by the format call but the
    template has no placeholder for it -- presumably lost with the
    stripped markup; kept for interface compatibility.
    """
    return '{value}'.format(
        cell_attributes=cell_attributes,
        value=value)

def th(x):
    """Render one header cell."""
    return '' + str(x) + ''

def tableRow(cell_values, cell_attributes=None, anchor=None):
    """Render a row of cells, pairing each value with its attribute string.

    A cell is skipped entirely when its value or attribute is None
    (that is how callers hide columns such as the 'Failed' flag).
    """
    # Avoid a mutable default argument; None means "no attributes".
    cell_attributes = cell_attributes if cell_attributes is not None else []
    return tr(
        ''.join([td(v, a)
                 for v, a in itertools.zip_longest(
                     cell_values, cell_attributes, fillvalue='')
                 if a is not None and v is not None]),
        anchor)

def tableHeader(r):
    return tr(''.join([th(f) for f in r]))

def tableStart(title):
    """Begin a new table; derives the table anchor from the title."""
    cls = '-'.join(title.lower().split(' ')[:3])
    global table_anchor
    table_anchor = cls
    # NOTE(review): anchor/help_anchor look unused because the template's
    # markup was stripped; kept so behavior matches the original intent.
    anchor = currentTableAnchor()
    help_anchor = '-'.join(title.lower().split(' '))
    return f"""

{title} ?

"""

def tableEnd():
    return '\n'

def tsvRows(n):
    """Read file *n* as tab-separated rows; on any error record it in
    report_errors and return an empty list (missing files are expected
    on some runs)."""
    try:
        with open(n, encoding='utf-8') as fd:
            return [row for row in csv.reader(fd, delimiter="\t", quotechar='"')]
    except Exception:
        report_errors.append(
            traceback.format_exception_only(
                *sys.exc_info()[:2])[-1])
    return []

def htmlRows(n):
    """Render every row of TSV file *n* as table rows."""
    return ''.join(tableRow(row) for row in tsvRows(n))

def addSimpleTable(caption, columns, rows, pos=None):
    """Append (or insert at *pos*) a plain table; no-op when rows is empty."""
    global tables
    text = ''
    if not rows:
        return

    text += tableStart(caption)
    text += tableHeader(columns)
    for row in rows:
        text += tableRow(row)
    text += tableEnd()
    # 'is not None' so that pos == 0 inserts at the front instead of
    # falling through to append (0 is falsy).
    tables.insert(pos if pos is not None else len(tables), text)

def add_tested_commits():
    global report_errors
    try:
        with open('left-commit.txt') as left, open('right-commit.txt') as right:
            addSimpleTable('Tested Commits', ['Old', 'New'],
                [['\n{}\n'.format(x) for x in [left.read(), right.read()]]])
    except Exception:
        # Don't fail if no commit info -- maybe it's a manual run.
        report_errors.append(
            traceback.format_exception_only(
                *sys.exc_info()[:2])[-1])

def add_report_errors():
    global tables
    global report_errors
    # Add the errors reported by various steps of comparison script
    try:
        with open('report/errors.log') as errors_log:
            report_errors += [l.strip() for l in errors_log]
    except Exception:
        report_errors.append(
            traceback.format_exception_only(
                *sys.exc_info()[:2])[-1])

    if not report_errors:
        return

    text = tableStart('Errors while Building the Report')
    text += tableHeader(['Error'])
    for x in report_errors:
        text += tableRow([x])
    text += tableEnd()
    # Insert after Tested Commits
    tables.insert(1, text)
    errors_explained.append(['There were some errors while building the report'])

def add_errors_explained():
    if not errors_explained:
        return

    text = ''
    text += tableStart('Error Summary')
    text += tableHeader(['Description'])
    for row in errors_explained:
        text += tableRow(row)
    text += tableEnd()

    global tables
    tables.insert(1, text)

if args.report == 'main':
    print(header_template.format())

    add_tested_commits()

    run_error_rows = tsvRows('run-errors.tsv')
    error_tests += len(run_error_rows)
    addSimpleTable('Run Errors', ['Test', 'Error'], run_error_rows)
    if run_error_rows:
        errors_explained.append(['There were some errors while running the tests'])

    slow_on_client_rows = tsvRows('report/slow-on-client.tsv')
    error_tests += len(slow_on_client_rows)
    addSimpleTable('Slow on Client',
        ['Client time, s', 'Server time, s', 'Ratio', 'Test', 'Query'],
        slow_on_client_rows)
    if slow_on_client_rows:
        errors_explained.append(['Some queries are taking noticeable time client-side (missing `FORMAT Null`?)'])

    unmarked_short_rows = tsvRows('report/inconsistent-short-marking.tsv')
    error_tests += len(unmarked_short_rows)
    addSimpleTable('Inconsistent Short Marking',
        ['Problem', 'Is marked as short', 'New client time, s', 'Test', '#', 'Query'],
        unmarked_short_rows)
    if unmarked_short_rows:
        errors_explained.append(['Some queries have inconsistent short marking'])

    def add_partial():
        """Table of queries that ran only on the new server (partial)."""
        rows = tsvRows('report/partial-queries-report.tsv')
        if not rows:
            return

        global unstable_partial_queries, slow_average_tests, tables
        text = tableStart('Partial Queries')
        columns = ['Median time, s', 'Relative time variance', 'Test', '#', 'Query']
        text += tableHeader(columns)
        attrs = ['' for c in columns]
        for row in rows:
            anchor = f'{currentTableAnchor()}.{row[2]}.{row[3]}'
            # More than 10% run-time variance counts as unstable.
            if float(row[1]) > 0.10:
                attrs[1] = f'style="background: {color_bad}"'
                unstable_partial_queries += 1
                errors_explained.append([f'The query no. {row[3]} of test \'{row[2]}\' has excessive variance of run time. Keep it below 10%'])
            else:
                attrs[1] = ''

            if float(row[0]) > allowed_single_run_time:
                attrs[0] = f'style="background: {color_bad}"'
                errors_explained.append([f'The query no. {row[3]} of test \'{row[2]}\' is taking too long to run. Keep the run time below {allowed_single_run_time} seconds"'])
                slow_average_tests += 1
            else:
                attrs[0] = ''

            text += tableRow(row, attrs, anchor)
        text += tableEnd()
        tables.append(text)

    add_partial()

    def add_changes():
        """Table of queries with a statistically significant perf change."""
        rows = tsvRows('report/changed-perf.tsv')
        if not rows:
            return

        global faster_queries, slower_queries, tables

        text = tableStart('Changes in Performance')
        columns = [
            'Old, s',                                 # 0
            'New, s',                                 # 1
            'Ratio of speedup (-) or slowdown (+)',   # 2
            'Relative difference (new − old) / old',  # 3
            'p < 0.01 threshold',                     # 4
            # Failed                                  # 5
            'Test',                                   # 6
            '#',                                      # 7
            'Query',                                  # 8
        ]

        text += tableHeader(columns)

        attrs = ['' for c in columns]
        attrs[5] = None  # hide the 'Failed' flag column
        for row in rows:
            anchor = f'{currentTableAnchor()}.{row[6]}.{row[7]}'
            if int(row[5]):
                if float(row[3]) < 0.:
                    faster_queries += 1
                    attrs[2] = attrs[3] = f'style="background: {color_good}"'
                else:
                    slower_queries += 1
                    attrs[2] = attrs[3] = f'style="background: {color_bad}"'
                    errors_explained.append([f'The query no. {row[7]} of test \'{row[6]}\' has slowed down'])
            else:
                attrs[2] = attrs[3] = ''

            text += tableRow(row, attrs, anchor)

        text += tableEnd()
        tables.append(text)

    add_changes()

    def add_unstable_queries():
        """Table of queries whose timing difference is not significant."""
        global unstable_queries, very_unstable_queries, tables

        unstable_rows = tsvRows('report/unstable-queries.tsv')
        if not unstable_rows:
            return

        unstable_queries += len(unstable_rows)

        columns = [
            'Old, s',                               # 0
            'New, s',                               # 1
            'Relative difference (new - old)/old',  # 2
            'p < 0.01 threshold',                   # 3
            # Failed                                # 4
            'Test',                                 # 5
            '#',                                    # 6
            'Query'                                 # 7
        ]

        text = tableStart('Unstable Queries')
        text += tableHeader(columns)

        attrs = ['' for c in columns]
        attrs[4] = None  # hide the 'Failed' flag column
        for r in unstable_rows:
            anchor = f'{currentTableAnchor()}.{r[5]}.{r[6]}'
            if int(r[4]):
                very_unstable_queries += 1
                attrs[3] = f'style="background: {color_bad}"'
            else:
                attrs[3] = ''

            text += tableRow(r, attrs, anchor)

        text += tableEnd()
        tables.append(text)

    add_unstable_queries()

    skipped_tests_rows = tsvRows('analyze/skipped-tests.tsv')
    addSimpleTable('Skipped Tests', ['Test', 'Reason'], skipped_tests_rows)

    addSimpleTable('Test Performance Changes',
        ['Test', 'Ratio of speedup (-) or slowdown (+)', 'Queries',
         'Total not OK', 'Changed perf', 'Unstable'],
        tsvRows('report/test-perf-changes.tsv'))

    def add_test_times():
        """Per-test wall-clock/aggregate timing table."""
        global slow_average_tests, tables
        rows = tsvRows('report/test-times.tsv')
        if not rows:
            return

        columns = [
            'Test',                                        # 0
            'Wall clock time, s',                          # 1
            'Total client time, s',                        # 2
            'Total queries',                               # 3
            'Longest query\n(sum for all runs), s',        # 4
            'Avg wall clock time\n(sum for all runs), s',  # 5
            'Shortest query\n(sum for all runs), s',       # 6
        ]

        text = tableStart('Test Times')
        text += tableHeader(columns)

        nominal_runs = 7  # FIXME pass this as an argument
        total_runs = (nominal_runs + 1) * 2  # one prewarm run, two servers
        # some allowance for fill/create queries
        allowed_average_run_time = allowed_single_run_time + 60 / total_runs
        attrs = ['' for c in columns]
        for r in rows:
            anchor = f'{currentTableAnchor()}.{r[0]}'
            if float(r[5]) > allowed_average_run_time * total_runs:
                # FIXME should be 15s max -- investigate parallel_insert
                slow_average_tests += 1
                attrs[5] = f'style="background: {color_bad}"'
                errors_explained.append([f'The test \'{r[0]}\' is too slow to run as a whole. Investigate whether the create and fill queries can be sped up'])
            else:
                attrs[5] = ''

            if float(r[4]) > allowed_single_run_time * total_runs:
                slow_average_tests += 1
                attrs[4] = f'style="background: {color_bad}"'
                errors_explained.append([f'Some query of the test \'{r[0]}\' is too slow to run. See the all queries report'])
            else:
                attrs[4] = ''

            text += tableRow(r, attrs, anchor)
        text += tableEnd()
        tables.append(text)

    add_test_times()

    addSimpleTable('Metric Changes',
        ['Metric', 'Old median value', 'New median value',
         'Relative difference', 'Times difference'],
        tsvRows('metrics/changes.tsv'))

    add_report_errors()
    add_errors_explained()

    for t in tables:
        print(t)

    print("""
""")

    # Derive the overall CI status line from the accumulated counters.
    status = 'success'
    message = 'See the report'
    message_array = []

    if slow_average_tests:
        status = 'failure'
        message_array.append(str(slow_average_tests) + ' too long')

    if faster_queries:
        message_array.append(str(faster_queries) + ' faster')

    if slower_queries:
        # A few slower queries are tolerated; more than 3 fails the run.
        if slower_queries > 3:
            status = 'failure'
        message_array.append(str(slower_queries) + ' slower')

    if unstable_partial_queries:
        unstable_queries += unstable_partial_queries
        error_tests += unstable_partial_queries
        status = 'failure'

    if unstable_queries:
        message_array.append(str(unstable_queries) + ' unstable')

    # Disabled before fix.
    # if very_unstable_queries:
    #     status = 'failure'

    error_tests += slow_average_tests
    if error_tests:
        status = 'failure'
        message_array.insert(0, str(error_tests) + ' errors')

    if message_array:
        message = ', '.join(message_array)

    if report_errors:
        status = 'failure'
        message = 'Errors while building the report.'

    # NOTE(review): status/message placeholders are missing from this
    # template (stripped markup?); the format call is kept as-is.
    print("""
""".format(status=status, message=message))

elif args.report == 'all-queries':

    print(header_template.format())

    add_tested_commits()

    def add_all_queries():
        """One table with the timings of every query that ran."""
        rows = tsvRows('report/all-queries.tsv')
        if not rows:
            return

        columns = [
            # Changed                                 # 0
            # Unstable                                # 1
            'Old, s',                                 # 2
            'New, s',                                 # 3
            'Ratio of speedup (-) or slowdown (+)',   # 4
            'Relative difference (new − old) / old',  # 5
            'p < 0.01 threshold',                     # 6
            'Test',                                   # 7
            '#',                                      # 8
            'Query',                                  # 9
        ]

        text = tableStart('All Query Times')
        text += tableHeader(columns)

        attrs = ['' for c in columns]
        attrs[0] = None  # hide the 'Changed' flag column
        attrs[1] = None  # hide the 'Unstable' flag column
        for r in rows:
            anchor = f'{currentTableAnchor()}.{r[7]}.{r[8]}'
            if int(r[1]):
                attrs[6] = f'style="background: {color_bad}"'
            else:
                attrs[6] = ''

            if int(r[0]):
                if float(r[5]) > 0.:
                    attrs[4] = attrs[5] = f'style="background: {color_bad}"'
                else:
                    attrs[4] = attrs[5] = f'style="background: {color_good}"'
            else:
                attrs[4] = attrs[5] = ''

            if (float(r[2]) + float(r[3])) / 2 > allowed_single_run_time:
                attrs[2] = f'style="background: {color_bad}"'
                attrs[3] = f'style="background: {color_bad}"'
            else:
                attrs[2] = ''
                attrs[3] = ''

            text += tableRow(r, attrs, anchor)

        text += tableEnd()
        tables.append(text)

    add_all_queries()

    add_report_errors()

    for t in tables:
        print(t)

    print("""
""")