#!/usr/bin/python3

"""Build a performance-comparison report from TSV result files.

Reads the TSV artifacts produced by the comparison run (changed queries,
unstable queries, test times, run errors, ...) and prints a report composed
of tables, followed by a machine-readable status line.

NOTE(review): this file was recovered from a whitespace-mangled copy whose
HTML markup had been stripped out of every string template (e.g. ``td()``
formats a ``cell_attributes`` argument that no longer appears in its
template, and ``tr()`` computes an ``anchor`` it never interpolates).
The control flow and data handling below are reconstructed faithfully;
the templates emit only the text that survived the stripping. Restore the
original markup before relying on the rendered output -- TODO confirm
against the upstream template file.
"""

import argparse
import ast
import collections
import csv
import itertools
import json
import os
import os.path
import pprint
import sys
import traceback

parser = argparse.ArgumentParser(description="Create performance test report")
parser.add_argument(
    "--report",
    default="main",
    choices=["main", "all-queries"],
    help="Which report to build",
)
parser.add_argument("--no-tests-run", action="store_true", default=False)
args = parser.parse_args()

# Report accumulators: rendered table text plus the error bookkeeping that
# ultimately decides the final "success"/"failure" status.
tables = []
errors_explained = []
report_errors = []
error_tests = 0
slow_average_tests = 0
faster_queries = 0
slower_queries = 0
unstable_queries = 0
very_unstable_queries = 0
unstable_backward_incompatible_queries = 0

# max seconds to run one query by itself, not counting preparation
allowed_single_run_time = 2

color_bad = "#ffb0c0"
color_good = "#b0d050"

# Page header template. NOTE(review): markup stripped -- only the visible
# text survives (see module docstring).
header_template = """
Clickhouse performance comparison

ClickHouse performance comparison

"""

# Counters used to give every table and row a stable anchor id.
table_anchor = 0
row_anchor = 0


def currentTableAnchor():
    """Return the anchor id of the table currently being built."""
    global table_anchor
    return f"{table_anchor}"


def newTableAnchor():
    """Advance to the next numeric table anchor and return it."""
    global table_anchor
    table_anchor += 1
    return currentTableAnchor()


def currentRowAnchor():
    """Return the anchor id of the current row ("<table>.<row>")."""
    global row_anchor
    global table_anchor
    return f"{table_anchor}.{row_anchor}"


def nextRowAnchor():
    """Return the anchor the next row will get, without advancing."""
    global row_anchor
    global table_anchor
    return f"{table_anchor}.{row_anchor + 1}"


def advanceRowAnchor():
    """Advance the row counter and return the new row anchor."""
    global row_anchor
    global table_anchor
    row_anchor += 1
    return currentRowAnchor()


def tr(x, anchor=None):
    """Render one table row.

    NOTE(review): the computed ``anchor`` is no longer interpolated into the
    output -- the row tag markup was stripped from this template. The
    ``advanceRowAnchor()`` side effect is kept because other code relies on
    the counter advancing per row.
    """
    # return '{x}'.format(a=a, x=str(x))
    anchor = anchor if anchor else advanceRowAnchor()
    return f"{x}"


def td(value, cell_attributes=""):
    """Render one data cell. ``cell_attributes`` is accepted for the (now
    stripped) tag markup; the surviving template ignores it."""
    return "{value}".format(cell_attributes=cell_attributes, value=value)


def th(value, cell_attributes=""):
    """Render one header cell (same stripped-markup caveat as ``td``)."""
    return "{value}".format(cell_attributes=cell_attributes, value=value)


def tableRow(cell_values, cell_attributes=(), anchor=None):
    """Render a row of data cells.

    A cell whose attribute is explicitly None is skipped entirely -- callers
    use this to hide bookkeeping columns (e.g. the "Failed" flag).
    Default changed from a mutable ``[]`` to ``()`` -- it is only iterated.
    """
    return tr(
        "".join(
            [
                td(v, a)
                for v, a in itertools.zip_longest(
                    cell_values, cell_attributes, fillvalue=""
                )
                if a is not None and v is not None
            ]
        ),
        anchor,
    )


def tableHeader(cell_values, cell_attributes=()):
    """Render a header row; None attributes hide their column (see tableRow)."""
    return tr(
        "".join(
            [
                th(v, a)
                for v, a in itertools.zip_longest(
                    cell_values, cell_attributes, fillvalue=""
                )
                if a is not None and v is not None
            ]
        )
    )


def tableStart(title):
    """Begin a new table: derive its anchor from the title and render the
    caption line.

    NOTE(review): ``anchor`` and ``help_anchor`` are computed but the
    stripped template no longer interpolates them -- they anchored the
    caption in the original markup.
    """
    cls = "-".join(title.lower().split(" ")[:3])
    global table_anchor
    table_anchor = cls
    anchor = currentTableAnchor()
    help_anchor = "-".join(title.lower().split(" "))
    return f"""

{title} ?

"""


def tableEnd():
    """Close the current table (markup stripped; only a newline remains)."""
    return "\n"


def tsvRows(n):
    """Read the TSV file ``n`` and return its rows as lists of strings.

    Any failure (typically a missing file) is recorded in ``report_errors``
    and an empty list is returned, so a missing artifact degrades the report
    instead of aborting it.
    """
    try:
        with open(n, encoding="utf-8") as fd:
            result = []
            for row in csv.reader(fd, delimiter="\t", quoting=csv.QUOTE_NONE):
                new_row = []
                for e in row:
                    # First .encode('utf-8').decode('unicode-escape') decodes
                    # backslash escape sequences embedded in the strings.
                    # Then .encode('latin1').decode('utf-8') undoes the
                    # latin1-vs-utf-8 mojibake that the first step introduces
                    # for non-ASCII text, restoring the original characters.
                    new_row.append(
                        e.encode("utf-8")
                        .decode("unicode-escape")
                        .encode("latin1")
                        .decode("utf-8")
                    )
                result.append(new_row)
            return result
    except Exception:
        # Narrowed from a bare except: so SystemExit/KeyboardInterrupt
        # still propagate.
        report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1])
    return []


def htmlRows(n):
    """Render every row of TSV file ``n`` through tableRow."""
    rawRows = tsvRows(n)
    result = ""
    for row in rawRows:
        result += tableRow(row)
    return result


def addSimpleTable(caption, columns, rows, pos=None):
    """Append (or insert at ``pos``) a plain table built from ``rows``.

    Empty ``rows`` adds nothing. Fixed: the old ``pos if pos else ...`` check
    treated ``pos=0`` as "append"; insertion position 0 now works.
    """
    global tables
    text = ""
    if not rows:
        return

    text += tableStart(caption)
    text += tableHeader(columns)
    for row in rows:
        text += tableRow(row)
    text += tableEnd()
    tables.insert(len(tables) if pos is None else pos, text)


def add_tested_commits():
    """Add the "Tested Commits" table from the left/right commit files."""
    global report_errors
    try:
        addSimpleTable(
            "Tested Commits",
            ["Old", "New"],
            [
                [
                    "\n{}\n".format(x)
                    for x in [
                        open("left-commit.txt").read(),
                        open("right-commit.txt").read(),
                    ]
                ]
            ],
        )
    except Exception:
        # Don't fail if no commit info -- maybe it's a manual run.
        report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1])


def add_report_errors():
    """Collect errors reported by other comparison steps and, if any exist,
    insert an "Errors while Building the Report" table after Tested Commits."""
    global tables
    global report_errors
    # Add the errors reported by various steps of comparison script
    try:
        with open("report/errors.log") as fp:  # was leaked; now closed
            report_errors += [l.strip() for l in fp]
    except Exception:
        report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1])

    if not report_errors:
        return

    text = tableStart("Errors while Building the Report")
    text += tableHeader(["Error"])
    for x in report_errors:
        text += tableRow([x])
    text += tableEnd()
    # Insert after Tested Commits
    tables.insert(1, text)
    errors_explained.append(
        ["There were some errors while building the report"]
    )


def add_errors_explained():
    """Insert the "Error Summary" table listing every explained error."""
    if not errors_explained:
        return

    text = ""
    text += tableStart("Error Summary")
    text += tableHeader(["Description"])
    for row in errors_explained:
        text += tableRow(row)
    text += tableEnd()

    global tables
    tables.insert(1, text)


if args.report == "main":
    print(header_template.format())

    add_tested_commits()

    def print_status(status, message):
        """Print the machine-readable status block.

        NOTE(review): the surrounding markup was stripped; the template still
        interpolates status/message so downstream consumers of the original
        format would have seen them.
        """
        print(
            """
""".format(
                status=status, message=message
            )
        )

    if args.no_tests_run:
        for t in tables:
            print(t)
        print(
            "No tests to run. Only changed tests were run, "
            "but all changed tests are from another batch."
        )
        print(
            f"""
{os.getenv("CHPC_ADD_REPORT_LINKS") or ''}
"""
        )
        # Why failure? Because otherwise we will not notice if we have a bug
        # that leads to 0 tests being run.
        print_status("failure", "No tests changed, nothing to run")
        sys.exit(0)

    run_error_rows = tsvRows("run-errors.tsv")
    error_tests += len(run_error_rows)
    addSimpleTable("Run Errors", ["Test", "Error"], run_error_rows)
    if run_error_rows:
        errors_explained.append(
            ["There were some errors while running the tests"]
        )

    def add_backward_incompatible():
        """Add the backward-incompatible queries table, flagging rows with
        excessive run-time variance (>10%) or over-long median time."""
        rows = tsvRows("report/partial-queries-report.tsv")
        if not rows:
            return

        global unstable_backward_incompatible_queries, slow_average_tests, tables
        text = tableStart("Backward-incompatible queries")
        columns = ["Median time, s", "Relative time variance", "Test", "#", "Query"]
        text += tableHeader(columns)
        attrs = ["" for c in columns]
        for row in rows:
            anchor = f"{currentTableAnchor()}.{row[2]}.{row[3]}"
            if float(row[1]) > 0.10:
                attrs[1] = f'style="background: {color_bad}"'
                unstable_backward_incompatible_queries += 1
                errors_explained.append(
                    [
                        f"The query no. {row[3]} of test '{row[2]}' has excessive variance of run time. Keep it below 10%"
                    ]
                )
            else:
                attrs[1] = ""
            if float(row[0]) > allowed_single_run_time:
                attrs[0] = f'style="background: {color_bad}"'
                errors_explained.append(
                    [
                        f"The query no. {row[3]} of test '{row[2]}' is taking too long to run. "
                        f"Keep the run time below {allowed_single_run_time} seconds"
                    ]
                )
                slow_average_tests += 1
            else:
                attrs[0] = ""
            text += tableRow(row, attrs, anchor)
        text += tableEnd()
        tables.append(text)

    add_backward_incompatible()

    def add_changes():
        """Add the "Changes in Performance" table; count faster/slower
        queries and record an explanation for each slowdown."""
        rows = tsvRows("report/changed-perf.tsv")
        if not rows:
            return

        global faster_queries, slower_queries, tables

        text = tableStart("Changes in Performance")
        columns = [
            "Old, s",  # 0
            "New, s",  # 1
            "Ratio of speedup (-) or slowdown (+)",  # 2
            "Relative difference (new − old) / old",  # 3
            "p < 0.01 threshold",  # 4
            "",  # Failed  # 5
            "Test",  # 6
            "#",  # 7
            "Query",  # 8
        ]
        attrs = ["" for c in columns]
        attrs[5] = None  # hide the raw "Failed" flag column
        text += tableHeader(columns, attrs)

        for row in rows:
            anchor = f"{currentTableAnchor()}.{row[6]}.{row[7]}"
            if int(row[5]):
                if float(row[3]) < 0.0:
                    faster_queries += 1
                    attrs[2] = attrs[3] = f'style="background: {color_good}"'
                else:
                    slower_queries += 1
                    attrs[2] = attrs[3] = f'style="background: {color_bad}"'
                    errors_explained.append(
                        [
                            f"The query no. {row[7]} of test '{row[6]}' has slowed down"
                        ]
                    )
            else:
                attrs[2] = attrs[3] = ""

            text += tableRow(row, attrs, anchor)

        text += tableEnd()
        tables.append(text)

    add_changes()

    def add_unstable_queries():
        """Add the "Unstable Queries" table.

        Only the very unstable queries (failed-flag set) are listed; mildly
        unstable ones are counted but skipped, since it's unclear what the
        user should do about them.
        """
        global unstable_queries, very_unstable_queries, tables

        unstable_rows = tsvRows("report/unstable-queries.tsv")
        if not unstable_rows:
            return

        unstable_queries += len(unstable_rows)

        columns = [
            "Old, s",  # 0
            "New, s",  # 1
            "Relative difference (new - old)/old",  # 2
            "p < 0.01 threshold",  # 3
            "",  # Failed #4
            "Test",  # 5
            "#",  # 6
            "Query",  # 7
        ]
        attrs = ["" for c in columns]
        attrs[4] = None  # hide the raw "Failed" flag column
        text = tableStart("Unstable Queries")
        text += tableHeader(columns, attrs)

        for r in unstable_rows:
            anchor = f"{currentTableAnchor()}.{r[5]}.{r[6]}"
            if int(r[4]):
                very_unstable_queries += 1
                attrs[3] = f'style="background: {color_bad}"'
            else:
                attrs[3] = ""
                # Just don't add the slightly unstable queries we don't
                # consider errors. It's not clear what the user should do
                # with them.
                continue

            text += tableRow(r, attrs, anchor)

        text += tableEnd()
        # Don't add an empty table.
        if very_unstable_queries:
            tables.append(text)

    add_unstable_queries()

    skipped_tests_rows = tsvRows("analyze/skipped-tests.tsv")
    addSimpleTable("Skipped Tests", ["Test", "Reason"], skipped_tests_rows)

    addSimpleTable(
        "Test Performance Changes",
        [
            "Test",
            "Ratio of speedup (-) or slowdown (+)",
            "Queries",
            "Total not OK",
            "Changed perf",
            "Unstable",
        ],
        tsvRows("report/test-perf-changes.tsv"),
    )

    def add_test_times():
        """Add the "Test Times" table, flagging tests whose average or
        longest query time exceeds the per-run budget."""
        global slow_average_tests, tables
        rows = tsvRows("report/test-times.tsv")
        if not rows:
            return

        columns = [
            "Test",  # 0
            "Wall clock time, entire test, s",  # 1
            "Total client time for measured query runs, s",  # 2
            "Queries",  # 3
            "Longest query, total for measured runs, s",  # 4
            "Average query wall clock time, s",  # 5
            "Shortest query, total for measured runs, s",  # 6
            "",  # Runs #7
        ]
        attrs = ["" for c in columns]
        attrs[7] = None  # hide the raw "Runs" column

        text = tableStart("Test Times")
        text += tableHeader(columns, attrs)

        allowed_average_run_time = 3.75  # 60 seconds per test at (7 + 1) * 2 runs
        for r in rows:
            anchor = f"{currentTableAnchor()}.{r[0]}"
            total_runs = (int(r[7]) + 1) * 2  # one prewarm run, two servers
            if r[0] != "Total" and float(r[5]) > allowed_average_run_time * total_runs:
                # FIXME should be 15s max -- investigate parallel_insert
                slow_average_tests += 1
                attrs[5] = f'style="background: {color_bad}"'
                errors_explained.append(
                    [
                        f"The test '{r[0]}' is too slow to run as a whole. Investigate whether the create and fill queries can be sped up"
                    ]
                )
            else:
                attrs[5] = ""

            if r[0] != "Total" and float(r[4]) > allowed_single_run_time * total_runs:
                slow_average_tests += 1
                attrs[4] = f'style="background: {color_bad}"'
                errors_explained.append(
                    [
                        f"Some query of the test '{r[0]}' is too slow to run. "
                        f"See the all queries report"
                    ]
                )
            else:
                attrs[4] = ""

            text += tableRow(r, attrs, anchor)
        text += tableEnd()
        tables.append(text)

    add_test_times()

    addSimpleTable(
        "Metric Changes",
        [
            "Metric",
            "Old median value",
            "New median value",
            "Relative difference",
            "Times difference",
        ],
        tsvRows("metrics/changes.tsv"),
    )

    add_report_errors()
    add_errors_explained()

    for t in tables:
        print(t)

    # Page footer (markup stripped).
    print(
        """
"""
    )

    # Derive the overall status and a short summary message from the
    # accumulated counters.
    status = "success"
    message = "See the report"
    message_array = []

    if slow_average_tests:
        status = "failure"
        message_array.append(str(slow_average_tests) + " too long")

    if faster_queries:
        message_array.append(str(faster_queries) + " faster")

    if slower_queries:
        # This threshold should be synchronized with the value in
        # https://github.com/ClickHouse/ClickHouse/blob/master/tests/ci/performance_comparison_check.py#L225
        # False positives rate should be < 1%: https://shorturl.at/CDEK8
        if slower_queries > 5:
            status = "failure"
        message_array.append(str(slower_queries) + " slower")

    if unstable_backward_incompatible_queries:
        very_unstable_queries += unstable_backward_incompatible_queries
        status = "failure"

    # Don't show mildly unstable queries, only the very unstable ones we
    # treat as errors.
    if very_unstable_queries:
        message_array.append(str(very_unstable_queries) + " unstable")
        # FIXME: uncomment the following lines when tests are stable and
        # reliable
        # if very_unstable_queries > 5:
        #     error_tests += very_unstable_queries
        #     status = "failure"
        #
        # error_tests += slow_average_tests
        # FIXME: until here

    if error_tests:
        status = "failure"
        message_array.insert(0, str(error_tests) + " errors")

    if message_array:
        message = ", ".join(message_array)

    if report_errors:
        status = "failure"
        message = "Errors while building the report."

    print_status(status, message)

elif args.report == "all-queries":
    print(header_template.format())

    add_tested_commits()

    def add_all_queries():
        """Add the "All Query Times" table, color-coding changed, unstable
        and over-long queries."""
        rows = tsvRows("report/all-queries.tsv")
        if not rows:
            return

        columns = [
            "",  # Changed #0
            "",  # Unstable #1
            "Old, s",  # 2
            "New, s",  # 3
            "Ratio of speedup (-) or slowdown (+)",  # 4
            "Relative difference (new − old) / old",  # 5
            "p < 0.01 threshold",  # 6
            "Test",  # 7
            "#",  # 8
            "Query",  # 9
        ]
        attrs = ["" for c in columns]
        attrs[0] = None  # hide the raw "Changed" flag column
        attrs[1] = None  # hide the raw "Unstable" flag column
        text = tableStart("All Query Times")
        text += tableHeader(columns, attrs)

        for r in rows:
            anchor = f"{currentTableAnchor()}.{r[7]}.{r[8]}"
            if int(r[1]):
                attrs[6] = f'style="background: {color_bad}"'
            else:
                attrs[6] = ""

            if int(r[0]):
                if float(r[5]) > 0.0:
                    attrs[4] = attrs[5] = f'style="background: {color_bad}"'
                else:
                    attrs[4] = attrs[5] = f'style="background: {color_good}"'
            else:
                attrs[4] = attrs[5] = ""

            if (float(r[2]) + float(r[3])) / 2 > allowed_single_run_time:
                attrs[2] = f'style="background: {color_bad}"'
                attrs[3] = f'style="background: {color_bad}"'
            else:
                attrs[2] = ""
                attrs[3] = ""

            text += tableRow(r, attrs, anchor)

        text += tableEnd()
        tables.append(text)

    add_all_queries()
    add_report_errors()
    for t in tables:
        print(t)

    # Page footer (markup stripped).
    print(
        """
"""
    )