From de8fc04b0395a1326cbe009ba87bfafbf87f3dde Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 22 Jan 2023 17:43:27 +0000 Subject: [PATCH 1/2] Performance report: "Partial queries" --> "Backward-incompatible queries" --- docker/test/performance-comparison/README.md | 2 +- docker/test/performance-comparison/compare.sh | 6 +++--- docker/test/performance-comparison/report.py | 14 +++++++------- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/docker/test/performance-comparison/README.md b/docker/test/performance-comparison/README.md index 719fbd82b22..fd9001e23c7 100644 --- a/docker/test/performance-comparison/README.md +++ b/docker/test/performance-comparison/README.md @@ -50,7 +50,7 @@ Action required for every item -- these are errors that must be fixed. A query is supposed to run longer than 0.1 second. If your query runs faster, increase the amount of processed data to bring the run time above this threshold. You can use a bigger table (e.g. `hits_100m` instead of `hits_10m`), increase a `LIMIT`, make a query single-threaded, and so on. Queries that are too fast suffer from poor stability and precision. -#### Partial Queries +#### Backward-incompatible Queries Action required for the cells marked in red. Shows the queries we are unable to run on an old server -- probably because they contain a new function. You should see this table when you add a new function and a performance test for it. Check that the run time and variance are acceptable (run time between 0.1 and 1 seconds, variance below 10%). If not, they will be highlighted in red. 
diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 4733cfd3924..338a0c02a55 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -399,7 +399,7 @@ clickhouse-local --query " create view query_runs as select * from file('analyze/query-runs.tsv', TSV, 'test text, query_index int, query_id text, version UInt8, time float'); --- Separately process 'partial' queries which we could only run on the new server +-- Separately process backward-incompatible ('partial') queries which we could only run on the new server -- because they use new functions. We can't make normal stats for them, but still -- have to show some stats so that the PR author can tweak them. create view partial_queries as select test, query_index @@ -650,7 +650,7 @@ create view partial_query_times as select * from 'test text, query_index int, time_stddev float, time_median double') ; --- Report for partial queries that we could only run on the new server (e.g. +-- Report for backward-incompatible ('partial') queries that we could only run on the new server (e.g. -- queries with new functions added in the tested PR). create table partial_queries_report engine File(TSV, 'report/partial-queries-report.tsv') settings output_format_decimal_trailing_zeros = 1 @@ -829,7 +829,7 @@ create view query_runs as select * from file('analyze/query-runs.tsv', TSV, -- Guess the number of query runs used for this test. The number is required to -- calculate and check the average query run time in the report. -- We have to be careful, because we will encounter: --- 1) partial queries which run only on one server +-- 1) backward-incompatible ('partial') queries which run only on one server -- 3) some errors that make query run for a different number of times on a -- particular server. 
-- diff --git a/docker/test/performance-comparison/report.py b/docker/test/performance-comparison/report.py index 960f23be95c..a3ca2edbfd5 100755 --- a/docker/test/performance-comparison/report.py +++ b/docker/test/performance-comparison/report.py @@ -30,7 +30,7 @@ faster_queries = 0 slower_queries = 0 unstable_queries = 0 very_unstable_queries = 0 -unstable_partial_queries = 0 +unstable_backward_incompatible_queries = 0 # max seconds to run one query by itself, not counting preparation allowed_single_run_time = 2 @@ -378,12 +378,12 @@ if args.report == "main": ] ) - def add_partial(): + def add_backward_incompatible(): rows = tsvRows("report/partial-queries-report.tsv") if not rows: return - global unstable_partial_queries, slow_average_tests, tables + global unstable_backward_incompatible_queries, slow_average_tests, tables text = tableStart("Partial Queries") columns = ["Median time, s", "Relative time variance", "Test", "#", "Query"] text += tableHeader(columns) @@ -392,7 +392,7 @@ if args.report == "main": anchor = f"{currentTableAnchor()}.{row[2]}.{row[3]}" if float(row[1]) > 0.10: attrs[1] = f'style="background: {color_bad}"' - unstable_partial_queries += 1 + unstable_backward_incompatible_queries += 1 errors_explained.append( [ f"The query no. {row[3]} of test '{row[2]}' has excessive variance of run time. 
Keep it below 10%" @@ -414,7 +414,7 @@ if args.report == "main": text += tableEnd() tables.append(text) - add_partial() + add_backward_incompatible() def add_changes(): rows = tsvRows("report/changed-perf.tsv") @@ -630,8 +630,8 @@ if args.report == "main": status = "failure" message_array.append(str(slower_queries) + " slower") - if unstable_partial_queries: - very_unstable_queries += unstable_partial_queries + if unstable_backward_incompatible_queries: + very_unstable_queries += unstable_backward_incompatible_queries status = "failure" # Don't show mildly unstable queries, only the very unstable ones we From 40ced78a7e326f20aba79cad3ad93e7b437c328e Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 22 Jan 2023 20:01:55 +0100 Subject: [PATCH 2/2] Update report.py --- docker/test/performance-comparison/report.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/performance-comparison/report.py b/docker/test/performance-comparison/report.py index a3ca2edbfd5..782cf29863c 100755 --- a/docker/test/performance-comparison/report.py +++ b/docker/test/performance-comparison/report.py @@ -384,7 +384,7 @@ if args.report == "main": return global unstable_backward_incompatible_queries, slow_average_tests, tables - text = tableStart("Partial Queries") + text = tableStart("Backward-incompatible queries") columns = ["Median time, s", "Relative time variance", "Test", "#", "Query"] text += tableHeader(columns) attrs = ["" for c in columns]