performance comparison

2024-12-18 04:12:19 +00:00 · 2020-04-28 10:45:35 +03:00 · 2020-04-28 10:45:35 +03:00 · ed1576507b
commit ed1576507b
parent fce4072f9d
9 changed files with 131 additions and 95 deletions
--- a/docker/test/performance-comparison/compare.sh
+++ b/docker/test/performance-comparison/compare.sh
@ -100,7 +100,7 @@ function run_tests
    # changes.
    test_prefix=$([ "$PR_TO_TEST" == "0" ] && echo left || echo right)/performance

-    for x in {test-times,skipped-tests,wall-clock-times}.tsv
+    for x in {test-times,skipped-tests,wall-clock-times,report-thresholds,client-times}.tsv
    do
        rm -v "$x" ||:
        touch "$x"
@ -161,7 +161,8 @@ function run_tests
        mv "$test_name-err.log" "$test_name-warn.log"

        grep ^query "$test_name-raw.tsv" | cut -f2- > "$test_name-queries.tsv"
-        grep ^client-time "$test_name-raw.tsv" | cut -f2- > "$test_name-client-time.tsv"
+        sed -n "s/^client-time/$test_name/p" < "$test_name-raw.tsv" >> "client-times.tsv" # double quotes so $test_name expands
+        sed -n "s/^report-threshold/$test_name/p" < "$test_name-raw.tsv" >> "report-thresholds.tsv" # tag as printed by perf.py
        skipped=$(grep ^skipped "$test_name-raw.tsv" | cut -f2-)
        if [ "$skipped" != "" ]
        then
@ -267,23 +268,17 @@ parallel --verbose --null < analyze-commands.txt
 function report
 {

-for x in {right,left}-{addresses,{query,query-thread,trace,metric}-log}.tsv
-do
-    # FIXME This loop builds column definitons from TSVWithNamesAndTypes in an
-    # absolutely atrocious way. This should be done by the file() function itself.
-    paste -d' ' \
-        <(sed -n '1{s/\t/\n/g;p;q}' "$x" | sed 's/\(^.*$\)/"\1"/') \
-        <(sed -n '2{s/\t/\n/g;p;q}' "$x" ) \
-        | tr '\n' ', ' | sed 's/,$//' > "$x.columns"
-done
+rm -r report ||:
+mkdir report ||:
+

 rm ./*.{rep,svg} test-times.tsv test-dump.tsv unstable.tsv unstable-query-ids.tsv unstable-query-metrics.tsv changed-perf.tsv unstable-tests.tsv unstable-queries.tsv bad-tests.tsv slow-on-client.tsv all-queries.tsv ||:

-cat analyze-errors.log >> report-errors.rep ||:
-cat profile-errors.log >> report-errors.rep ||:
+cat analyze-errors.log >> report/errors.log ||:
+cat profile-errors.log >> report/errors.log ||:

 clickhouse-local --query "
-create table queries engine File(TSVWithNamesAndTypes, 'queries.rep')
+create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
    as select
        -- FIXME Comparison mode doesn't make sense for queries that complete
        -- immediately, so for now we pretend they don't exist. We don't want to
@ -291,36 +286,56 @@ create table queries engine File(TSVWithNamesAndTypes, 'queries.rep')
        -- but the right way to do this is not yet clear.
        (left + right) / 2 < 0.02 as short,

-        not short and abs(diff) > 0.05 and abs(diff) > threshold as changed,
+        not short and abs(diff) > report_threshold        and abs(diff) > stat_threshold as changed_fail,
+        not short and abs(diff) > report_threshold - 0.05 and abs(diff) > stat_threshold as changed_show,
        
-        not short and not changed and threshold > 0.05 as unstable,
-        
-        left, right, diff, threshold,
-        replaceAll(_file, '-report.tsv', '') test,
+        not short and not changed_fail and stat_threshold > report_threshold + 0.05 as unstable_fail,
+        not short and not changed_show and stat_threshold > report_threshold - 0.05 as unstable_show,
        
+        left, right, diff, stat_threshold,
+        if(report_threshold > 0, report_threshold, 0.10) as report_threshold,
+        reports.test,
        -- Truncate long queries.
        if(length(query) < 300, query, substr(query, 1, 298) || '...') query
-    from file('*-report.tsv', TSV, 'left float, right float, diff float, threshold float, query text');
+    from
+        (
+            select *,
+                replaceAll(_file, '-report.tsv', '') test
+            from file('*-report.tsv', TSV, 'left float, right float, diff float, stat_threshold float, query text')
+        ) reports
+        left join file('report-thresholds.tsv', TSV, 'test text, report_threshold float') thresholds
+        using test
+        ;

-create table changed_perf_tsv engine File(TSV, 'changed-perf.tsv') as
-    select left, right, diff, threshold, test, query from queries where changed
+-- keep the table in old format so that we can analyze new and old data together
+create table queries_old_format engine File(TSVWithNamesAndTypes, 'queries.rep')
+    as select short, changed_fail, unstable_fail, left, right, diff, stat_threshold, test, query
+    from queries
+    ;
+
+create table changed_perf_tsv engine File(TSV, 'report/changed-perf.tsv') as
+    select left, right, diff, stat_threshold, changed_fail, test, query from queries where changed_show
    order by abs(diff) desc;

-create table unstable_queries_tsv engine File(TSV, 'unstable-queries.tsv') as
-    select left, right, diff, threshold, test, query from queries where unstable
-    order by threshold desc;
+create table unstable_queries_tsv engine File(TSV, 'report/unstable-queries.tsv') as
+    select left, right, diff, stat_threshold, unstable_fail, test, query from queries where unstable_show
+    order by stat_threshold desc;

-create table unstable_tests_tsv engine File(TSV, 'bad-tests.tsv') as
-    select test, sum(unstable) u, sum(changed) c, u + c s from queries
+create table queries_for_flamegraph engine File(TSVWithNamesAndTypes, 'report/queries-for-flamegraph.tsv') as
+    select query, test from queries where unstable_show or changed_show
+    ;
+
+create table unstable_tests_tsv engine File(TSV, 'report/bad-tests.tsv') as
+    select test, sum(unstable_fail) u, sum(changed_fail) c, u + c s from queries
    group by test having s > 0 order by s desc;

-create table query_time engine Memory as select *, replaceAll(_file, '-client-time.tsv', '') test
-    from file('*-client-time.tsv', TSV, 'query text, client float, server float');
+create table query_time engine Memory as select *
+    from file('client-times.tsv', TSV, 'test text, query text, client float, server float');

 create table wall_clock engine Memory as select *
    from file('wall-clock-times.tsv', TSV, 'test text, real float, user float, system float');

-create table slow_on_client_tsv engine File(TSV, 'slow-on-client.tsv') as
+create table slow_on_client_tsv engine File(TSV, 'report/slow-on-client.tsv') as
    select client, server, floor(client/server, 3) p, query
    from query_time where p > 1.02 order by p desc;

@ -334,7 +349,7 @@ create table test_time engine Memory as
    where query_time.query = queries.query
    group by test;

-create table test_times_tsv engine File(TSV, 'test-times.tsv') as
+create table test_times_tsv engine File(TSV, 'report/test-times.tsv') as
    select wall_clock.test, real,
        floor(total_client_time, 3),
        queries,
@ -345,20 +360,30 @@ create table test_times_tsv engine File(TSV, 'test-times.tsv') as
    from test_time join wall_clock using test
    order by avg_real_per_query desc;

-create table all_tests_tsv engine File(TSV, 'all-queries.tsv') as
-    select left, right, diff,
+create table all_tests_tsv engine File(TSV, 'report/all-queries.tsv') as
+    select changed_fail, unstable_fail,
+        left, right, diff,
        floor(left > right ? left / right : right / left, 3),
-        threshold, test, query
+        stat_threshold, test, query
    from queries order by test, query;
-" 2> >(head -2 >> report-errors.rep) ||:
+" 2> >(tee -a report/errors.log 1>&2)
+
+for x in {right,left}-{addresses,{query,query-thread,trace,metric}-log}.tsv
+do
+    # FIXME This loop builds column definitions from TSVWithNamesAndTypes in an
+    # absolutely atrocious way. This should be done by the file() function itself.
+    paste -d' ' \
+        <(sed -n '1{s/\t/\n/g;p;q}' "$x" | sed 's/\(^.*$\)/"\1"/') \
+        <(sed -n '2{s/\t/\n/g;p;q}' "$x" ) \
+        | tr '\n' ', ' | sed 's/,$//' > "$x.columns"
+done

 for version in {right,left}
 do
 clickhouse-local --query "
-create view queries as
-    select * from file('queries.rep', TSVWithNamesAndTypes,
-        'short int, changed int, unstable int, left float, right float,
-            diff float, threshold float, test text, query text');
+create view queries_for_flamegraph as
+    select * from file('report/queries-for-flamegraph.tsv', TSVWithNamesAndTypes,
+        'query text, test text');

 create view query_log as select *
    from file('$version-query-log.tsv', TSVWithNamesAndTypes,
@ -377,9 +402,9 @@ create table addresses_join_$version engine Join(any, left, address) as

 create table unstable_query_runs engine File(TSVWithNamesAndTypes,
        'unstable-query-runs.$version.rep') as
-    select query_id, query from query_log
-    join queries using query
-    where query_id not like 'prewarm %' and (unstable or changed)
+    select query, query_id from query_log
+    where query in (select query from queries_for_flamegraph)
+        and query_id not like 'prewarm %'
    ;

 create table unstable_query_log engine File(Vertical,
@ -427,10 +452,10 @@ create table metric_devation engine File(TSVWithNamesAndTypes,
    from (select * from unstable_run_metrics
        union all select * from unstable_run_traces
        union all select * from unstable_run_metrics_2) mm
-    join queries using query
+    join queries_for_flamegraph using query
    group by query, metric
    having d > 0.5
-    order by any(threshold) desc, query desc, d desc
+    order by query desc, d desc
    ;

 create table stacks engine File(TSV, 'stacks.$version.rep') as
@ -447,7 +472,7 @@ create table stacks engine File(TSV, 'stacks.$version.rep') as
    join unstable_query_runs using query_id
    group by query, trace
    ;
-" 2> >(head -2 >> report-errors.rep) ||: # do not run in parallel because they use the same data dir for StorageJoins which leads to weird errors.
+" 2> >(tee -a report/errors.log 1>&2) # do not run in parallel because they use the same data dir for StorageJoins which leads to weird errors.
 done
 wait

@ -524,12 +549,12 @@ case "$stage" in
    echo Servers stopped.
    ;&
 "analyze_queries")
-    time analyze_queries ||:
+    time analyze_queries
    ;&
 "report")
-    time report ||:
+    time report

-    time "$script_dir/report.py" --report=all-queries > all-queries.html 2> >(head -2 >> report-errors.rep) ||:
+    time "$script_dir/report.py" --report=all-queries > all-queries.html 2> >(tee -a report/errors.log 1>&2) ||:
    time "$script_dir/report.py" > report.html
    ;&
 esac
--- a/docker/test/performance-comparison/download.sh
+++ b/docker/test/performance-comparison/download.sh
@ -30,7 +30,8 @@ function download
        wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/performance/performance.tgz" -O- | tar -C left --strip-components=1 -zxv  &
        wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$right_pr/$right_sha/performance/performance.tgz" -O- | tar -C right --strip-components=1 -zxv &
    else
-        wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/performance/performance.tgz" -O- | tar -C left --strip-components=1 -zxv && cp -a left right &
+        mkdir right ||:
+        wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/performance/performance.tgz" -O- | tar -C left --strip-components=1 -zxv && cp -a left/* right &
    fi

    for dataset_name in $datasets
--- a/docker/test/performance-comparison/eqmed.sql
+++ b/docker/test/performance-comparison/eqmed.sql
@ -9,8 +9,12 @@ from
   (
      -- quantiles of randomization distributions
      select quantileExact(0.999)(abs(time_by_label[1] - time_by_label[2]) as d) threshold
-      -- uncomment to see what the distribution is really like
-      -- , sumMap([d], [1]) full_histogram
+      ---- uncomment to see what the distribution is really like
+      --, uniqExact(d) u
+      --, arraySort(x->x.1,
+      --      arrayZip(
+      --          (sumMap([d], [1]) as f).1,
+      --          f.2)) full_histogram
      from
         (
            select virtual_run, groupArrayInsertAt(median_time, random_label) time_by_label -- make array 'random label' -> 'median time'
--- a/docker/test/performance-comparison/perf.py
+++ b/docker/test/performance-comparison/perf.py
@ -48,6 +48,10 @@ infinite_sign = root.find('.//average_speed_not_changing_for_ms')
 if infinite_sign is not None:
    raise Exception('Looks like the test is infinite (sign 1)')

+# Print report threshold for the test if it is set.
+if 'max_ignored_relative_change' in root.attrib:
+    print(f'report-threshold\t{root.attrib["max_ignored_relative_change"]}')
+
 # Open connections
 servers = [{'host': host, 'port': port} for (host, port) in zip(args.host, args.port)]
 connections = [clickhouse_driver.Client(**server) for server in servers]
--- a/docker/test/performance-comparison/report.py
+++ b/docker/test/performance-comparison/report.py
@ -95,7 +95,8 @@ def tableRow(cell_values, cell_attributes = []):
    return tr(''.join([td(v, a)
        for v, a in itertools.zip_longest(
            cell_values, cell_attributes,
-            fillvalue = '')]))
+            fillvalue = '')
+        if a is not None]))

 def tableHeader(r):
    return tr(''.join([th(f) for f in r]))
@ -148,7 +149,7 @@ if args.report == 'main':
             open('right-commit.txt').read()]]])

    def print_changes():
-        rows = tsvRows('changed-perf.tsv')
+        rows = tsvRows('report/changed-perf.tsv')
        if not rows:
            return

@ -160,22 +161,25 @@ if args.report == 'main':
            'New, s',                                          # 1
            'Relative difference (new&nbsp;-&nbsp;old)/old',   # 2
            'p&nbsp;<&nbsp;0.001 threshold',                   # 3
-            'Test',                                            # 4
-            'Query',                                           # 5
+            # Failed                                           # 4
+            'Test',                                            # 5
+            'Query',                                           # 6
            ]

        print(tableHeader(columns))

        attrs = ['' for c in columns]
+        attrs[4] = None
        for row in rows:
-            attrs[2] = ''
-            if abs(float(row[2])) > 0.10:
+            if int(row[4]):
                if float(row[2]) < 0.:
                    faster_queries += 1
                    attrs[2] = 'style="background: #adbdff"'
                else:
                    slower_queries += 1
                    attrs[2] = 'style="background: #ffb0a0"'
+            else:
+                attrs[2] = ''

            print(tableRow(row, attrs))

@ -183,7 +187,7 @@ if args.report == 'main':

    print_changes()

-    slow_on_client_rows = tsvRows('slow-on-client.tsv')
+    slow_on_client_rows = tsvRows('report/slow-on-client.tsv')
    error_tests += len(slow_on_client_rows)
    printSimpleTable('Slow on client',
        ['Client time, s', 'Server time, s', 'Ratio', 'Query'],
@ -193,7 +197,7 @@ if args.report == 'main':
        global unstable_queries
        global very_unstable_queries

-        unstable_rows = tsvRows('unstable-queries.tsv')
+        unstable_rows = tsvRows('report/unstable-queries.tsv')
        if not unstable_rows:
            return

@ -204,16 +208,18 @@ if args.report == 'main':
            'New, s', #1
            'Relative difference (new&nbsp;-&nbsp;old)/old', #2
            'p&nbsp;<&nbsp;0.001 threshold', #3
-            'Test', #4
-            'Query' #5
+            # Failed #4
+            'Test', #5
+            'Query' #6
        ]

        print(tableStart('Unstable queries'))
        print(tableHeader(columns))

        attrs = ['' for c in columns]
+        attrs[4] = None
        for r in unstable_rows:
-            if float(r[3]) > 0.2:
+            if int(r[4]):
                very_unstable_queries += 1
                attrs[3] = 'style="background: #ffb0a0"'
            else:
@ -234,11 +240,11 @@ if args.report == 'main':

    printSimpleTable('Tests with most unstable queries',
        ['Test', 'Unstable', 'Changed perf', 'Total not OK'],
-        tsvRows('bad-tests.tsv'))
+        tsvRows('report/bad-tests.tsv'))

    def print_test_times():
        global slow_average_tests
-        rows = tsvRows('test-times.tsv')
+        rows = tsvRows('report/test-times.tsv')
        if not rows:
            return

@ -279,7 +285,7 @@ if args.report == 'main':
    print_test_times()

    # Add the errors reported by various steps of comparison script
-    report_errors += [l.strip() for l in open('report-errors.rep')]
+    report_errors += [l.strip() for l in open('report/errors.log')]
    if len(report_errors):
        print(tableStart('Errors while building the report'))
        print(tableHeader(['Error']))
@ -346,40 +352,42 @@ elif args.report == 'all-queries':
             open('right-commit.txt').read()]]])

    def print_all_queries():
-        rows = tsvRows('all-queries.tsv')
+        rows = tsvRows('report/all-queries.tsv')
        if not rows:
            return

        columns = [
-            'Old, s', #0
-            'New, s', #1
-            'Relative difference (new&nbsp;-&nbsp;old)/old', #2
-            'Times speedup/slowdown',                 #3
-            'p&nbsp;<&nbsp;0.001 threshold',          #4
-            'Test',                                   #5
-            'Query',                                  #6
+            # Changed #0
+            # Unstable #1
+            'Old, s', #2
+            'New, s', #3
+            'Relative difference (new&nbsp;-&nbsp;old)/old', #4
+            'Times speedup/slowdown',                 #5
+            'p&nbsp;<&nbsp;0.001 threshold',          #6
+            'Test',                                   #7
+            'Query',                                  #8
            ]

        print(tableStart('All query times'))
        print(tableHeader(columns))

        attrs = ['' for c in columns]
+        attrs[0] = None
+        attrs[1] = None
        for r in rows:
-            threshold = float(r[3])
-            if threshold > 0.2:
-                attrs[4] = 'style="background: #ffb0a0"'
+            if int(r[1]):
+                attrs[6] = 'style="background: #ffb0a0"'
+            else:
+                attrs[6] = ''
+
+            if int(r[0]):
+                if float(r[4]) > 0.:
+                    attrs[4] = 'style="background: #ffb0a0"'
+                else:
+                    attrs[4] = 'style="background: #adbdff"'
            else:
                attrs[4] = ''

-            diff = float(r[2])
-            if abs(diff) > threshold and threshold >= 0.05:
-                if diff > 0.:
-                    attrs[3] = 'style="background: #ffb0a0"'
-                else:
-                    attrs[3] = 'style="background: #adbdff"'
-            else:
-                attrs[3] = ''
-
            print(tableRow(r, attrs))

        print(tableEnd())
--- a/tests/performance/codecs_int_insert.xml
+++ b/tests/performance/codecs_int_insert.xml
@ -1,6 +1,4 @@
-<test>
-
-
+<test max_ignored_relative_change="0.2">
    <substitutions>
        <substitution>
            <name>codec</name>
--- a/tests/performance/if_array_string.xml
+++ b/tests/performance/if_array_string.xml
@ -1,4 +1,4 @@
-<test>
+<test max_ignored_relative_change="0.2">
    <query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(rand() % 2 ? ['Hello', 'World'] : ['a', 'b', 'c'])</query>
    <query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(rand() % 2 ? materialize(['Hello', 'World']) : ['a', 'b', 'c'])</query>
    <query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(rand() % 2 ? ['Hello', 'World'] : materialize(['a', 'b', 'c']))</query>
--- a/tests/performance/math.xml
+++ b/tests/performance/math.xml
@ -1,7 +1,4 @@
-<test>
-
-
-
+<test max_ignored_relative_change="0.2">
    <substitutions>
        <substitution>
           <name>func_slow</name>
--- a/tests/performance/visit_param_extract_raw.xml
+++ b/tests/performance/visit_param_extract_raw.xml
@ -1,5 +1,4 @@
-<test>
-
+<test max_ignored_relative_change="0.2">
    <substitutions>
        <substitution>
           <name>param</name>