From e7b9319e120147735019f3a89a87b45087f86815 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov
Date: Fri, 18 Sep 2020 16:27:50 +0300
Subject: [PATCH] If perf test definition changed, run everything + longer (as in master)

Also some other perf test fixes
---
 docker/test/performance-comparison/compare.sh    | 75 +++++++++++++------
 .../test/performance-comparison/entrypoint.sh    | 11 +--
 docker/test/performance-comparison/report.py     | 12 +--
 tests/performance/columns_hashing.xml            | 17 ++---
 4 files changed, 71 insertions(+), 44 deletions(-)

diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh
index 8d7947b46a5..886dd0b74f6 100755
--- a/docker/test/performance-comparison/compare.sh
+++ b/docker/test/performance-comparison/compare.sh
@@ -114,8 +114,6 @@ function run_tests
     # Just check that the script runs at all
     "$script_dir/perf.py" --help > /dev/null
 
-    changed_test_files=""
-
     # Find the directory with test files.
     if [ -v CHPC_TEST_PATH ]
     then
@@ -130,14 +128,6 @@ function run_tests
     else
         # For PRs, use newer test files so we can test these changes.
         test_prefix=right/performance
-
-        # If only the perf tests were changed in the PR, we will run only these
-        # tests. The list of changed tests in changed-test.txt is prepared in
-        # entrypoint.sh from git diffs, because it has the cloned repo. Used
-        # to use rsync for that but it was really ugly and not always correct
-        # (e.g. when the reference SHA is really old and has some other
-        # differences to the tested SHA, besides the one introduced by the PR).
-        changed_test_files=$(sed "s/tests\/performance/${test_prefix//\//\\/}/" changed-tests.txt)
     fi
 
     # Determine which tests to run.
@@ -146,19 +136,26 @@ function run_tests
         # Run only explicitly specified tests, if any.
         # shellcheck disable=SC2010
         test_files=$(ls "$test_prefix" | grep "$CHPC_TEST_GREP" | xargs -I{} -n1 readlink -f "$test_prefix/{}")
-    elif [ "$changed_test_files" != "" ]
+    elif [ "$PR_TO_TEST" -ne 0 ] \
+        && [ "$(wc -l < changed-test-definitions.txt)" -gt 0 ] \
+        && [ "$(wc -l < changed-test-scripts.txt)" -eq 0 ] \
+        && [ "$(wc -l < other-changed-files.txt)" -eq 0 ]
     then
-        # Use test files that changed in the PR.
-        test_files="$changed_test_files"
+        # If only the perf tests were changed in the PR, we will run only these
+        # tests. The lists of changed files are prepared in entrypoint.sh because
+        # it has the repository.
+        test_files=$(sed "s/tests\/performance/${test_prefix//\//\\/}/" changed-test-definitions.txt)
     else
         # The default -- run all tests found in the test dir.
         test_files=$(ls "$test_prefix"/*.xml)
     fi
 
-    # For PRs, test only a subset of queries, and run them less times.
-    # If the corresponding environment variables are already set, keep
-    # those values.
-    if [ "$PR_TO_TEST" == "0" ]
+    # For PRs w/o changes in test definitions and scripts, test only a subset of
+    # queries, and run them fewer times. If the corresponding environment variables
+    # are already set, keep those values.
+ if [ "$PR_TO_TEST" -ne 0 ] \ + && [ "$(wc -l < changed-test-definitions.txt)" -eq 0 ] \ + && [ "$(wc -l < changed-test-files.txt)" -eq 0 ] then CHPC_RUNS=${CHPC_RUNS:-13} CHPC_MAX_QUERIES=${CHPC_MAX_QUERIES:-0} @@ -662,6 +659,38 @@ create table test_time engine Memory as from total_client_time_per_query full join queries using (test, query_index) group by test; +create view query_runs as select * from file('analyze/query-runs.tsv', TSV, + 'test text, query_index int, query_id text, version UInt8, time float'); + +-- +-- Guess the number of query runs used for this test. The number is required to +-- calculate and check the average query run time in the report. +-- We have to be careful, because we will encounter: +-- 1) partial queries which run only on one server +-- 2) short queries which run for a much higher number of times +-- 3) some errors that make query run for a different number of times on a +-- particular server. +-- +create view test_runs as + select test, + -- Default to 7 runs if there are only 'short' queries in the test, and + -- we can't determine the number of runs. + if((ceil(medianOrDefaultIf(t.runs, not short), 0) as r) != 0, r, 7) runs + from ( + select + -- The query id is the same for both servers, so no need to divide here. + uniqExact(query_id) runs, + (test, query_index) in + (select * from file('analyze/marked-short-queries.tsv', TSV, + 'test text, query_index int')) + as short, + test, query_index + from query_runs + group by test, query_index + ) t + group by test + ; + create table test_times_report engine File(TSV, 'report/test-times.tsv') as select wall_clock_time_per_test.test, real, toDecimal64(total_client_time, 3), @@ -669,11 +698,15 @@ create table test_times_report engine File(TSV, 'report/test-times.tsv') as short_queries, toDecimal64(query_max, 3), toDecimal64(real / queries, 3) avg_real_per_query, - toDecimal64(query_min, 3) + toDecimal64(query_min, 3), + runs from test_time - -- wall clock times are also measured for skipped tests, so don't - -- do full join - left join wall_clock_time_per_test using test + -- wall clock times are also measured for skipped tests, so don't + -- do full join + left join wall_clock_time_per_test + on wall_clock_time_per_test.test = test_time.test + full join test_runs + on test_runs.test = test_time.test order by avg_real_per_query desc; -- report for all queries page, only main metric diff --git a/docker/test/performance-comparison/entrypoint.sh b/docker/test/performance-comparison/entrypoint.sh index 9e9a46a3ce6..ed2e542eadd 100755 --- a/docker/test/performance-comparison/entrypoint.sh +++ b/docker/test/performance-comparison/entrypoint.sh @@ -97,13 +97,10 @@ then # tests for use by compare.sh. Compare to merge base, because master might be # far in the future and have unrelated test changes. base=$(git -C right/ch merge-base pr origin/master) - git -C right/ch diff --name-only "$base" pr | tee changed-tests.txt - if grep -vq '^tests/performance' changed-tests.txt - then - # Have some other changes besides the tests, so truncate the test list, - # meaning, run all tests. - : > changed-tests.txt - fi + git -C right/ch diff --name-only "$base" pr -- . 
| tee all-changed-files.txt + git -C right/ch diff --name-only "$base" pr -- tests/performance | tee changed-test-definitions.txt + git -C right/ch diff --name-only "$base" pr -- docker/test/performance-comparison | tee changed-test-scripts.txt + git -C right/ch diff --name-only "$base" pr -- :!tests/performance :!docker/test/performance-comparison | tee other-changed-files.txt fi # Set python output encoding so that we can print queries with Russian letters. diff --git a/docker/test/performance-comparison/report.py b/docker/test/performance-comparison/report.py index e9e2ac68c1e..ee67c3c0457 100755 --- a/docker/test/performance-comparison/report.py +++ b/docker/test/performance-comparison/report.py @@ -457,25 +457,25 @@ if args.report == 'main': return columns = [ - 'Test', #0 + 'Test', #0 'Wall clock time, s', #1 'Total client time, s', #2 - 'Total queries', #3 - 'Ignored short queries', #4 + 'Total queries', #3 + 'Ignored short queries', #4 'Longest query
(sum for all runs), s', #5 'Avg wall clock time
(sum for all runs), s', #6 'Shortest query
(sum for all runs), s', #7 + # 'Runs' #8 ] text = tableStart('Test times') text += tableHeader(columns) - nominal_runs = 7 # FIXME pass this as an argument - total_runs = (nominal_runs + 1) * 2 # one prewarm run, two servers - allowed_average_run_time = allowed_single_run_time + 60 / total_runs; # some allowance for fill/create queries + allowed_average_run_time = 3.75 # 60 seconds per test at 7 runs attrs = ['' for c in columns] for r in rows: anchor = f'{currentTableAnchor()}.{r[0]}' + total_runs = (int(r[8]) + 1) * 2 # one prewarm run, two servers if float(r[6]) > allowed_average_run_time * total_runs: # FIXME should be 15s max -- investigate parallel_insert slow_average_tests += 1 diff --git a/tests/performance/columns_hashing.xml b/tests/performance/columns_hashing.xml index fb340c20ccd..271fff6e543 100644 --- a/tests/performance/columns_hashing.xml +++ b/tests/performance/columns_hashing.xml @@ -1,15 +1,12 @@ - - columns_hashing - - - test.hits + hits_10m_single + hits_100m_single - - - - - + select sum(UserID + 1 in (select UserID from hits_100m_single)) from hits_100m_single + select sum((UserID + 1, RegionID) in (select UserID, RegionID from hits_10m_single)) from hits_10m_single + select sum(URL in (select URL from hits_10m_single where URL != '')) from hits_10m_single + select sum(MobilePhoneModel in (select MobilePhoneModel from hits_10m_single where MobilePhoneModel != '')) from hits_10m_single + select sum((MobilePhoneModel, UserID + 1) in (select MobilePhoneModel, UserID from hits_100m_single where MobilePhoneModel != '')) from hits_100m_single
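Not part of the patch: the test_runs view above is the subtlest piece of the change, so here is a small, self-contained ClickHouse SQL sketch of the same run-count guess on made-up data. The test names and numbers are hypothetical; only the expression mirrors the patch. Both tests come out as 7 runs: 'normal' because its single non-short query ran 7 times, and 'all_short' via the explicit fallback for tests that contain only short queries.

select
    test,
    -- mirrors test_runs: median of per-query run counts over non-short queries,
    -- falling back to 7 when every query in the test is marked short
    if((ceil(medianOrDefaultIf(runs, not short), 0) as r) != 0, r, 7) as guessed_runs
from
(
    select 'normal' as test, 1 as query_index, 7 as runs, 0 as short
    union all select 'normal', 2, 100, 1     -- short query, runs many more times
    union all select 'all_short', 1, 100, 1  -- test consisting only of short queries
)
group by test;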