mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 09:32:06 +00:00
If perf test definition changed, run everything + longer (as in master)
Also some other perf test fixes
This commit is contained in:
parent
0052bbdd84
commit
e7b9319e12
@ -114,8 +114,6 @@ function run_tests
|
||||
# Just check that the script runs at all
|
||||
"$script_dir/perf.py" --help > /dev/null
|
||||
|
||||
changed_test_files=""
|
||||
|
||||
# Find the directory with test files.
|
||||
if [ -v CHPC_TEST_PATH ]
|
||||
then
|
||||
@ -130,14 +128,6 @@ function run_tests
|
||||
else
|
||||
# For PRs, use newer test files so we can test these changes.
|
||||
test_prefix=right/performance
|
||||
|
||||
# If only the perf tests were changed in the PR, we will run only these
|
||||
# tests. The list of changed tests in changed-test.txt is prepared in
|
||||
# entrypoint.sh from git diffs, because it has the cloned repo. Used
|
||||
# to use rsync for that but it was really ugly and not always correct
|
||||
# (e.g. when the reference SHA is really old and has some other
|
||||
# differences to the tested SHA, besides the one introduced by the PR).
|
||||
changed_test_files=$(sed "s/tests\/performance/${test_prefix//\//\\/}/" changed-tests.txt)
|
||||
fi
|
||||
|
||||
# Determine which tests to run.
|
||||
@ -146,19 +136,26 @@ function run_tests
|
||||
# Run only explicitly specified tests, if any.
|
||||
# shellcheck disable=SC2010
|
||||
test_files=$(ls "$test_prefix" | grep "$CHPC_TEST_GREP" | xargs -I{} -n1 readlink -f "$test_prefix/{}")
|
||||
elif [ "$changed_test_files" != "" ]
|
||||
elif [ "$PR_TO_TEST" -ne 0 ] \
|
||||
&& [ "$(wc -l < changed-test-definitions.txt)" -gt 0 ] \
|
||||
&& [ "$(wc -l < changed-test-scripts.txt)" -eq 0 ] \
|
||||
&& [ "$(wc -l < other-changed-files.txt)" -eq 0 ]
|
||||
then
|
||||
# Use test files that changed in the PR.
|
||||
test_files="$changed_test_files"
|
||||
# If only the perf tests were changed in the PR, we will run only these
|
||||
# tests. The lists of changed files are prepared in entrypoint.sh because
|
||||
# it has the repository.
|
||||
test_files=$(sed "s/tests\/performance/${test_prefix//\//\\/}/" changed-test-definitions.txt)
|
||||
else
|
||||
# The default -- run all tests found in the test dir.
|
||||
test_files=$(ls "$test_prefix"/*.xml)
|
||||
fi
|
||||
|
||||
# For PRs, test only a subset of queries, and run them less times.
|
||||
# If the corresponding environment variables are already set, keep
|
||||
# those values.
|
||||
if [ "$PR_TO_TEST" == "0" ]
|
||||
# For PRs w/o changes in test definitons and scripts, test only a subset of
|
||||
# queries, and run them less times. If the corresponding environment variables
|
||||
# are already set, keep those values.
|
||||
if [ "$PR_TO_TEST" -ne 0 ] \
|
||||
&& [ "$(wc -l < changed-test-definitions.txt)" -eq 0 ] \
|
||||
&& [ "$(wc -l < changed-test-files.txt)" -eq 0 ]
|
||||
then
|
||||
CHPC_RUNS=${CHPC_RUNS:-13}
|
||||
CHPC_MAX_QUERIES=${CHPC_MAX_QUERIES:-0}
|
||||
@ -662,6 +659,38 @@ create table test_time engine Memory as
|
||||
from total_client_time_per_query full join queries using (test, query_index)
|
||||
group by test;
|
||||
|
||||
create view query_runs as select * from file('analyze/query-runs.tsv', TSV,
|
||||
'test text, query_index int, query_id text, version UInt8, time float');
|
||||
|
||||
--
|
||||
-- Guess the number of query runs used for this test. The number is required to
|
||||
-- calculate and check the average query run time in the report.
|
||||
-- We have to be careful, because we will encounter:
|
||||
-- 1) partial queries which run only on one server
|
||||
-- 2) short queries which run for a much higher number of times
|
||||
-- 3) some errors that make query run for a different number of times on a
|
||||
-- particular server.
|
||||
--
|
||||
create view test_runs as
|
||||
select test,
|
||||
-- Default to 7 runs if there are only 'short' queries in the test, and
|
||||
-- we can't determine the number of runs.
|
||||
if((ceil(medianOrDefaultIf(t.runs, not short), 0) as r) != 0, r, 7) runs
|
||||
from (
|
||||
select
|
||||
-- The query id is the same for both servers, so no need to divide here.
|
||||
uniqExact(query_id) runs,
|
||||
(test, query_index) in
|
||||
(select * from file('analyze/marked-short-queries.tsv', TSV,
|
||||
'test text, query_index int'))
|
||||
as short,
|
||||
test, query_index
|
||||
from query_runs
|
||||
group by test, query_index
|
||||
) t
|
||||
group by test
|
||||
;
|
||||
|
||||
create table test_times_report engine File(TSV, 'report/test-times.tsv') as
|
||||
select wall_clock_time_per_test.test, real,
|
||||
toDecimal64(total_client_time, 3),
|
||||
@ -669,11 +698,15 @@ create table test_times_report engine File(TSV, 'report/test-times.tsv') as
|
||||
short_queries,
|
||||
toDecimal64(query_max, 3),
|
||||
toDecimal64(real / queries, 3) avg_real_per_query,
|
||||
toDecimal64(query_min, 3)
|
||||
toDecimal64(query_min, 3),
|
||||
runs
|
||||
from test_time
|
||||
-- wall clock times are also measured for skipped tests, so don't
|
||||
-- do full join
|
||||
left join wall_clock_time_per_test using test
|
||||
-- wall clock times are also measured for skipped tests, so don't
|
||||
-- do full join
|
||||
left join wall_clock_time_per_test
|
||||
on wall_clock_time_per_test.test = test_time.test
|
||||
full join test_runs
|
||||
on test_runs.test = test_time.test
|
||||
order by avg_real_per_query desc;
|
||||
|
||||
-- report for all queries page, only main metric
|
||||
|
@ -97,13 +97,10 @@ then
|
||||
# tests for use by compare.sh. Compare to merge base, because master might be
|
||||
# far in the future and have unrelated test changes.
|
||||
base=$(git -C right/ch merge-base pr origin/master)
|
||||
git -C right/ch diff --name-only "$base" pr | tee changed-tests.txt
|
||||
if grep -vq '^tests/performance' changed-tests.txt
|
||||
then
|
||||
# Have some other changes besides the tests, so truncate the test list,
|
||||
# meaning, run all tests.
|
||||
: > changed-tests.txt
|
||||
fi
|
||||
git -C right/ch diff --name-only "$base" pr -- . | tee all-changed-files.txt
|
||||
git -C right/ch diff --name-only "$base" pr -- tests/performance | tee changed-test-definitions.txt
|
||||
git -C right/ch diff --name-only "$base" pr -- docker/test/performance-comparison | tee changed-test-scripts.txt
|
||||
git -C right/ch diff --name-only "$base" pr -- :!tests/performance :!docker/test/performance-comparison | tee other-changed-files.txt
|
||||
fi
|
||||
|
||||
# Set python output encoding so that we can print queries with Russian letters.
|
||||
|
@ -457,25 +457,25 @@ if args.report == 'main':
|
||||
return
|
||||
|
||||
columns = [
|
||||
'Test', #0
|
||||
'Test', #0
|
||||
'Wall clock time, s', #1
|
||||
'Total client time, s', #2
|
||||
'Total queries', #3
|
||||
'Ignored short queries', #4
|
||||
'Total queries', #3
|
||||
'Ignored short queries', #4
|
||||
'Longest query<br>(sum for all runs), s', #5
|
||||
'Avg wall clock time<br>(sum for all runs), s', #6
|
||||
'Shortest query<br>(sum for all runs), s', #7
|
||||
# 'Runs' #8
|
||||
]
|
||||
|
||||
text = tableStart('Test times')
|
||||
text += tableHeader(columns)
|
||||
|
||||
nominal_runs = 7 # FIXME pass this as an argument
|
||||
total_runs = (nominal_runs + 1) * 2 # one prewarm run, two servers
|
||||
allowed_average_run_time = allowed_single_run_time + 60 / total_runs; # some allowance for fill/create queries
|
||||
allowed_average_run_time = 3.75 # 60 seconds per test at 7 runs
|
||||
attrs = ['' for c in columns]
|
||||
for r in rows:
|
||||
anchor = f'{currentTableAnchor()}.{r[0]}'
|
||||
total_runs = (int(r[8]) + 1) * 2 # one prewarm run, two servers
|
||||
if float(r[6]) > allowed_average_run_time * total_runs:
|
||||
# FIXME should be 15s max -- investigate parallel_insert
|
||||
slow_average_tests += 1
|
||||
|
@ -1,15 +1,12 @@
|
||||
<test max_ignored_relative_change="0.3">
|
||||
<tags>
|
||||
<tag>columns_hashing</tag>
|
||||
</tags>
|
||||
|
||||
<preconditions>
|
||||
<table_exists>test.hits</table_exists>
|
||||
<table_exists>hits_10m_single</table_exists>
|
||||
<table_exists>hits_100m_single</table_exists>
|
||||
</preconditions>
|
||||
|
||||
<query><![CDATA[select sum(UserID + 1 in (select UserID from test.hits)) from test.hits]]></query>
|
||||
<query><![CDATA[select sum((UserID + 1, RegionID) in (select UserID, RegionID from test.hits)) from test.hits]]></query>
|
||||
<query><![CDATA[select sum(URL in (select URL from test.hits where URL != '')) from test.hits]]></query>
|
||||
<query><![CDATA[select sum(MobilePhoneModel in (select MobilePhoneModel from test.hits where MobilePhoneModel != '')) from test.hits]]></query>
|
||||
<query><![CDATA[select sum((MobilePhoneModel, UserID + 1) in (select MobilePhoneModel, UserID from test.hits where MobilePhoneModel != '')) from test.hits]]></query>
|
||||
<query>select sum(UserID + 1 in (select UserID from hits_100m_single)) from hits_100m_single</query>
|
||||
<query>select sum((UserID + 1, RegionID) in (select UserID, RegionID from hits_10m_single)) from hits_10m_single</query>
|
||||
<query>select sum(URL in (select URL from hits_10m_single where URL != '')) from hits_10m_single</query>
|
||||
<query>select sum(MobilePhoneModel in (select MobilePhoneModel from hits_10m_single where MobilePhoneModel != '')) from hits_10m_single</query>
|
||||
<query>select sum((MobilePhoneModel, UserID + 1) in (select MobilePhoneModel, UserID from hits_100m_single where MobilePhoneModel != '')) from hits_100m_single</query>
|
||||
</test>
|
||||
|
Loading…
Reference in New Issue
Block a user