Run only some queries in perf tests, not all combos

This commit is contained in:
Alexander Kuzmenkov 2020-09-10 19:57:26 +03:00
parent 7b50341dcc
commit 8689797efc
2 changed files with 26 additions and 3 deletions

View File

@ -121,7 +121,7 @@ function run_tests
then
# Use the explicitly set path to directory with test files.
test_prefix="$CHPC_TEST_PATH"
elif [ "$PR_TO_TEST" = "0" ]
elif [ "$PR_TO_TEST" == "0" ]
then
# When testing commits from master, use the older test files. This
# allows the tests to pass even when we add new functions and tests for
@ -155,6 +155,20 @@ function run_tests
test_files=$(ls "$test_prefix"/*.xml)
fi
# For PRs, test only a subset of queries, and run them fewer times. Commits
# from master (PR_TO_TEST == 0) get the full set of queries and runs.
# If the corresponding environment variables are already set, keep
# those values.
if [ "$PR_TO_TEST" == "0" ]
then
    # Master: a query limit of 0 is falsy in perf.py, so all queries are run.
    CHPC_RUNS=${CHPC_RUNS:-13}
    CHPC_MAX_QUERIES=${CHPC_MAX_QUERIES:-0}
else
    # PR: fewer runs and a random subset of the queries.
    CHPC_RUNS=${CHPC_RUNS:-7}
    CHPC_MAX_QUERIES=${CHPC_MAX_QUERIES:-15}
fi
# CHPC_RUNS is the variable the perf.py invocation passes to --runs.
# CHPC_TEST_RUNS was exported here before, so keep it as an alias in case
# anything else reads it.
CHPC_TEST_RUNS=$CHPC_RUNS
export CHPC_RUNS
export CHPC_TEST_RUNS
export CHPC_MAX_QUERIES
# Determine which concurrent benchmarks to run. For now, the only test
# we run as a concurrent benchmark is 'website'. Run it as benchmark if we
# are also going to run it as a normal test.
@ -187,6 +201,7 @@ function run_tests
# Run the test against both servers (ports 9001 and 9002). The script's
# stdout goes to "$test_name-raw.tsv" and its stderr to "$test_name-err.log",
# so the only things left on the stderr of the brace group are the output of
# `time` and the set -x trace. `2>&1 >/dev/null` sends that stderr into the
# pipe and discards the group's stdout.
# the grep is to filter out set -x output and keep only time output
# NOTE(review): `$?` in the failure message is the status of the whole
# pipeline, which is grep's status unless `pipefail` is in effect -- confirm
# that this is the intended error code.
{ \
time "$script_dir/perf.py" --host localhost localhost --port 9001 9002 \
--runs "$CHPC_RUNS" --max-queries "$CHPC_MAX_QUERIES" \
-- "$test" > "$test_name-raw.tsv" 2> "$test_name-err.log" ; \
} 2>&1 >/dev/null | grep -v ^+ >> "wall-clock-times.tsv" \
|| echo "Test $test_name failed with error code $?" >> "$test_name-err.log"

View File

@ -7,6 +7,7 @@ import clickhouse_driver
import xml.etree.ElementTree as et
import argparse
import pprint
import random
import re
import string
import time
@ -20,7 +21,8 @@ parser = argparse.ArgumentParser(description='Run performance test.')
parser.add_argument('file', metavar='FILE', type=argparse.FileType('r', encoding='utf-8'), nargs=1, help='test description file')
parser.add_argument('--host', nargs='*', default=['localhost'], help="Server hostname(s). Corresponds to '--port' options.")
parser.add_argument('--port', nargs='*', default=[9000], help="Server port(s). Corresponds to '--host' options.")
parser.add_argument('--runs', type=int, default=int(os.environ.get('CHPC_RUNS', 13)), help='Number of query runs per server. Defaults to CHPC_RUNS environment variable.')
parser.add_argument('--runs', type=int, default=1, help='Number of query runs per server.')
parser.add_argument('--max-queries', type=int, default=None, help='Test no more than this number of queries, chosen at random.')
parser.add_argument('--long', action='store_true', help='Do not skip the tests tagged as long.')
parser.add_argument('--print-queries', action='store_true', help='Print test queries and exit.')
parser.add_argument('--print-settings', action='store_true', help='Print test settings and exit.')
@ -189,8 +191,14 @@ for conn_index, c in enumerate(connections):
c.execute(q)
print(f'fill\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
# Run the queries in randomized order, but preserve their indexes as specified
# in the test XML. To avoid using too much time, limit the number of queries
# we run per test. A limit that is unset or zero means "run all queries". A
# limit larger than the number of queries is clamped to that number, because
# random.sample() raises ValueError when the sample is larger than the
# population.
queries_to_run = random.sample(
    range(0, len(test_queries)),
    min(len(test_queries), args.max_queries or len(test_queries)))
# Run test queries.
for query_index, q in enumerate(test_queries):
for query_index in queries_to_run:
q = test_queries[query_index]
query_prefix = f'{test_name}.query{query_index}'
# We have some crazy long queries (about 100kB), so trim them to a sane