ClickHouse/docker/test/performance-comparison/perf.py

413 lines
16 KiB
Python
Raw Normal View History

#!/usr/bin/python3
import argparse
2020-09-02 03:29:36 +00:00
import clickhouse_driver
import itertools
import functools
import math
import os
import pprint
import random
import re
2020-09-02 03:29:36 +00:00
import statistics
2020-03-03 13:38:45 +00:00
import string
2020-09-02 03:29:36 +00:00
import sys
2020-01-16 19:39:07 +00:00
import time
import traceback
2020-09-02 03:29:36 +00:00
import xml.etree.ElementTree as et
2020-09-23 08:21:55 +00:00
from threading import Thread
from scipy import stats
2020-09-24 11:46:03 +00:00
total_start_seconds = time.perf_counter()
stage_start_seconds = total_start_seconds
def reportStageEnd(stage):
global stage_start_seconds, total_start_seconds
current = time.perf_counter()
print(f'stage\t{stage}\t{current - stage_start_seconds:.3f}\t{current - total_start_seconds:.3f}')
stage_start_seconds = current
2020-05-25 01:03:21 +00:00
def tsv_escape(s):
return s.replace('\\', '\\\\').replace('\t', '\\t').replace('\n', '\\n').replace('\r','')
2020-09-24 11:46:03 +00:00
parser = argparse.ArgumentParser(description='Run performance test.')
2020-01-10 14:06:07 +00:00
# Explicitly decode files as UTF-8 because sometimes we have Russian characters in queries, and LANG=C is set.
parser.add_argument('file', metavar='FILE', type=argparse.FileType('r', encoding='utf-8'), nargs=1, help='test description file')
2020-09-24 13:24:44 +00:00
parser.add_argument('--host', nargs='*', default=['localhost'], help="Space-separated list of server hostname(s). Corresponds to '--port' options.")
parser.add_argument('--port', nargs='*', default=[9000], help="Space-separated list of server port(s). Corresponds to '--host' options.")
parser.add_argument('--runs', type=int, default=1, help='Number of query runs per server.')
parser.add_argument('--max-queries', type=int, default=None, help='Test no more than this number of queries, chosen at random.')
2020-09-24 13:24:44 +00:00
parser.add_argument('--queries-to-run', nargs='*', type=int, default=None, help='Space-separated list of indexes of queries to test.')
2020-09-25 09:52:09 +00:00
parser.add_argument('--profile-seconds', type=int, default=0, help='For how many seconds to profile a query for which the performance has changed.')
parser.add_argument('--long', action='store_true', help='Do not skip the tests tagged as long.')
2020-06-23 12:30:45 +00:00
parser.add_argument('--print-queries', action='store_true', help='Print test queries and exit.')
parser.add_argument('--print-settings', action='store_true', help='Print test settings and exit.')
args = parser.parse_args()
2020-09-24 11:46:03 +00:00
reportStageEnd('start')
test_name = os.path.splitext(os.path.basename(args.file[0].name))[0]
tree = et.parse(args.file[0])
root = tree.getroot()
2020-09-24 11:46:03 +00:00
reportStageEnd('parse')
2020-03-03 13:38:45 +00:00
# Process query parameters
subst_elems = root.findall('substitutions/substitution')
2020-03-03 13:38:45 +00:00
available_parameters = {} # { 'table': ['hits_10m', 'hits_100m'], ... }
for e in subst_elems:
available_parameters[e.find('name').text] = [v.text for v in e.findall('values/value')]
2020-06-23 12:09:54 +00:00
# Takes parallel lists of templates, substitutes them with all combos of
# parameters. The set of parameters is determined based on the first list.
# Note: keep the order of queries -- sometimes we have DROP IF EXISTS
2020-02-21 19:58:52 +00:00
# followed by CREATE in create queries section, so the order matters.
def substitute_parameters(query_templates, other_templates = []):
2020-06-23 12:09:54 +00:00
query_results = []
other_results = [[]] * (len(other_templates))
for i, q in enumerate(query_templates):
2020-03-03 13:38:45 +00:00
keys = set(n for _, n, _, _ in string.Formatter().parse(q) if n)
values = [available_parameters[k] for k in keys]
2020-06-23 12:09:54 +00:00
combos = itertools.product(*values)
for c in combos:
with_keys = dict(zip(keys, c))
query_results.append(q.format(**with_keys))
for j, t in enumerate(other_templates):
other_results[j].append(t[i].format(**with_keys))
if len(other_templates):
return query_results, other_results
2020-06-23 12:09:54 +00:00
else:
return query_results
# Build a list of test queries, substituting parameters to query templates,
# and reporting the queries marked as short.
test_queries = []
2020-09-02 03:29:36 +00:00
is_short = []
for e in root.findall('query'):
2020-09-02 03:29:36 +00:00
new_queries, [new_is_short] = substitute_parameters([e.text], [[e.attrib.get('short', '0')]])
test_queries += new_queries
2020-09-02 03:29:36 +00:00
is_short += [eval(s) for s in new_is_short]
assert(len(test_queries) == len(is_short))
2020-09-28 12:09:08 +00:00
# If we're given a list of queries to run, check that it makes sense.
for i in args.queries_to_run or []:
if i < 0 or i >= len(test_queries):
print(f'There is no query no. {i} in this test, only [{0}-{len(test_queries) - 1}] are present')
exit(1)
2020-09-28 12:09:08 +00:00
# If we're only asked to print the queries, do that and exit.
2020-06-23 12:30:45 +00:00
if args.print_queries:
2020-09-28 12:09:08 +00:00
for i in args.queries_to_run or range(0, len(test_queries)):
print(test_queries[i])
exit(0)
2020-09-02 03:29:36 +00:00
# Print short queries
for i, s in enumerate(is_short):
if s:
print(f'short\t{i}')
2020-06-23 12:30:45 +00:00
# If we're only asked to print the settings, do that and exit. These are settings
# for clickhouse-benchmark, so we print them as command line arguments, e.g.
# '--max_memory_usage=10000000'.
if args.print_settings:
for s in root.findall('settings/*'):
print(f'--{s.tag}={s.text}')
exit(0)
2020-03-03 10:47:32 +00:00
# Skip long tests
if not args.long:
for tag in root.findall('.//tag'):
if tag.text == 'long':
print('skipped\tTest is tagged as long.')
sys.exit(0)
2020-03-03 10:47:32 +00:00
2020-04-28 07:45:35 +00:00
# Print report threshold for the test if it is set.
2020-09-29 19:06:45 +00:00
ignored_relative_change = 0.05
2020-04-28 07:45:35 +00:00
if 'max_ignored_relative_change' in root.attrib:
2020-09-29 19:06:45 +00:00
ignored_relative_change = float(root.attrib["max_ignored_relative_change"])
print(f'report-threshold\t{ignored_relative_change}')
2020-04-28 07:45:35 +00:00
2020-09-24 11:46:03 +00:00
reportStageEnd('before-connect')
# Open connections
2020-09-28 18:43:47 +00:00
servers = [{'host': host or args.host[0], 'port': port or args.port[0]} for (host, port) in itertools.zip_longest(args.host, args.port)]
2020-09-02 03:29:36 +00:00
all_connections = [clickhouse_driver.Client(**server) for server in servers]
2020-09-28 18:43:47 +00:00
for i, s in enumerate(servers):
print(f'server\t{i}\t{s["host"]}\t{s["port"]}')
2020-02-14 12:55:47 +00:00
2020-09-24 11:46:03 +00:00
reportStageEnd('connect')
2020-03-02 15:05:58 +00:00
# Run drop queries, ignoring errors. Do this before all other activity, because
# clickhouse_driver disconnects on error (this is not configurable), and the new
# connection loses the changes in settings.
drop_query_templates = [q.text for q in root.findall('drop_query')]
2020-03-03 13:38:45 +00:00
drop_queries = substitute_parameters(drop_query_templates)
2020-09-02 03:29:36 +00:00
for conn_index, c in enumerate(all_connections):
for q in drop_queries:
try:
c.execute(q)
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
except:
2020-01-16 19:39:07 +00:00
pass
2020-09-24 11:46:03 +00:00
reportStageEnd('drop-1')
2020-06-05 12:53:47 +00:00
# Apply settings.
# If there are errors, report them and continue -- maybe a new test uses a setting
# that is not in master, but the queries can still run. If we have multiple
# settings and one of them throws an exception, all previous settings for this
# connection will be reset, because the driver reconnects on error (not
# configurable). So the end result is uncertain, but hopefully we'll be able to
# run at least some queries.
2020-03-02 15:05:58 +00:00
settings = root.findall('settings/*')
2020-09-02 03:29:36 +00:00
for conn_index, c in enumerate(all_connections):
2020-03-02 15:05:58 +00:00
for s in settings:
2020-06-05 12:53:47 +00:00
try:
q = f"set {s.tag} = '{s.text}'"
c.execute(q)
print(f'set\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
2020-06-05 12:53:47 +00:00
except:
print(traceback.format_exc(), file=sys.stderr)
2020-03-02 15:05:58 +00:00
2020-09-24 11:46:03 +00:00
reportStageEnd('settings')
2020-03-02 15:05:58 +00:00
# Check tables that should exist. If they don't exist, just skip this test.
tables = [e.text for e in root.findall('preconditions/table_exists')]
for t in tables:
2020-09-02 03:29:36 +00:00
for c in all_connections:
2020-03-02 15:05:58 +00:00
try:
2020-03-11 21:07:34 +00:00
res = c.execute("select 1 from {} limit 1".format(t))
2020-03-02 15:05:58 +00:00
except:
2020-05-25 01:03:21 +00:00
exception_message = traceback.format_exception_only(*sys.exc_info()[:2])[-1]
skipped_message = ' '.join(exception_message.split('\n')[:2])
print(f'skipped\t{tsv_escape(skipped_message)}')
2020-03-02 15:05:58 +00:00
sys.exit(0)
2020-09-24 11:46:03 +00:00
reportStageEnd('preconditions')
2020-09-23 08:21:55 +00:00
# Run create and fill queries. We will run them simultaneously for both servers,
# to save time.
# The weird search is to keep the relative order of elements, which matters, and
# etree doesn't support the appropriate xpath query.
create_query_templates = [q.text for q in root.findall('./*') if q.tag in ('create_query', 'fill_query')]
2020-03-03 13:38:45 +00:00
create_queries = substitute_parameters(create_query_templates)
# Disallow temporary tables, because the clickhouse_driver reconnects on errors,
# and temporary tables are destroyed. We want to be able to continue after some
# errors.
for q in create_queries:
if re.search('create temporary table', q, flags=re.IGNORECASE):
print(f"Temporary tables are not allowed in performance tests: '{q}'",
file = sys.stderr)
sys.exit(1)
2020-09-23 08:21:55 +00:00
def do_create(connection, index, queries):
for q in queries:
connection.execute(q)
print(f'create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}')
2020-09-23 08:21:55 +00:00
threads = [Thread(target = do_create, args = (connection, index, create_queries))
for index, connection in enumerate(all_connections)]
for t in threads:
t.start()
for t in threads:
t.join()
2020-09-24 11:46:03 +00:00
reportStageEnd('create')
# By default, test all queries.
2020-09-24 13:24:44 +00:00
queries_to_run = range(0, len(test_queries))
if args.max_queries:
# If specified, test a limited number of queries chosen at random.
queries_to_run = random.sample(range(0, len(test_queries)), min(len(test_queries), args.max_queries))
if args.queries_to_run:
2020-09-28 12:09:08 +00:00
# Run the specified queries.
2020-09-24 13:24:44 +00:00
queries_to_run = args.queries_to_run
2020-06-23 12:09:54 +00:00
# Run test queries.
2020-09-25 09:52:09 +00:00
profile_total_seconds = 0
for query_index in queries_to_run:
q = test_queries[query_index]
query_prefix = f'{test_name}.query{query_index}'
2020-04-29 09:28:12 +00:00
# We have some crazy long queries (about 100kB), so trim them to a sane
# length. This means we can't use query text as an identifier and have to
# use the test name + the test-wide query index.
2020-04-29 09:28:12 +00:00
query_display_name = q
if len(query_display_name) > 1000:
2020-05-21 18:06:08 +00:00
query_display_name = f'{query_display_name[:1000]}...({query_index})'
2020-04-29 09:28:12 +00:00
print(f'display-name\t{query_index}\t{tsv_escape(query_display_name)}')
2020-04-29 09:28:12 +00:00
2020-02-11 20:00:53 +00:00
# Prewarm: run once on both servers. Helps to bring the data into memory,
2020-02-11 15:01:16 +00:00
# precompile the queries, etc.
# A query might not run on the old server if it uses a function added in the
# new one. We want to run them on the new server only, so that the PR author
# can ensure that the test works properly. Remember the errors we had on
# each server.
2020-09-02 03:29:36 +00:00
query_error_on_connection = [None] * len(all_connections);
for conn_index, c in enumerate(all_connections):
try:
prewarm_id = f'{query_prefix}.prewarm0'
# Will also detect too long queries during warmup stage
res = c.execute(q, query_id = prewarm_id, settings = {'max_execution_time': 10})
print(f'prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}')
except KeyboardInterrupt:
raise
except:
# FIXME the driver reconnects on error and we lose settings, so this
# might lead to further errors or unexpected behavior.
query_error_on_connection[conn_index] = traceback.format_exc();
continue
2020-08-12 05:39:29 +00:00
# Report all errors that ocurred during prewarm and decide what to do next.
# If prewarm fails for the query on all servers -- skip the query and
# continue testing the next query.
# If prewarm fails on one of the servers, run the query on the rest of them.
2020-08-12 05:39:29 +00:00
no_errors = []
for i, e in enumerate(query_error_on_connection):
if e:
print(e, file = sys.stderr)
else:
no_errors.append(i)
if len(no_errors) == 0:
continue
2020-09-02 03:29:36 +00:00
elif len(no_errors) < len(all_connections):
2020-08-12 05:39:29 +00:00
print(f'partial\t{query_index}\t{no_errors}')
2020-09-02 03:29:36 +00:00
this_query_connections = [all_connections[index] for index in no_errors]
2020-02-11 15:01:16 +00:00
# Now, perform measured runs.
# Track the time spent by the client to process this query, so that we can
# notice the queries that take long to process on the client side, e.g. by
# sending excessive data.
2020-01-16 19:39:07 +00:00
start_seconds = time.perf_counter()
server_seconds = 0
profile_seconds = 0
2020-09-02 03:29:36 +00:00
run = 0
# Arrays of run times for each connection.
2020-09-19 18:21:29 +00:00
all_server_times = []
for conn_index, c in enumerate(this_query_connections):
all_server_times.append([])
2020-09-02 03:29:36 +00:00
while True:
run_id = f'{query_prefix}.run{run}'
2020-08-12 05:39:29 +00:00
2020-09-02 03:29:36 +00:00
for conn_index, c in enumerate(this_query_connections):
2020-08-12 05:39:29 +00:00
try:
res = c.execute(q, query_id = run_id)
except Exception as e:
# Add query id to the exception to make debugging easier.
e.args = (run_id, *e.args)
e.message = run_id + ': ' + e.message
raise
elapsed = c.last_query.elapsed
all_server_times[conn_index].append(elapsed)
2020-01-16 19:39:07 +00:00
server_seconds += elapsed
print(f'query\t{query_index}\t{run_id}\t{conn_index}\t{elapsed}')
if elapsed > 10:
# Stop processing pathologically slow queries, to avoid timing out
# the entire test task. This shouldn't really happen, so we don't
# need much handling for this case and can just exit.
2020-09-21 19:08:41 +00:00
print(f'The query no. {query_index} is taking too long to run ({elapsed} s)', file=sys.stderr)
exit(2)
2020-09-02 03:29:36 +00:00
# Be careful with the counter, after this line it's the next iteration
# already.
run += 1
# Try to run any query for at least the specified number of times,
# before considering other stop conditions.
2020-09-18 08:27:27 +00:00
if run < args.runs:
continue
2020-09-02 03:29:36 +00:00
# For very short queries we have a special mode where we run them for at
# least some time. The recommended lower bound of run time for "normal"
# queries is about 0.1 s, and we run them about 10 times, giving the
# time per query per server of about one second. Use this value as a
# reference for "short" queries.
if is_short[query_index]:
2020-09-03 01:42:25 +00:00
if server_seconds >= 2 * len(this_query_connections):
2020-09-02 03:29:36 +00:00
break
# Also limit the number of runs, so that we don't go crazy processing
# the results -- 'eqmed.sql' is really suboptimal.
2020-09-16 11:39:44 +00:00
if run >= 500:
2020-09-02 03:29:36 +00:00
break
else:
if run >= args.runs:
break
2020-01-16 19:39:07 +00:00
client_seconds = time.perf_counter() - start_seconds
print(f'client-time\t{query_index}\t{client_seconds}\t{server_seconds}')
# Run additional profiling queries to collect profile data, but only if test times appeared to be different.
# We have to do it after normal runs because otherwise it will affect test statistics too much
2020-09-25 09:52:09 +00:00
if len(all_server_times) != 2:
continue
2020-09-28 18:43:47 +00:00
if len(all_server_times[0]) < 3:
# Don't fail if for some reason there are not enough measurements.
continue
2020-09-25 09:52:09 +00:00
pvalue = stats.ttest_ind(all_server_times[0], all_server_times[1], equal_var = False).pvalue
2020-09-29 19:06:45 +00:00
median = [statistics.median(t) for t in all_server_times]
2020-09-30 11:55:24 +00:00
# Keep this consistent with the value used in report. Should eventually move
# to (median[1] - median[0]) / min(median), which is compatible with "times"
# difference we use in report (max(median) / min(median)).
relative_diff = (median[1] - median[0]) / median[0]
print(f'diff\t{query_index}\t{median[0]}\t{median[1]}\t{relative_diff}\t{pvalue}')
2020-09-29 19:06:45 +00:00
if abs(relative_diff) < ignored_relative_change or pvalue > 0.05:
2020-09-25 09:52:09 +00:00
continue
# Perform profile runs for fixed amount of time. Don't limit the number
# of runs, because we also have short queries.
profile_start_seconds = time.perf_counter()
run = 0
while time.perf_counter() - profile_start_seconds < args.profile_seconds:
run_id = f'{query_prefix}.profile{run}'
for conn_index, c in enumerate(this_query_connections):
try:
res = c.execute(q, query_id = run_id, settings = {'query_profiler_real_time_period_ns': 10000000})
print(f'profile\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}')
except Exception as e:
# Add query id to the exception to make debugging easier.
e.args = (run_id, *e.args)
e.message = run_id + ': ' + e.message
raise
run += 1
profile_total_seconds += time.perf_counter() - profile_start_seconds
2020-09-25 09:52:09 +00:00
print(f'profile-total\t{profile_total_seconds}')
2020-09-24 11:46:03 +00:00
reportStageEnd('run')
# Run drop queries
2020-03-03 13:38:45 +00:00
drop_queries = substitute_parameters(drop_query_templates)
2020-09-02 03:29:36 +00:00
for conn_index, c in enumerate(all_connections):
for q in drop_queries:
c.execute(q)
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
2020-09-24 11:46:03 +00:00
reportStageEnd('drop-2')