Merge pull request #15027 from ClickHouse/perf-test-separate-profiler

Separate profile runs in performance test
This commit is contained in:
alexey-milovidov 2020-09-21 21:07:14 +03:00 committed by GitHub
commit 5e56f14d5f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 47 additions and 9 deletions

View File

@ -29,7 +29,7 @@ RUN apt-get update \
tzdata \
vim \
wget \
&& pip3 --no-cache-dir install clickhouse_driver \
&& pip3 --no-cache-dir install clickhouse_driver scipy \
&& apt-get purge --yes python3-dev g++ \
&& apt-get autoremove --yes \
&& apt-get clean \

View File

@ -1,8 +1,6 @@
<yandex>
<profiles>
<default>
<query_profiler_real_time_period_ns>10000000</query_profiler_real_time_period_ns>
<query_profiler_cpu_time_period_ns>0</query_profiler_cpu_time_period_ns>
<allow_introspection_functions>1</allow_introspection_functions>
<log_queries>1</log_queries>
<metrics_perf_events_enabled>1</metrics_perf_events_enabled>

View File

@ -15,6 +15,7 @@ import sys
import time
import traceback
import xml.etree.ElementTree as et
from scipy import stats
def tsv_escape(s):
return s.replace('\\', '\\\\').replace('\t', '\\t').replace('\n', '\\n').replace('\r','')
@ -211,7 +212,8 @@ for query_index in queries_to_run:
for conn_index, c in enumerate(all_connections):
try:
prewarm_id = f'{query_prefix}.prewarm0'
res = c.execute(q, query_id = prewarm_id)
# Will also detect too long queries during warmup stage
res = c.execute(q, query_id = prewarm_id, settings = {'max_execution_time': 10})
print(f'prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}')
except KeyboardInterrupt:
raise
@ -221,7 +223,6 @@ for query_index in queries_to_run:
query_error_on_connection[conn_index] = traceback.format_exc();
continue
# Report all errors that ocurred during prewarm and decide what to do next.
# If prewarm fails for the query on all servers -- skip the query and
# continue testing the next query.
@ -246,7 +247,14 @@ for query_index in queries_to_run:
# sending excessive data.
start_seconds = time.perf_counter()
server_seconds = 0
profile_seconds = 0
run = 0
# Arrays of run times for each connection.
all_server_times = []
for conn_index, c in enumerate(this_query_connections):
all_server_times.append([])
while True:
run_id = f'{query_prefix}.run{run}'
@ -259,14 +267,17 @@ for query_index in queries_to_run:
e.message = run_id + ': ' + e.message
raise
server_seconds += c.last_query.elapsed
print(f'query\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}')
elapsed = c.last_query.elapsed
all_server_times[conn_index].append(elapsed)
if c.last_query.elapsed > 10:
server_seconds += elapsed
print(f'query\t{query_index}\t{run_id}\t{conn_index}\t{elapsed}')
if elapsed > 10:
# Stop processing pathologically slow queries, to avoid timing out
# the entire test task. This shouldn't really happen, so we don't
# need much handling for this case and can just exit.
print(f'The query no. {query_index} is taking too long to run ({c.last_query.elapsed} s)', file=sys.stderr)
print(f'The query no. {query_index} is taking too long to run ({c.elapsed} s)', file=sys.stderr)
exit(2)
# Be careful with the counter, after this line it's the next iteration
@ -297,6 +308,35 @@ for query_index in queries_to_run:
client_seconds = time.perf_counter() - start_seconds
print(f'client-time\t{query_index}\t{client_seconds}\t{server_seconds}')
#print(all_server_times)
#print(stats.ttest_ind(all_server_times[0], all_server_times[1], equal_var = False).pvalue)
# Run additional profiling queries to collect profile data, but only if test times appeared to be different.
# We have to do it after normal runs because otherwise it will affect test statistics too much
if len(all_server_times) == 2 and stats.ttest_ind(all_server_times[0], all_server_times[1], equal_var = False).pvalue < 0.1:
run = 0
while True:
run_id = f'{query_prefix}.profile{run}'
for conn_index, c in enumerate(this_query_connections):
try:
res = c.execute(q, query_id = run_id, settings = {'query_profiler_real_time_period_ns': 10000000})
print(f'profile\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}')
except Exception as e:
# Add query id to the exception to make debugging easier.
e.args = (run_id, *e.args)
e.message = run_id + ': ' + e.message
raise
elapsed = c.last_query.elapsed
profile_seconds += elapsed
run += 1
# Don't spend too much time for profile runs
if run > args.runs or profile_seconds > 10:
break
# And don't bother with short queries
# Run drop queries
drop_queries = substitute_parameters(drop_query_templates)
for conn_index, c in enumerate(all_connections):