mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-26 01:22:04 +00:00
Merge pull request #15027 from ClickHouse/perf-test-separate-profiler
Separate profile runs in performance test
This commit is contained in:
commit
5e56f14d5f
@ -29,7 +29,7 @@ RUN apt-get update \
|
|||||||
tzdata \
|
tzdata \
|
||||||
vim \
|
vim \
|
||||||
wget \
|
wget \
|
||||||
&& pip3 --no-cache-dir install clickhouse_driver \
|
&& pip3 --no-cache-dir install clickhouse_driver scipy \
|
||||||
&& apt-get purge --yes python3-dev g++ \
|
&& apt-get purge --yes python3-dev g++ \
|
||||||
&& apt-get autoremove --yes \
|
&& apt-get autoremove --yes \
|
||||||
&& apt-get clean \
|
&& apt-get clean \
|
||||||
|
@ -1,8 +1,6 @@
|
|||||||
<yandex>
|
<yandex>
|
||||||
<profiles>
|
<profiles>
|
||||||
<default>
|
<default>
|
||||||
<query_profiler_real_time_period_ns>10000000</query_profiler_real_time_period_ns>
|
|
||||||
<query_profiler_cpu_time_period_ns>0</query_profiler_cpu_time_period_ns>
|
|
||||||
<allow_introspection_functions>1</allow_introspection_functions>
|
<allow_introspection_functions>1</allow_introspection_functions>
|
||||||
<log_queries>1</log_queries>
|
<log_queries>1</log_queries>
|
||||||
<metrics_perf_events_enabled>1</metrics_perf_events_enabled>
|
<metrics_perf_events_enabled>1</metrics_perf_events_enabled>
|
||||||
|
@ -15,6 +15,7 @@ import sys
|
|||||||
import time
|
import time
|
||||||
import traceback
|
import traceback
|
||||||
import xml.etree.ElementTree as et
|
import xml.etree.ElementTree as et
|
||||||
|
from scipy import stats
|
||||||
|
|
||||||
def tsv_escape(s):
|
def tsv_escape(s):
|
||||||
return s.replace('\\', '\\\\').replace('\t', '\\t').replace('\n', '\\n').replace('\r','')
|
return s.replace('\\', '\\\\').replace('\t', '\\t').replace('\n', '\\n').replace('\r','')
|
||||||
@ -211,7 +212,8 @@ for query_index in queries_to_run:
|
|||||||
for conn_index, c in enumerate(all_connections):
|
for conn_index, c in enumerate(all_connections):
|
||||||
try:
|
try:
|
||||||
prewarm_id = f'{query_prefix}.prewarm0'
|
prewarm_id = f'{query_prefix}.prewarm0'
|
||||||
res = c.execute(q, query_id = prewarm_id)
|
# Will also detect too long queries during warmup stage
|
||||||
|
res = c.execute(q, query_id = prewarm_id, settings = {'max_execution_time': 10})
|
||||||
print(f'prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}')
|
print(f'prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}')
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
raise
|
raise
|
||||||
@ -221,7 +223,6 @@ for query_index in queries_to_run:
|
|||||||
query_error_on_connection[conn_index] = traceback.format_exc();
|
query_error_on_connection[conn_index] = traceback.format_exc();
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
||||||
# Report all errors that ocurred during prewarm and decide what to do next.
|
# Report all errors that ocurred during prewarm and decide what to do next.
|
||||||
# If prewarm fails for the query on all servers -- skip the query and
|
# If prewarm fails for the query on all servers -- skip the query and
|
||||||
# continue testing the next query.
|
# continue testing the next query.
|
||||||
@ -246,7 +247,14 @@ for query_index in queries_to_run:
|
|||||||
# sending excessive data.
|
# sending excessive data.
|
||||||
start_seconds = time.perf_counter()
|
start_seconds = time.perf_counter()
|
||||||
server_seconds = 0
|
server_seconds = 0
|
||||||
|
profile_seconds = 0
|
||||||
run = 0
|
run = 0
|
||||||
|
|
||||||
|
# Arrays of run times for each connection.
|
||||||
|
all_server_times = []
|
||||||
|
for conn_index, c in enumerate(this_query_connections):
|
||||||
|
all_server_times.append([])
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
run_id = f'{query_prefix}.run{run}'
|
run_id = f'{query_prefix}.run{run}'
|
||||||
|
|
||||||
@ -259,14 +267,17 @@ for query_index in queries_to_run:
|
|||||||
e.message = run_id + ': ' + e.message
|
e.message = run_id + ': ' + e.message
|
||||||
raise
|
raise
|
||||||
|
|
||||||
server_seconds += c.last_query.elapsed
|
elapsed = c.last_query.elapsed
|
||||||
print(f'query\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}')
|
all_server_times[conn_index].append(elapsed)
|
||||||
|
|
||||||
if c.last_query.elapsed > 10:
|
server_seconds += elapsed
|
||||||
|
print(f'query\t{query_index}\t{run_id}\t{conn_index}\t{elapsed}')
|
||||||
|
|
||||||
|
if elapsed > 10:
|
||||||
# Stop processing pathologically slow queries, to avoid timing out
|
# Stop processing pathologically slow queries, to avoid timing out
|
||||||
# the entire test task. This shouldn't really happen, so we don't
|
# the entire test task. This shouldn't really happen, so we don't
|
||||||
# need much handling for this case and can just exit.
|
# need much handling for this case and can just exit.
|
||||||
print(f'The query no. {query_index} is taking too long to run ({c.last_query.elapsed} s)', file=sys.stderr)
|
print(f'The query no. {query_index} is taking too long to run ({c.elapsed} s)', file=sys.stderr)
|
||||||
exit(2)
|
exit(2)
|
||||||
|
|
||||||
# Be careful with the counter, after this line it's the next iteration
|
# Be careful with the counter, after this line it's the next iteration
|
||||||
@ -297,6 +308,35 @@ for query_index in queries_to_run:
|
|||||||
client_seconds = time.perf_counter() - start_seconds
|
client_seconds = time.perf_counter() - start_seconds
|
||||||
print(f'client-time\t{query_index}\t{client_seconds}\t{server_seconds}')
|
print(f'client-time\t{query_index}\t{client_seconds}\t{server_seconds}')
|
||||||
|
|
||||||
|
#print(all_server_times)
|
||||||
|
#print(stats.ttest_ind(all_server_times[0], all_server_times[1], equal_var = False).pvalue)
|
||||||
|
|
||||||
|
# Run additional profiling queries to collect profile data, but only if test times appeared to be different.
|
||||||
|
# We have to do it after normal runs because otherwise it will affect test statistics too much
|
||||||
|
if len(all_server_times) == 2 and stats.ttest_ind(all_server_times[0], all_server_times[1], equal_var = False).pvalue < 0.1:
|
||||||
|
run = 0
|
||||||
|
while True:
|
||||||
|
run_id = f'{query_prefix}.profile{run}'
|
||||||
|
|
||||||
|
for conn_index, c in enumerate(this_query_connections):
|
||||||
|
try:
|
||||||
|
res = c.execute(q, query_id = run_id, settings = {'query_profiler_real_time_period_ns': 10000000})
|
||||||
|
print(f'profile\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}')
|
||||||
|
except Exception as e:
|
||||||
|
# Add query id to the exception to make debugging easier.
|
||||||
|
e.args = (run_id, *e.args)
|
||||||
|
e.message = run_id + ': ' + e.message
|
||||||
|
raise
|
||||||
|
|
||||||
|
elapsed = c.last_query.elapsed
|
||||||
|
profile_seconds += elapsed
|
||||||
|
|
||||||
|
run += 1
|
||||||
|
# Don't spend too much time for profile runs
|
||||||
|
if run > args.runs or profile_seconds > 10:
|
||||||
|
break
|
||||||
|
# And don't bother with short queries
|
||||||
|
|
||||||
# Run drop queries
|
# Run drop queries
|
||||||
drop_queries = substitute_parameters(drop_query_templates)
|
drop_queries = substitute_parameters(drop_query_templates)
|
||||||
for conn_index, c in enumerate(all_connections):
|
for conn_index, c in enumerate(all_connections):
|
||||||
|
Loading…
Reference in New Issue
Block a user