debug perf test run time

This commit is contained in:
Alexander Kuzmenkov 2020-09-24 14:46:03 +03:00
parent 7a2044f694
commit dee34a834f
2 changed files with 79 additions and 7 deletions

View File

@ -658,13 +658,15 @@ create view test_runs as
group by test
;
create table test_times_report engine File(TSV, 'report/test-times.tsv') as
select wall_clock_time_per_test.test, real,
toDecimal64(total_client_time, 3),
create view test_times_view as
select
wall_clock_time_per_test.test test,
real,
total_client_time,
queries,
toDecimal64(query_max, 3),
toDecimal64(real / queries, 3) avg_real_per_query,
toDecimal64(query_min, 3),
query_max,
real / queries avg_real_per_query,
query_min,
runs
from test_time
-- wall clock times are also measured for skipped tests, so don't
@ -673,7 +675,43 @@ create table test_times_report engine File(TSV, 'report/test-times.tsv') as
on wall_clock_time_per_test.test = test_time.test
full join test_runs
on test_runs.test = test_time.test
order by avg_real_per_query desc;
;
-- WITH TOTALS doesn't work with INSERT SELECT, so we have to jump through these
-- hoops: https://github.com/ClickHouse/ClickHouse/issues/15227
create view test_times_view_total as
select
'Total' test,
sum(real),
sum(total_client_time),
sum(queries),
max(query_max),
sum(real) / sum(queries) avg_real_per_query,
min(query_min),
-- Totaling the number of runs doesn't make sense, but use the max so
-- that the reporting script doesn't complain about queries being too
-- long.
max(runs)
from test_times_view
;
create table test_times_report engine File(TSV, 'report/test-times.tsv') as
select
test,
toDecimal64(real, 3),
toDecimal64(total_client_time, 3),
queries,
toDecimal64(query_max, 3),
toDecimal64(avg_real_per_query, 3),
toDecimal64(query_min, 3),
runs
from (
select * from test_times_view
union all
select * from test_times_view_total
)
order by test = 'Total' desc, avg_real_per_query desc
;
-- report for all queries page, only main metric
create table all_tests_report engine File(TSV, 'report/all-queries.tsv') as

View File

@ -18,9 +18,22 @@ import xml.etree.ElementTree as et
from threading import Thread
from scipy import stats
total_start_seconds = time.perf_counter()
stage_start_seconds = total_start_seconds
def reportStageEnd(stage):
global stage_start_seconds, total_start_seconds
current = time.perf_counter()
print(f'stage\t{stage}\t{current - stage_start_seconds:.3f}\t{current - total_start_seconds:.3f}')
stage_start_seconds = current
def tsv_escape(s):
return s.replace('\\', '\\\\').replace('\t', '\\t').replace('\n', '\\n').replace('\r','')
parser = argparse.ArgumentParser(description='Run performance test.')
# Explicitly decode files as UTF-8 because sometimes we have Russian characters in queries, and LANG=C is set.
parser.add_argument('file', metavar='FILE', type=argparse.FileType('r', encoding='utf-8'), nargs=1, help='test description file')
@ -33,11 +46,15 @@ parser.add_argument('--print-queries', action='store_true', help='Print test que
parser.add_argument('--print-settings', action='store_true', help='Print test settings and exit.')
args = parser.parse_args()
reportStageEnd('start')
test_name = os.path.splitext(os.path.basename(args.file[0].name))[0]
tree = et.parse(args.file[0])
root = tree.getroot()
reportStageEnd('parse')
# Process query parameters
subst_elems = root.findall('substitutions/substitution')
available_parameters = {} # { 'table': ['hits_10m', 'hits_100m'], ... }
@ -109,6 +126,8 @@ if not args.long:
if 'max_ignored_relative_change' in root.attrib:
print(f'report-threshold\t{root.attrib["max_ignored_relative_change"]}')
reportStageEnd('before-connect')
# Open connections
servers = [{'host': host, 'port': port} for (host, port) in zip(args.host, args.port)]
all_connections = [clickhouse_driver.Client(**server) for server in servers]
@ -116,6 +135,8 @@ all_connections = [clickhouse_driver.Client(**server) for server in servers]
for s in servers:
print('server\t{}\t{}'.format(s['host'], s['port']))
reportStageEnd('connect')
# Run drop queries, ignoring errors. Do this before all other activity, because
# clickhouse_driver disconnects on error (this is not configurable), and the new
# connection loses the changes in settings.
@ -129,6 +150,8 @@ for conn_index, c in enumerate(all_connections):
except:
pass
reportStageEnd('drop-1')
# Apply settings.
# If there are errors, report them and continue -- maybe a new test uses a setting
# that is not in master, but the queries can still run. If we have multiple
@ -146,6 +169,8 @@ for conn_index, c in enumerate(all_connections):
except:
print(traceback.format_exc(), file=sys.stderr)
reportStageEnd('settings')
# Check tables that should exist. If they don't exist, just skip this test.
tables = [e.text for e in root.findall('preconditions/table_exists')]
for t in tables:
@ -158,6 +183,8 @@ for t in tables:
print(f'skipped\t{tsv_escape(skipped_message)}')
sys.exit(0)
reportStageEnd('preconditions')
# Run create and fill queries. We will run them simultaneously for both servers,
# to save time.
# The weird search is to keep the relative order of elements, which matters, and
@ -188,6 +215,8 @@ for t in threads:
for t in threads:
t.join()
reportStageEnd('create')
# Run the queries in randomized order, but preserve their indexes as specified
# in the test XML. To avoid using too much time, limit the number of queries
# we run per test.
@ -342,9 +371,14 @@ for query_index in queries_to_run:
break
# And don't bother with short queries
reportStageEnd('run')
# Run drop queries
drop_queries = substitute_parameters(drop_query_templates)
for conn_index, c in enumerate(all_connections):
for q in drop_queries:
c.execute(q)
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
reportStageEnd('drop-2')