ClickHouse/docker/test/performance-comparison/perf.py


#!/usr/bin/python3
import os
import sys
import itertools
import clickhouse_driver
import xml.etree.ElementTree as et
import argparse
import pprint
import string
import time
import traceback

def tsv_escape(s):
    return s.replace('\\', '\\\\').replace('\t', '\\t').replace('\n', '\\n').replace('\r','')
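# For example, tsv_escape('select 1,\n\t2') == 'select 1,\\n\\t2', so an escaped
# query stays within a single field of the tab-separated report lines below.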

stage_start_seconds = time.perf_counter()

def report_stage_end(stage_name):
    global stage_start_seconds
    print('{}\t{}'.format(stage_name, time.perf_counter() - stage_start_seconds))
    stage_start_seconds = time.perf_counter()

report_stage_end('start')
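# Everything this script reports goes to stdout as tab-separated lines: the stage
# timings printed by report_stage_end(), plus records such as 'server',
# 'display-name', 'prewarm', 'query' and 'client-time' printed below. The exact
# consumer is outside this file; presumably the comparison report scripts parse
# these lines.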
parser = argparse.ArgumentParser(description='Run performance test.')
# Explicitly decode files as UTF-8 because sometimes we have Russian characters in queries, and LANG=C is set.
parser.add_argument('file', metavar='FILE', type=argparse.FileType('r', encoding='utf-8'), nargs=1, help='test description file')
parser.add_argument('--host', nargs='*', default=['localhost'], help="Server hostname(s). Corresponds to '--port' options.")
parser.add_argument('--port', nargs='*', default=[9000], help="Server port(s). Corresponds to '--host' options.")
parser.add_argument('--runs', type=int, default=int(os.environ.get('CHPC_RUNS', 13)), help='Number of query runs per server. Defaults to CHPC_RUNS environment variable.')
parser.add_argument('--no-long', type=bool, default=True, help='Skip the tests tagged as long.')
args = parser.parse_args()
test_name = os.path.splitext(os.path.basename(args.file[0].name))[0]
tree = et.parse(args.file[0])
root = tree.getroot()
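# A rough sketch of the test description layout this script expects, inferred
# from the element names it reads below (not an authoritative schema):
#
#   <test max_ignored_relative_change="0.1">
#       <tag>long</tag>
#       <main_metric><min_time/></main_metric>
#       <settings><max_threads>1</max_threads></settings>
#       <substitutions>
#           <substitution>
#               <name>table</name>
#               <values><value>hits_10m</value><value>hits_100m</value></values>
#           </substitution>
#       </substitutions>
#       <preconditions><table_exists>hits_10m</table_exists></preconditions>
#       <create_query>...</create_query>
#       <fill_query>...</fill_query>
#       <query>select count() from {table}</query>
#       <drop_query>...</drop_query>
#   </test>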
# Skip tests tagged as long (controlled by --no-long, enabled by default).
if args.no_long:
    for tag in root.findall('.//tag'):
        if tag.text == 'long':
            print('skipped\tTest is tagged as long.')
            sys.exit(0)
# Check main metric
main_metric_element = root.find('main_metric/*')
if main_metric_element is not None and main_metric_element.tag != 'min_time':
    raise Exception('Only the min_time main metric is supported. This test uses \'{}\''.format(main_metric_element.tag))
# FIXME another way to detect infinite tests. They should have an appropriate main_metric but sometimes they don't.
infinite_sign = root.find('.//average_speed_not_changing_for_ms')
if infinite_sign is not None:
    raise Exception('Looks like the test is infinite (sign 1)')

# Print report threshold for the test if it is set.
if 'max_ignored_relative_change' in root.attrib:
    print(f'report-threshold\t{root.attrib["max_ignored_relative_change"]}')
# Open connections
servers = [{'host': host, 'port': port} for (host, port) in zip(args.host, args.port)]
connections = [clickhouse_driver.Client(**server) for server in servers]

for s in servers:
    print('server\t{}\t{}'.format(s['host'], s['port']))
report_stage_end('connect')
# Process query parameters
subst_elems = root.findall('substitutions/substitution')
available_parameters = {} # { 'table': ['hits_10m', 'hits_100m'], ... }
for e in subst_elems:
    available_parameters[e.find('name').text] = [v.text for v in e.findall('values/value')]
# Takes parallel lists of templates, substitutes them with all combos of
# parameters. The set of parameters is determined based on the first list.
# Note: keep the order of queries -- sometimes we have DROP IF EXISTS
# followed by CREATE in create queries section, so the order matters.
def substitute_parameters(query_templates, *other_templates):
    query_results = []
    other_results = [[] for _ in other_templates]
    for i, q in enumerate(query_templates):
        keys = set(n for _, n, _, _ in string.Formatter().parse(q) if n)
        values = [available_parameters[k] for k in keys]
        combos = itertools.product(*values)
        for c in combos:
            with_keys = dict(zip(keys, c))
            query_results.append(q.format(**with_keys))
            for j, t in enumerate(other_templates):
                other_results[j].append(t[i].format(**with_keys))
    if len(other_templates):
        return query_results, *other_results
    else:
        return query_results
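# For example, with available_parameters = {'table': ['hits_10m', 'hits_100m']},
# substitute_parameters(['select count() from {table}']) returns
# ['select count() from hits_10m', 'select count() from hits_100m'].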
report_stage_end('substitute')
# Run drop queries, ignoring errors. Do this before all other activity, because
# clickhouse_driver disconnects on error (this is not configurable), and the new
# connection loses the changes in settings.
drop_query_templates = [q.text for q in root.findall('drop_query')]
drop_queries = substitute_parameters(drop_query_templates)
for c in connections:
    for q in drop_queries:
        try:
            c.execute(q)
        except:
            pass
report_stage_end('drop1')
# Apply settings.
# If there are errors, report them and continue -- maybe a new test uses a setting
# that is not in master, but the queries can still run. If we have multiple
# settings and one of them throws an exception, all previous settings for this
# connection will be reset, because the driver reconnects on error (not
# configurable). So the end result is uncertain, but hopefully we'll be able to
# run at least some queries.
settings = root.findall('settings/*')
for c in connections:
    for s in settings:
        try:
            c.execute("set {} = '{}'".format(s.tag, s.text))
        except:
            print(traceback.format_exc(), file=sys.stderr)
report_stage_end('settings')
# Check tables that should exist. If they don't exist, just skip this test.
tables = [e.text for e in root.findall('preconditions/table_exists')]
for t in tables:
    for c in connections:
        try:
            res = c.execute("select 1 from {} limit 1".format(t))
        except:
            exception_message = traceback.format_exception_only(*sys.exc_info()[:2])[-1]
            skipped_message = ' '.join(exception_message.split('\n')[:2])
            print(f'skipped\t{tsv_escape(skipped_message)}')
            sys.exit(0)
report_stage_end('preconditions')
# Run create queries
create_query_templates = [q.text for q in root.findall('create_query')]
create_queries = substitute_parameters(create_query_templates)
for c in connections:
    for q in create_queries:
        c.execute(q)
# Run fill queries
fill_query_templates = [q.text for q in root.findall('fill_query')]
fill_queries = substitute_parameters(fill_query_templates)
for c in connections:
    for q in fill_queries:
        c.execute(q)
report_stage_end('fill')
# Build a list of test queries, substituting parameters to query templates,
# and reporting the queries marked as short.
test_queries = []
for e in root.findall('query'):
    new_queries = []
    if 'short' in e.attrib:
        new_queries, short = substitute_parameters([e.text], [e.attrib['short']])
        for i, s in enumerate(short):
            if eval(s):
                print(f'short\t{i + len(test_queries)}')
    else:
        new_queries = substitute_parameters([e.text])
    test_queries += new_queries
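# The 'short' attribute goes through the same parameter substitution as the
# query text and is then eval'd as a Python expression, so a constant marker
# such as short="1" would presumably work as well as a parameter-dependent one.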
report_stage_end('substitute2')
# Run test queries.
for query_index, q in enumerate(test_queries):
    query_prefix = f'{test_name}.query{query_index}'

    # We have some crazy long queries (about 100kB), so trim them to a sane
    # length. This means we can't use the query text as an identifier and have
    # to use the test name + the test-wide query index.
    query_display_name = q
    if len(query_display_name) > 1000:
        query_display_name = f'{query_display_name[:1000]}...({query_index})'

    print(f'display-name\t{query_index}\t{tsv_escape(query_display_name)}')

    # Prewarm: run once on both servers. Helps to bring the data into memory,
    # precompile the queries, etc.
    try:
        for conn_index, c in enumerate(connections):
            prewarm_id = f'{query_prefix}.prewarm0'
            res = c.execute(q, query_id = prewarm_id)
            print(f'prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}')
    except KeyboardInterrupt:
        raise
    except:
        # If prewarm fails for some query -- skip it, and try to test the others.
        # This might happen if the new test introduces some function that the
        # old server doesn't support. Still, report it as an error.
        # FIXME the driver reconnects on error and we lose settings, so this might
        # lead to further errors or unexpected behavior.
        print(traceback.format_exc(), file=sys.stderr)
        continue

    # Now, perform measured runs.
    # Track the time spent by the client to process this query, so that we can
    # notice queries that take long to process on the client side, e.g. by
    # sending excessive data.
    start_seconds = time.perf_counter()
    server_seconds = 0
    for run in range(0, args.runs):
        run_id = f'{query_prefix}.run{run}'
        for conn_index, c in enumerate(connections):
            res = c.execute(q, query_id = run_id)
            print(f'query\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}')
            server_seconds += c.last_query.elapsed

    client_seconds = time.perf_counter() - start_seconds
    print(f'client-time\t{query_index}\t{client_seconds}\t{server_seconds}')
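# Each measured run above emits one tab-separated 'query' line per server, e.g.
# (hypothetical numbers):
#   query   3   <test-name>.query3.run7   0   0.123
#   query   3   <test-name>.query3.run7   1   0.130
# and every query ends with a single 'client-time' summary line.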
report_stage_end('benchmark')
# Run drop queries
drop_query_templates = [q.text for q in root.findall('drop_query')]
drop_queries = substitute_parameters(drop_query_templates)
for c in connections:
    for q in drop_queries:
        c.execute(q)
report_stage_end('drop2')