diff --git a/benchmark/greenplum/result_parser.py b/benchmark/greenplum/result_parser.py
index 8af20d265a0..4ed1aa5c4a5 100755
--- a/benchmark/greenplum/result_parser.py
+++ b/benchmark/greenplum/result_parser.py
@@ -4,11 +4,12 @@
import sys
import json
+
def parse_block(block=[], options=[]):
- #print('block is here', block)
- #show_query = False
- #show_query = options.show_query
+ # print('block is here', block)
+ # show_query = False
+ # show_query = options.show_query
result = []
query = block[0].strip()
if len(block) > 4:
@@ -20,9 +21,9 @@ def parse_block(block=[], options=[]):
timing2 = block[2].strip().split()[1]
timing3 = block[3].strip().split()[1]
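+        # E.g. (illustrative) a line "Time: 12.3 ms" splits into ["Time:", "12.3", "ms"],
+        # so .split()[1] picks out the timing value "12.3".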
if options.show_queries:
- result.append( query )
+ result.append(query)
if not options.show_first_timings:
- result += [ timing1 , timing2, timing3 ]
+ result += [timing1, timing2, timing3]
else:
result.append(timing1)
return result
@@ -37,12 +38,12 @@ def read_stats_file(options, fname):
for line in f.readlines():
- if 'SELECT' in line:
+ if "SELECT" in line:
if len(block) > 1:
- result.append( parse_block(block, options) )
- block = [ line ]
- elif 'Time:' in line:
- block.append( line )
+ result.append(parse_block(block, options))
+ block = [line]
+ elif "Time:" in line:
+ block.append(line)
return result
@@ -50,7 +51,7 @@ def read_stats_file(options, fname):
def compare_stats_files(options, arguments):
result = []
file_output = []
- pyplot_colors = ['y', 'b', 'g', 'r']
+ pyplot_colors = ["y", "b", "g", "r"]
for fname in arguments[1:]:
file_output.append((read_stats_file(options, fname)))
if len(file_output[0]) > 0:
@@ -58,65 +59,92 @@ def compare_stats_files(options, arguments):
for idx, data_set in enumerate(file_output):
int_result = []
for timing in data_set:
- int_result.append(float(timing[0])) #y values
- result.append([[x for x in range(0, len(int_result)) ], int_result,
-pyplot_colors[idx] + '^' ] )
-# result.append([x for x in range(1, len(int_result)) ]) #x values
-# result.append( pyplot_colors[idx] + '^' )
+ int_result.append(float(timing[0])) # y values
+ result.append(
+ [
+ [x for x in range(0, len(int_result))],
+ int_result,
+ pyplot_colors[idx] + "^",
+ ]
+ )
+ # result.append([x for x in range(1, len(int_result)) ]) #x values
+ # result.append( pyplot_colors[idx] + '^' )
return result
+
def parse_args():
from optparse import OptionParser
- parser = OptionParser(usage='usage: %prog [options] [result_file_path]..')
- parser.add_option("-q", "--show-queries", help="Show statements along with timings", action="store_true", dest="show_queries")
- parser.add_option("-f", "--show-first-timings", help="Show only first tries timings", action="store_true", dest="show_first_timings")
- parser.add_option("-c", "--compare-mode", help="Prepare output for pyplot comparing result files.", action="store", dest="compare_mode")
+
+ parser = OptionParser(usage="usage: %prog [options] [result_file_path]..")
+ parser.add_option(
+ "-q",
+ "--show-queries",
+ help="Show statements along with timings",
+ action="store_true",
+ dest="show_queries",
+ )
+ parser.add_option(
+ "-f",
+ "--show-first-timings",
+ help="Show only first tries timings",
+ action="store_true",
+ dest="show_first_timings",
+ )
+ parser.add_option(
+ "-c",
+ "--compare-mode",
+ help="Prepare output for pyplot comparing result files.",
+ action="store",
+ dest="compare_mode",
+ )
(options, arguments) = parser.parse_args(sys.argv)
if len(arguments) < 2:
parser.print_usage()
sys.exit(1)
- return ( options, arguments )
+ return (options, arguments)
+
def gen_pyplot_code(options, arguments):
- result = ''
+ result = ""
data_sets = compare_stats_files(options, arguments)
for idx, data_set in enumerate(data_sets, start=0):
x_values, y_values, line_style = data_set
- result += '\nplt.plot('
- result += '%s, %s, \'%s\'' % ( x_values, y_values, line_style )
- result += ', label=\'%s try\')' % idx
- print('import matplotlib.pyplot as plt')
+ result += "\nplt.plot("
+ result += "%s, %s, '%s'" % (x_values, y_values, line_style)
+ result += ", label='%s try')" % idx
+ print("import matplotlib.pyplot as plt")
print(result)
- print( 'plt.xlabel(\'Try number\')' )
- print( 'plt.ylabel(\'Timing\')' )
- print( 'plt.title(\'Benchmark query timings\')' )
- print('plt.legend()')
- print('plt.show()')
+ print("plt.xlabel('Try number')")
+ print("plt.ylabel('Timing')")
+ print("plt.title('Benchmark query timings')")
+ print("plt.legend()")
+ print("plt.show()")
def gen_html_json(options, arguments):
tuples = read_stats_file(options, arguments[1])
- print('{')
+ print("{")
print('"system: GreenPlum(x2),')
- print(('"version": "%s",' % '4.3.9.1'))
+ print(('"version": "%s",' % "4.3.9.1"))
print('"data_size": 10000000,')
print('"time": "",')
print('"comments": "",')
print('"result":')
- print('[')
+ print("[")
for s in tuples:
print(s)
- print(']')
- print('}')
+ print("]")
+ print("}")
def main():
- ( options, arguments ) = parse_args()
+ (options, arguments) = parse_args()
if len(arguments) > 2:
gen_pyplot_code(options, arguments)
else:
gen_html_json(options, arguments)
-if __name__ == '__main__':
+
+if __name__ == "__main__":
main()
diff --git a/docker/test/fuzzer/generate-test-j2.py b/docker/test/fuzzer/generate-test-j2.py
index bcc1bf6bc84..11525163ed8 100755
--- a/docker/test/fuzzer/generate-test-j2.py
+++ b/docker/test/fuzzer/generate-test-j2.py
@@ -11,7 +11,7 @@ def removesuffix(text, suffix):
https://www.python.org/dev/peps/pep-0616/
"""
if suffix and text.endswith(suffix):
- return text[:-len(suffix)]
+ return text[: -len(suffix)]
else:
return text[:]
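+# E.g. (illustrative) removesuffix("query.sql.j2", ".j2") -> "query.sql".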
diff --git a/docker/test/integration/hive_server/http_api_server.py b/docker/test/integration/hive_server/http_api_server.py
index 4818b785c89..8a9d3da4846 100644
--- a/docker/test/integration/hive_server/http_api_server.py
+++ b/docker/test/integration/hive_server/http_api_server.py
@@ -3,55 +3,55 @@ import subprocess
import datetime
from flask import Flask, flash, request, redirect, url_for
+
def run_command(command, wait=False):
print("{} - execute shell command:{}".format(datetime.datetime.now(), command))
lines = []
- p = subprocess.Popen(command,
- stdout=subprocess.PIPE,
- stderr=subprocess.STDOUT,
- shell=True)
+ p = subprocess.Popen(
+ command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True
+ )
if wait:
- for l in iter(p.stdout.readline, b''):
+ for l in iter(p.stdout.readline, b""):
lines.append(l)
p.poll()
return (lines, p.returncode)
else:
- return(iter(p.stdout.readline, b''), 0)
+ return (iter(p.stdout.readline, b""), 0)
-UPLOAD_FOLDER = './'
-ALLOWED_EXTENSIONS = {'txt', 'sh'}
+UPLOAD_FOLDER = "./"
+ALLOWED_EXTENSIONS = {"txt", "sh"}
app = Flask(__name__)
-app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
+app.config["UPLOAD_FOLDER"] = UPLOAD_FOLDER
-@app.route('/')
+
+@app.route("/")
def hello_world():
- return 'Hello World'
+ return "Hello World"
def allowed_file(filename):
- return '.' in filename and \
- filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
+ return "." in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS
-@app.route('/upload', methods=['GET', 'POST'])
+@app.route("/upload", methods=["GET", "POST"])
def upload_file():
- if request.method == 'POST':
+ if request.method == "POST":
# check if the post request has the file part
- if 'file' not in request.files:
- flash('No file part')
+ if "file" not in request.files:
+ flash("No file part")
return redirect(request.url)
- file = request.files['file']
+ file = request.files["file"]
# If the user does not select a file, the browser submits an
# empty file without a filename.
- if file.filename == '':
- flash('No selected file')
+ if file.filename == "":
+ flash("No selected file")
return redirect(request.url)
if file and allowed_file(file.filename):
filename = file.filename
- file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
- return redirect(url_for('upload_file', name=filename))
- return '''
+ file.save(os.path.join(app.config["UPLOAD_FOLDER"], filename))
+ return redirect(url_for("upload_file", name=filename))
+ return """
    <!doctype html>
    <title>Upload new File</title>
    <h1>Upload new File</h1>
@@ -59,12 +59,15 @@ def upload_file():
    <form method=post enctype=multipart/form-data>
      <input type=file name=file>
      <input type=submit value=Upload>
    </form>
-    '''
-@app.route('/run', methods=['GET', 'POST'])
+ """
+
+
+@app.route("/run", methods=["GET", "POST"])
def parse_request():
data = request.data # data is empty
run_command(data, wait=True)
- return 'Ok'
+ return "Ok"
-if __name__ == '__main__':
- app.run(port=5011)
+
+if __name__ == "__main__":
+ app.run(port=5011)
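+# Illustrative usage (hypothetical paths):
+#   curl -F file=@test.sh http://localhost:5011/upload
+#   curl -H 'Content-Type: text/plain' --data-binary 'bash ./test.sh' http://localhost:5011/run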
diff --git a/docker/test/performance-comparison/perf.py b/docker/test/performance-comparison/perf.py
index 61987d34299..2266641397b 100755
--- a/docker/test/performance-comparison/perf.py
+++ b/docker/test/performance-comparison/perf.py
@@ -19,58 +19,126 @@ import xml.etree.ElementTree as et
from threading import Thread
from scipy import stats
-logging.basicConfig(format='%(asctime)s: %(levelname)s: %(module)s: %(message)s', level='WARNING')
+logging.basicConfig(
+ format="%(asctime)s: %(levelname)s: %(module)s: %(message)s", level="WARNING"
+)
total_start_seconds = time.perf_counter()
stage_start_seconds = total_start_seconds
+
def reportStageEnd(stage):
global stage_start_seconds, total_start_seconds
current = time.perf_counter()
- print(f'stage\t{stage}\t{current - stage_start_seconds:.3f}\t{current - total_start_seconds:.3f}')
+ print(
+ f"stage\t{stage}\t{current - stage_start_seconds:.3f}\t{current - total_start_seconds:.3f}"
+ )
stage_start_seconds = current
def tsv_escape(s):
- return s.replace('\\', '\\\\').replace('\t', '\\t').replace('\n', '\\n').replace('\r','')
+ return (
+ s.replace("\\", "\\\\")
+ .replace("\t", "\\t")
+ .replace("\n", "\\n")
+ .replace("\r", "")
+ )
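+# E.g. (illustrative) tsv_escape("a\tb\nc") == "a\\tb\\nc": tabs and newlines become
+# two-character escapes so each value stays in a single TSV field.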
-parser = argparse.ArgumentParser(description='Run performance test.')
+parser = argparse.ArgumentParser(description="Run performance test.")
# Explicitly decode files as UTF-8 because sometimes we have Russian characters in queries, and LANG=C is set.
-parser.add_argument('file', metavar='FILE', type=argparse.FileType('r', encoding='utf-8'), nargs=1, help='test description file')
-parser.add_argument('--host', nargs='*', default=['localhost'], help="Space-separated list of server hostname(s). Corresponds to '--port' options.")
-parser.add_argument('--port', nargs='*', default=[9000], help="Space-separated list of server port(s). Corresponds to '--host' options.")
-parser.add_argument('--runs', type=int, default=1, help='Number of query runs per server.')
-parser.add_argument('--max-queries', type=int, default=None, help='Test no more than this number of queries, chosen at random.')
-parser.add_argument('--queries-to-run', nargs='*', type=int, default=None, help='Space-separated list of indexes of queries to test.')
-parser.add_argument('--max-query-seconds', type=int, default=15, help='For how many seconds at most a query is allowed to run. The script finishes with error if this time is exceeded.')
-parser.add_argument('--prewarm-max-query-seconds', type=int, default=180, help='For how many seconds at most a prewarm (cold storage) query is allowed to run. The script finishes with error if this time is exceeded.')
-parser.add_argument('--profile-seconds', type=int, default=0, help='For how many seconds to profile a query for which the performance has changed.')
-parser.add_argument('--long', action='store_true', help='Do not skip the tests tagged as long.')
-parser.add_argument('--print-queries', action='store_true', help='Print test queries and exit.')
-parser.add_argument('--print-settings', action='store_true', help='Print test settings and exit.')
-parser.add_argument('--keep-created-tables', action='store_true', help="Don't drop the created tables after the test.")
-parser.add_argument('--use-existing-tables', action='store_true', help="Don't create or drop the tables, use the existing ones instead.")
+parser.add_argument(
+ "file",
+ metavar="FILE",
+ type=argparse.FileType("r", encoding="utf-8"),
+ nargs=1,
+ help="test description file",
+)
+parser.add_argument(
+ "--host",
+ nargs="*",
+ default=["localhost"],
+ help="Space-separated list of server hostname(s). Corresponds to '--port' options.",
+)
+parser.add_argument(
+ "--port",
+ nargs="*",
+ default=[9000],
+ help="Space-separated list of server port(s). Corresponds to '--host' options.",
+)
+parser.add_argument(
+ "--runs", type=int, default=1, help="Number of query runs per server."
+)
+parser.add_argument(
+ "--max-queries",
+ type=int,
+ default=None,
+ help="Test no more than this number of queries, chosen at random.",
+)
+parser.add_argument(
+ "--queries-to-run",
+ nargs="*",
+ type=int,
+ default=None,
+ help="Space-separated list of indexes of queries to test.",
+)
+parser.add_argument(
+ "--max-query-seconds",
+ type=int,
+ default=15,
+ help="For how many seconds at most a query is allowed to run. The script finishes with error if this time is exceeded.",
+)
+parser.add_argument(
+ "--prewarm-max-query-seconds",
+ type=int,
+ default=180,
+ help="For how many seconds at most a prewarm (cold storage) query is allowed to run. The script finishes with error if this time is exceeded.",
+)
+parser.add_argument(
+ "--profile-seconds",
+ type=int,
+ default=0,
+ help="For how many seconds to profile a query for which the performance has changed.",
+)
+parser.add_argument(
+ "--long", action="store_true", help="Do not skip the tests tagged as long."
+)
+parser.add_argument(
+ "--print-queries", action="store_true", help="Print test queries and exit."
+)
+parser.add_argument(
+ "--print-settings", action="store_true", help="Print test settings and exit."
+)
+parser.add_argument(
+ "--keep-created-tables",
+ action="store_true",
+ help="Don't drop the created tables after the test.",
+)
+parser.add_argument(
+ "--use-existing-tables",
+ action="store_true",
+ help="Don't create or drop the tables, use the existing ones instead.",
+)
args = parser.parse_args()
-reportStageEnd('start')
+reportStageEnd("start")
test_name = os.path.splitext(os.path.basename(args.file[0].name))[0]
tree = et.parse(args.file[0])
root = tree.getroot()
-reportStageEnd('parse')
+reportStageEnd("parse")
# Process query parameters
-subst_elems = root.findall('substitutions/substitution')
-available_parameters = {} # { 'table': ['hits_10m', 'hits_100m'], ... }
+subst_elems = root.findall("substitutions/substitution")
+available_parameters = {} # { 'table': ['hits_10m', 'hits_100m'], ... }
for e in subst_elems:
- name = e.find('name').text
- values = [v.text for v in e.findall('values/value')]
+ name = e.find("name").text
+ values = [v.text for v in e.findall("values/value")]
if not values:
- raise Exception(f'No values given for substitution {{{name}}}')
+ raise Exception(f"No values given for substitution {{{name}}}")
available_parameters[name] = values
@@ -78,7 +146,7 @@ for e in subst_elems:
# parameters. The set of parameters is determined based on the first list.
# Note: keep the order of queries -- sometimes we have DROP IF EXISTS
# followed by CREATE in create queries section, so the order matters.
-def substitute_parameters(query_templates, other_templates = []):
+def substitute_parameters(query_templates, other_templates=[]):
query_results = []
other_results = [[]] * (len(other_templates))
for i, q in enumerate(query_templates):
@@ -103,17 +171,21 @@ def substitute_parameters(query_templates, other_templates = []):
# and reporting the queries marked as short.
test_queries = []
is_short = []
-for e in root.findall('query'):
- new_queries, [new_is_short] = substitute_parameters([e.text], [[e.attrib.get('short', '0')]])
+for e in root.findall("query"):
+ new_queries, [new_is_short] = substitute_parameters(
+ [e.text], [[e.attrib.get("short", "0")]]
+ )
test_queries += new_queries
is_short += [eval(s) for s in new_is_short]
-assert(len(test_queries) == len(is_short))
+assert len(test_queries) == len(is_short)
# If we're given a list of queries to run, check that it makes sense.
for i in args.queries_to_run or []:
if i < 0 or i >= len(test_queries):
- print(f'There is no query no. {i} in this test, only [{0}-{len(test_queries) - 1}] are present')
+ print(
+ f"There is no query no. {i} in this test, only [{0}-{len(test_queries) - 1}] are present"
+ )
exit(1)
# If we're only asked to print the queries, do that and exit.
@@ -125,60 +197,65 @@ if args.print_queries:
# Print short queries
for i, s in enumerate(is_short):
if s:
- print(f'short\t{i}')
+ print(f"short\t{i}")
# If we're only asked to print the settings, do that and exit. These are settings
# for clickhouse-benchmark, so we print them as command line arguments, e.g.
# '--max_memory_usage=10000000'.
if args.print_settings:
- for s in root.findall('settings/*'):
- print(f'--{s.tag}={s.text}')
+ for s in root.findall("settings/*"):
+ print(f"--{s.tag}={s.text}")
exit(0)
# Skip long tests
if not args.long:
- for tag in root.findall('.//tag'):
- if tag.text == 'long':
- print('skipped\tTest is tagged as long.')
+ for tag in root.findall(".//tag"):
+ if tag.text == "long":
+ print("skipped\tTest is tagged as long.")
sys.exit(0)
# Print report threshold for the test if it is set.
ignored_relative_change = 0.05
-if 'max_ignored_relative_change' in root.attrib:
+if "max_ignored_relative_change" in root.attrib:
ignored_relative_change = float(root.attrib["max_ignored_relative_change"])
- print(f'report-threshold\t{ignored_relative_change}')
+ print(f"report-threshold\t{ignored_relative_change}")
-reportStageEnd('before-connect')
+reportStageEnd("before-connect")
# Open connections
-servers = [{'host': host or args.host[0], 'port': port or args.port[0]} for (host, port) in itertools.zip_longest(args.host, args.port)]
+servers = [
+ {"host": host or args.host[0], "port": port or args.port[0]}
+ for (host, port) in itertools.zip_longest(args.host, args.port)
+]
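+# E.g. (hypothetical CLI) --host h1 h2 --port 9000 yields
+# [{"host": "h1", "port": 9000}, {"host": "h2", "port": 9000}]:
+# zip_longest pads the shorter list with None, which the `or` fallbacks
+# replace with the first host/port.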
# Force settings_is_important to fail queries on unknown settings.
-all_connections = [clickhouse_driver.Client(**server, settings_is_important=True) for server in servers]
+all_connections = [
+ clickhouse_driver.Client(**server, settings_is_important=True) for server in servers
+]
for i, s in enumerate(servers):
print(f'server\t{i}\t{s["host"]}\t{s["port"]}')
-reportStageEnd('connect')
+reportStageEnd("connect")
if not args.use_existing_tables:
# Run drop queries, ignoring errors. Do this before all other activity,
# because clickhouse_driver disconnects on error (this is not configurable),
# and the new connection loses the changes in settings.
- drop_query_templates = [q.text for q in root.findall('drop_query')]
+ drop_query_templates = [q.text for q in root.findall("drop_query")]
drop_queries = substitute_parameters(drop_query_templates)
for conn_index, c in enumerate(all_connections):
for q in drop_queries:
try:
c.execute(q)
- print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
+ print(f"drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}")
except:
pass
- reportStageEnd('drop-1')
+ reportStageEnd("drop-1")
# Apply settings.
-settings = root.findall('settings/*')
+settings = root.findall("settings/*")
for conn_index, c in enumerate(all_connections):
for s in settings:
# requires clickhouse-driver >= 1.1.5 to accept arbitrary new settings
@@ -189,48 +266,52 @@ for conn_index, c in enumerate(all_connections):
# the test, which is wrong.
c.execute("select 1")
-reportStageEnd('settings')
+reportStageEnd("settings")
# Check tables that should exist. If they don't exist, just skip this test.
-tables = [e.text for e in root.findall('preconditions/table_exists')]
+tables = [e.text for e in root.findall("preconditions/table_exists")]
for t in tables:
for c in all_connections:
try:
res = c.execute("select 1 from {} limit 1".format(t))
except:
exception_message = traceback.format_exception_only(*sys.exc_info()[:2])[-1]
- skipped_message = ' '.join(exception_message.split('\n')[:2])
- print(f'skipped\t{tsv_escape(skipped_message)}')
+ skipped_message = " ".join(exception_message.split("\n")[:2])
+ print(f"skipped\t{tsv_escape(skipped_message)}")
sys.exit(0)
-reportStageEnd('preconditions')
+reportStageEnd("preconditions")
if not args.use_existing_tables:
# Run create and fill queries. We will run them simultaneously for both
# servers, to save time. The weird XML search + filter is because we want to
# keep the relative order of elements, and etree doesn't support the
# appropriate xpath query.
- create_query_templates = [q.text for q in root.findall('./*')
- if q.tag in ('create_query', 'fill_query')]
+ create_query_templates = [
+ q.text for q in root.findall("./*") if q.tag in ("create_query", "fill_query")
+ ]
create_queries = substitute_parameters(create_query_templates)
# Disallow temporary tables, because the clickhouse_driver reconnects on
# errors, and temporary tables are destroyed. We want to be able to continue
# after some errors.
for q in create_queries:
- if re.search('create temporary table', q, flags=re.IGNORECASE):
- print(f"Temporary tables are not allowed in performance tests: '{q}'",
- file = sys.stderr)
+ if re.search("create temporary table", q, flags=re.IGNORECASE):
+ print(
+ f"Temporary tables are not allowed in performance tests: '{q}'",
+ file=sys.stderr,
+ )
sys.exit(1)
def do_create(connection, index, queries):
for q in queries:
connection.execute(q)
- print(f'create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}')
+ print(f"create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}")
threads = [
- Thread(target = do_create, args = (connection, index, create_queries))
- for index, connection in enumerate(all_connections)]
+ Thread(target=do_create, args=(connection, index, create_queries))
+ for index, connection in enumerate(all_connections)
+ ]
for t in threads:
t.start()
@@ -238,14 +319,16 @@ if not args.use_existing_tables:
for t in threads:
t.join()
- reportStageEnd('create')
+ reportStageEnd("create")
# By default, test all queries.
queries_to_run = range(0, len(test_queries))
if args.max_queries:
# If specified, test a limited number of queries chosen at random.
- queries_to_run = random.sample(range(0, len(test_queries)), min(len(test_queries), args.max_queries))
+ queries_to_run = random.sample(
+ range(0, len(test_queries)), min(len(test_queries), args.max_queries)
+ )
if args.queries_to_run:
# Run the specified queries.
@@ -255,16 +338,16 @@ if args.queries_to_run:
profile_total_seconds = 0
for query_index in queries_to_run:
q = test_queries[query_index]
- query_prefix = f'{test_name}.query{query_index}'
+ query_prefix = f"{test_name}.query{query_index}"
# We have some crazy long queries (about 100kB), so trim them to a sane
# length. This means we can't use query text as an identifier and have to
# use the test name + the test-wide query index.
query_display_name = q
if len(query_display_name) > 1000:
- query_display_name = f'{query_display_name[:1000]}...({query_index})'
+ query_display_name = f"{query_display_name[:1000]}...({query_index})"
- print(f'display-name\t{query_index}\t{tsv_escape(query_display_name)}')
+ print(f"display-name\t{query_index}\t{tsv_escape(query_display_name)}")
# Prewarm: run once on both servers. Helps to bring the data into memory,
# precompile the queries, etc.
@@ -272,10 +355,10 @@ for query_index in queries_to_run:
# new one. We want to run them on the new server only, so that the PR author
# can ensure that the test works properly. Remember the errors we had on
# each server.
- query_error_on_connection = [None] * len(all_connections);
+ query_error_on_connection = [None] * len(all_connections)
for conn_index, c in enumerate(all_connections):
try:
- prewarm_id = f'{query_prefix}.prewarm0'
+ prewarm_id = f"{query_prefix}.prewarm0"
try:
# During the warmup runs, we will also:
@@ -283,25 +366,30 @@ for query_index in queries_to_run:
# * collect profiler traces, which might be helpful for analyzing
# test coverage. We disable profiler for normal runs because
# it makes the results unstable.
- res = c.execute(q, query_id = prewarm_id,
- settings = {
- 'max_execution_time': args.prewarm_max_query_seconds,
- 'query_profiler_real_time_period_ns': 10000000,
- 'memory_profiler_step': '4Mi',
- })
+ res = c.execute(
+ q,
+ query_id=prewarm_id,
+ settings={
+ "max_execution_time": args.prewarm_max_query_seconds,
+ "query_profiler_real_time_period_ns": 10000000,
+ "memory_profiler_step": "4Mi",
+ },
+ )
except clickhouse_driver.errors.Error as e:
# Add query id to the exception to make debugging easier.
e.args = (prewarm_id, *e.args)
- e.message = prewarm_id + ': ' + e.message
+ e.message = prewarm_id + ": " + e.message
raise
- print(f'prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}')
+ print(
+ f"prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}"
+ )
except KeyboardInterrupt:
raise
except:
# FIXME the driver reconnects on error and we lose settings, so this
# might lead to further errors or unexpected behavior.
- query_error_on_connection[conn_index] = traceback.format_exc();
+ query_error_on_connection[conn_index] = traceback.format_exc()
continue
     # Report all errors that occurred during prewarm and decide what to do next.
@@ -311,14 +399,14 @@ for query_index in queries_to_run:
no_errors = []
for i, e in enumerate(query_error_on_connection):
if e:
- print(e, file = sys.stderr)
+ print(e, file=sys.stderr)
else:
no_errors.append(i)
if len(no_errors) == 0:
continue
elif len(no_errors) < len(all_connections):
- print(f'partial\t{query_index}\t{no_errors}')
+ print(f"partial\t{query_index}\t{no_errors}")
this_query_connections = [all_connections[index] for index in no_errors]
@@ -337,27 +425,34 @@ for query_index in queries_to_run:
all_server_times.append([])
while True:
- run_id = f'{query_prefix}.run{run}'
+ run_id = f"{query_prefix}.run{run}"
for conn_index, c in enumerate(this_query_connections):
try:
- res = c.execute(q, query_id = run_id, settings = {'max_execution_time': args.max_query_seconds})
+ res = c.execute(
+ q,
+ query_id=run_id,
+ settings={"max_execution_time": args.max_query_seconds},
+ )
except clickhouse_driver.errors.Error as e:
# Add query id to the exception to make debugging easier.
e.args = (run_id, *e.args)
- e.message = run_id + ': ' + e.message
+ e.message = run_id + ": " + e.message
raise
elapsed = c.last_query.elapsed
all_server_times[conn_index].append(elapsed)
server_seconds += elapsed
- print(f'query\t{query_index}\t{run_id}\t{conn_index}\t{elapsed}')
+ print(f"query\t{query_index}\t{run_id}\t{conn_index}\t{elapsed}")
if elapsed > args.max_query_seconds:
# Do not stop processing pathologically slow queries,
# since this may hide errors in other queries.
- print(f'The query no. {query_index} is taking too long to run ({elapsed} s)', file=sys.stderr)
+ print(
+ f"The query no. {query_index} is taking too long to run ({elapsed} s)",
+ file=sys.stderr,
+ )
# Be careful with the counter, after this line it's the next iteration
# already.
@@ -386,7 +481,7 @@ for query_index in queries_to_run:
break
client_seconds = time.perf_counter() - start_seconds
- print(f'client-time\t{query_index}\t{client_seconds}\t{server_seconds}')
+ print(f"client-time\t{query_index}\t{client_seconds}\t{server_seconds}")
# Run additional profiling queries to collect profile data, but only if test times appeared to be different.
# We have to do it after normal runs because otherwise it will affect test statistics too much
@@ -397,13 +492,15 @@ for query_index in queries_to_run:
# Don't fail if for some reason there are not enough measurements.
continue
- pvalue = stats.ttest_ind(all_server_times[0], all_server_times[1], equal_var = False).pvalue
+ pvalue = stats.ttest_ind(
+ all_server_times[0], all_server_times[1], equal_var=False
+ ).pvalue
median = [statistics.median(t) for t in all_server_times]
# Keep this consistent with the value used in report. Should eventually move
# to (median[1] - median[0]) / min(median), which is compatible with "times"
# difference we use in report (max(median) / min(median)).
relative_diff = (median[1] - median[0]) / median[0]
- print(f'diff\t{query_index}\t{median[0]}\t{median[1]}\t{relative_diff}\t{pvalue}')
+ print(f"diff\t{query_index}\t{median[0]}\t{median[1]}\t{relative_diff}\t{pvalue}")
if abs(relative_diff) < ignored_relative_change or pvalue > 0.05:
continue
@@ -412,25 +509,31 @@ for query_index in queries_to_run:
profile_start_seconds = time.perf_counter()
run = 0
while time.perf_counter() - profile_start_seconds < args.profile_seconds:
- run_id = f'{query_prefix}.profile{run}'
+ run_id = f"{query_prefix}.profile{run}"
for conn_index, c in enumerate(this_query_connections):
try:
- res = c.execute(q, query_id = run_id, settings = {'query_profiler_real_time_period_ns': 10000000})
- print(f'profile\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}')
+ res = c.execute(
+ q,
+ query_id=run_id,
+ settings={"query_profiler_real_time_period_ns": 10000000},
+ )
+ print(
+ f"profile\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}"
+ )
except clickhouse_driver.errors.Error as e:
# Add query id to the exception to make debugging easier.
e.args = (run_id, *e.args)
- e.message = run_id + ': ' + e.message
+ e.message = run_id + ": " + e.message
raise
run += 1
profile_total_seconds += time.perf_counter() - profile_start_seconds
-print(f'profile-total\t{profile_total_seconds}')
+print(f"profile-total\t{profile_total_seconds}")
-reportStageEnd('run')
+reportStageEnd("run")
# Run drop queries
if not args.keep_created_tables and not args.use_existing_tables:
@@ -438,6 +541,6 @@ if not args.keep_created_tables and not args.use_existing_tables:
for conn_index, c in enumerate(all_connections):
for q in drop_queries:
c.execute(q)
- print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
+ print(f"drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}")
- reportStageEnd('drop-2')
+ reportStageEnd("drop-2")
diff --git a/docker/test/performance-comparison/report.py b/docker/test/performance-comparison/report.py
index 4cff6b41949..0cb8481ee6e 100755
--- a/docker/test/performance-comparison/report.py
+++ b/docker/test/performance-comparison/report.py
@@ -12,9 +12,13 @@ import pprint
import sys
import traceback
-parser = argparse.ArgumentParser(description='Create performance test report')
-parser.add_argument('--report', default='main', choices=['main', 'all-queries'],
- help='Which report to build')
+parser = argparse.ArgumentParser(description="Create performance test report")
+parser.add_argument(
+ "--report",
+ default="main",
+ choices=["main", "all-queries"],
+ help="Which report to build",
+)
args = parser.parse_args()
tables = []
@@ -31,8 +35,8 @@ unstable_partial_queries = 0
# max seconds to run one query by itself, not counting preparation
allowed_single_run_time = 2
-color_bad='#ffb0c0'
-color_good='#b0d050'
+color_bad = "#ffb0c0"
+color_good = "#b0d050"
header_template = """
@@ -151,24 +155,29 @@ tr:nth-child(odd) td {{filter: brightness(90%);}}
table_anchor = 0
row_anchor = 0
+
def currentTableAnchor():
global table_anchor
- return f'{table_anchor}'
+ return f"{table_anchor}"
+
def newTableAnchor():
global table_anchor
table_anchor += 1
return currentTableAnchor()
+
def currentRowAnchor():
global row_anchor
global table_anchor
- return f'{table_anchor}.{row_anchor}'
+ return f"{table_anchor}.{row_anchor}"
+
def nextRowAnchor():
global row_anchor
global table_anchor
- return f'{table_anchor}.{row_anchor + 1}'
+ return f"{table_anchor}.{row_anchor + 1}"
+
def advanceRowAnchor():
global row_anchor
@@ -178,43 +187,58 @@ def advanceRowAnchor():
def tr(x, anchor=None):
-    #return '<tr id={a}>{x}</tr>'.format(a=a, x=str(x))
+    # return '<tr id={a}>{x}</tr>'.format(a=a, x=str(x))
     anchor = anchor if anchor else advanceRowAnchor()
-    return f'<tr id={anchor}>{x}</tr>'
+    return f"<tr id={anchor}>{x}</tr>"

-def td(value, cell_attributes = ''):
-    return '<td {cell_attributes}>{value}</td>'.format(
-        cell_attributes = cell_attributes,
-        value = value)

-def th(value, cell_attributes = ''):
-    return '<th {cell_attributes}>{value}</th>'.format(
-        cell_attributes = cell_attributes,
-        value = value)
+
+def td(value, cell_attributes=""):
+    return "<td {cell_attributes}>{value}</td>".format(
+        cell_attributes=cell_attributes, value=value
+    )
+
+
+def th(value, cell_attributes=""):
+    return "<th {cell_attributes}>{value}</th>".format(
+        cell_attributes=cell_attributes, value=value
+    )
+
+
+def tableRow(cell_values, cell_attributes=[], anchor=None):
return tr(
- ''.join([td(v, a)
- for v, a in itertools.zip_longest(
- cell_values, cell_attributes,
- fillvalue = '')
- if a is not None and v is not None]),
- anchor)
+ "".join(
+ [
+ td(v, a)
+ for v, a in itertools.zip_longest(
+ cell_values, cell_attributes, fillvalue=""
+ )
+ if a is not None and v is not None
+ ]
+ ),
+ anchor,
+ )
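+# E.g. (illustrative) tableRow(["q1", "1.23"]) renders roughly
+# "<tr id=...><td >q1</td><td >1.23</td></tr>"; empty cell_attributes
+# leave a space after the tag name.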
-def tableHeader(cell_values, cell_attributes = []):
+
+def tableHeader(cell_values, cell_attributes=[]):
return tr(
- ''.join([th(v, a)
- for v, a in itertools.zip_longest(
- cell_values, cell_attributes,
- fillvalue = '')
- if a is not None and v is not None]))
+ "".join(
+ [
+ th(v, a)
+ for v, a in itertools.zip_longest(
+ cell_values, cell_attributes, fillvalue=""
+ )
+ if a is not None and v is not None
+ ]
+ )
+ )
+
def tableStart(title):
- cls = '-'.join(title.lower().split(' ')[:3]);
+ cls = "-".join(title.lower().split(" ")[:3])
global table_anchor
table_anchor = cls
anchor = currentTableAnchor()
- help_anchor = '-'.join(title.lower().split(' '));
+ help_anchor = "-".join(title.lower().split(" "))
return f"""