mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 09:32:06 +00:00
1071 lines
39 KiB
Python
Executable File
1071 lines
39 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
from collections import defaultdict
|
|
import csv
|
|
import glob
|
|
import json
|
|
import logging
|
|
import os
|
|
import random
|
|
import re
|
|
import shutil
|
|
import subprocess
|
|
import time
|
|
import shlex
|
|
import zlib # for crc32
|
|
|
|
|
|
MAX_RETRY = 1
|
|
NUM_WORKERS = 5
|
|
SLEEP_BETWEEN_RETRIES = 5
|
|
PARALLEL_GROUP_SIZE = 100
|
|
CLICKHOUSE_BINARY_PATH = "usr/bin/clickhouse"
|
|
CLICKHOUSE_ODBC_BRIDGE_BINARY_PATH = "usr/bin/clickhouse-odbc-bridge"
|
|
CLICKHOUSE_LIBRARY_BRIDGE_BINARY_PATH = "usr/bin/clickhouse-library-bridge"
|
|
|
|
FLAKY_TRIES_COUNT = 10
|
|
MAX_TIME_SECONDS = 3600
|
|
|
|
MAX_TIME_IN_SANDBOX = 20 * 60 # 20 minutes
|
|
TASK_TIMEOUT = 8 * 60 * 60 # 8 hours
|
|
|
|
NO_CHANGES_MSG = "Nothing to run"
|
|
|
|
|
|
def stringhash(s):
|
|
return zlib.crc32(s.encode("utf-8"))
|
|
|
|
|
|
# Search test by the common prefix.
|
|
# This is accept tests w/o parameters in skip list.
|
|
#
|
|
# Examples:
|
|
# - has_test(['foobar'], 'foobar[param]') == True
|
|
# - has_test(['foobar[param]'], 'foobar') == True
|
|
def has_test(tests, test_to_match):
|
|
for test in tests:
|
|
if len(test_to_match) < len(test):
|
|
if test[0 : len(test_to_match)] == test_to_match:
|
|
return True
|
|
else:
|
|
if test_to_match[0 : len(test)] == test:
|
|
return True
|
|
return False
|
|
|
|
|
|
def get_changed_tests_to_run(pr_info, repo_path):
|
|
result = set()
|
|
changed_files = pr_info["changed_files"]
|
|
|
|
if changed_files is None:
|
|
return []
|
|
|
|
for fpath in changed_files:
|
|
if "tests/integration/test_" in fpath:
|
|
logging.info("File %s changed and seems like integration test", fpath)
|
|
result.add(fpath.split("/")[2])
|
|
return filter_existing_tests(result, repo_path)
|
|
|
|
|
|
def filter_existing_tests(tests_to_run, repo_path):
|
|
result = []
|
|
for relative_test_path in tests_to_run:
|
|
if os.path.exists(
|
|
os.path.join(repo_path, "tests/integration", relative_test_path)
|
|
):
|
|
result.append(relative_test_path)
|
|
else:
|
|
logging.info(
|
|
"Skipping test %s, seems like it was removed", relative_test_path
|
|
)
|
|
return result
|
|
|
|
|
|
def _get_deselect_option(tests):
|
|
return " ".join([f"--deselect {t}" for t in tests])
|
|
|
|
|
|
# https://stackoverflow.com/questions/312443/how-do-you-split-a-list-into-evenly-sized-chunks
|
|
def chunks(lst, n):
|
|
"""Yield successive n-sized chunks from lst."""
|
|
for i in range(0, len(lst), n):
|
|
yield lst[i : i + n]
|
|
|
|
|
|
def get_counters(fname):
|
|
counters = {
|
|
"ERROR": set([]),
|
|
"PASSED": set([]),
|
|
"FAILED": set([]),
|
|
"SKIPPED": set([]),
|
|
}
|
|
|
|
with open(fname, "r") as out:
|
|
for line in out:
|
|
line = line.strip()
|
|
# Example of log:
|
|
#
|
|
# test_mysql_protocol/test.py::test_golang_client
|
|
# [gw0] [ 7%] ERROR test_mysql_protocol/test.py::test_golang_client
|
|
#
|
|
# And only the line with test status should be matched
|
|
if not (".py::" in line and " " in line):
|
|
continue
|
|
|
|
line = line.strip()
|
|
# [gw0] [ 7%] ERROR test_mysql_protocol/test.py::test_golang_client
|
|
# ^^^^^^^^^^^^^
|
|
if line.strip().startswith("["):
|
|
line = re.sub("^\[[^\[\]]*\] \[[^\[\]]*\] ", "", line)
|
|
|
|
line_arr = line.split(" ")
|
|
if len(line_arr) < 2:
|
|
logging.debug("Strange line %s", line)
|
|
continue
|
|
|
|
# Lines like:
|
|
#
|
|
# ERROR test_mysql_protocol/test.py::test_golang_client
|
|
# PASSED test_replicated_users/test.py::test_rename_replicated[QUOTA]
|
|
# PASSED test_drop_is_lock_free/test.py::test_query_is_lock_free[detach part]
|
|
#
|
|
state = line_arr.pop(0)
|
|
test_name = " ".join(line_arr)
|
|
|
|
# Normalize test names for lines like this:
|
|
#
|
|
# FAILED test_storage_s3/test.py::test_url_reconnect_in_the_middle - Exception
|
|
# FAILED test_distributed_ddl/test.py::test_default_database[configs] - AssertionError: assert ...
|
|
#
|
|
test_name = re.sub(
|
|
r"^(?P<test_name>[^\[\] ]+)(?P<test_param>\[[^\[\]]*\]|)(?P<test_error> - .*|)$",
|
|
r"\g<test_name>\g<test_param>",
|
|
test_name,
|
|
)
|
|
|
|
if state in counters:
|
|
counters[state].add(test_name)
|
|
else:
|
|
# will skip lines like:
|
|
# 30.76s call test_host_ip_change/test.py::test_ip_drop_cache
|
|
# 5.71s teardown test_host_ip_change/test.py::test_ip_change[node1]
|
|
# and similar
|
|
logging.debug("Strange state in line %s", line)
|
|
|
|
return {k: list(v) for k, v in counters.items()}
|
|
|
|
|
|
def parse_test_times(fname):
|
|
read = False
|
|
description_output = []
|
|
with open(fname, "r") as out:
|
|
for line in out:
|
|
if read and "==" in line:
|
|
break
|
|
if read and line.strip():
|
|
description_output.append(line.strip())
|
|
if "slowest durations" in line:
|
|
read = True
|
|
return description_output
|
|
|
|
|
|
def get_test_times(output):
|
|
result = defaultdict(float)
|
|
for line in output:
|
|
if ".py" in line:
|
|
line_arr = line.strip().split(" ")
|
|
test_time = line_arr[0]
|
|
test_name = " ".join([elem for elem in line_arr[2:] if elem])
|
|
if test_name not in result:
|
|
result[test_name] = 0.0
|
|
result[test_name] += float(test_time[:-1])
|
|
return result
|
|
|
|
|
|
def clear_ip_tables_and_restart_daemons():
|
|
logging.info(
|
|
"Dump iptables after run %s",
|
|
subprocess.check_output("sudo iptables -nvL", shell=True),
|
|
)
|
|
try:
|
|
logging.info("Killing all alive docker containers")
|
|
subprocess.check_output(
|
|
"timeout --signal=KILL 10m docker ps --quiet | xargs --no-run-if-empty docker kill",
|
|
shell=True,
|
|
)
|
|
except subprocess.CalledProcessError as err:
|
|
logging.info("docker kill excepted: " + str(err))
|
|
|
|
try:
|
|
logging.info("Removing all docker containers")
|
|
subprocess.check_output(
|
|
"timeout --signal=KILL 10m docker ps --all --quiet | xargs --no-run-if-empty docker rm --force",
|
|
shell=True,
|
|
)
|
|
except subprocess.CalledProcessError as err:
|
|
logging.info("docker rm excepted: " + str(err))
|
|
|
|
# don't restart docker if it's disabled
|
|
if os.environ.get("CLICKHOUSE_TESTS_RUNNER_RESTART_DOCKER", "1") == "1":
|
|
try:
|
|
logging.info("Stopping docker daemon")
|
|
subprocess.check_output("service docker stop", shell=True)
|
|
except subprocess.CalledProcessError as err:
|
|
logging.info("docker stop excepted: " + str(err))
|
|
|
|
try:
|
|
for i in range(200):
|
|
try:
|
|
logging.info("Restarting docker %s", i)
|
|
subprocess.check_output("service docker start", shell=True)
|
|
subprocess.check_output("docker ps", shell=True)
|
|
break
|
|
except subprocess.CalledProcessError as err:
|
|
time.sleep(0.5)
|
|
logging.info("Waiting docker to start, current %s", str(err))
|
|
else:
|
|
raise Exception("Docker daemon doesn't responding")
|
|
except subprocess.CalledProcessError as err:
|
|
logging.info("Can't reload docker: " + str(err))
|
|
|
|
iptables_iter = 0
|
|
try:
|
|
for i in range(1000):
|
|
iptables_iter = i
|
|
# when rules will be empty, it will raise exception
|
|
subprocess.check_output("sudo iptables -D DOCKER-USER 1", shell=True)
|
|
except subprocess.CalledProcessError as err:
|
|
logging.info(
|
|
"All iptables rules cleared, %s iterations, last error: %s",
|
|
iptables_iter,
|
|
str(err),
|
|
)
|
|
|
|
|
|
class ClickhouseIntegrationTestsRunner:
|
|
def __init__(self, result_path, params):
|
|
self.result_path = result_path
|
|
self.params = params
|
|
|
|
self.image_versions = self.params["docker_images_with_versions"]
|
|
self.shuffle_groups = self.params["shuffle_test_groups"]
|
|
self.flaky_check = "flaky check" in self.params["context_name"]
|
|
self.bugfix_validate_check = (
|
|
"bugfix validate check" in self.params["context_name"]
|
|
)
|
|
# if use_tmpfs is not set we assume it to be true, otherwise check
|
|
self.use_tmpfs = "use_tmpfs" not in self.params or self.params["use_tmpfs"]
|
|
self.disable_net_host = (
|
|
"disable_net_host" in self.params and self.params["disable_net_host"]
|
|
)
|
|
self.start_time = time.time()
|
|
self.soft_deadline_time = self.start_time + (TASK_TIMEOUT - MAX_TIME_IN_SANDBOX)
|
|
|
|
self.use_analyzer = os.environ.get("CLICKHOUSE_USE_NEW_ANALYZER") is not None
|
|
|
|
if "run_by_hash_total" in self.params:
|
|
self.run_by_hash_total = self.params["run_by_hash_total"]
|
|
self.run_by_hash_num = self.params["run_by_hash_num"]
|
|
else:
|
|
self.run_by_hash_total = 0
|
|
self.run_by_hash_num = 0
|
|
|
|
def path(self):
|
|
return self.result_path
|
|
|
|
def base_path(self):
|
|
return os.path.join(str(self.result_path), "../")
|
|
|
|
def should_skip_tests(self):
|
|
return []
|
|
|
|
def get_image_with_version(self, name):
|
|
if name in self.image_versions:
|
|
return name + ":" + self.image_versions[name]
|
|
logging.warn(
|
|
"Cannot find image %s in params list %s", name, self.image_versions
|
|
)
|
|
if ":" not in name:
|
|
return name + ":latest"
|
|
return name
|
|
|
|
def get_image_version(self, name: str):
|
|
if name in self.image_versions:
|
|
return self.image_versions[name]
|
|
logging.warn(
|
|
"Cannot find image %s in params list %s", name, self.image_versions
|
|
)
|
|
return "latest"
|
|
|
|
def shuffle_test_groups(self):
|
|
return self.shuffle_groups != 0
|
|
|
|
@staticmethod
|
|
def get_images_names():
|
|
return [
|
|
"clickhouse/dotnet-client",
|
|
"clickhouse/integration-helper",
|
|
"clickhouse/integration-test",
|
|
"clickhouse/integration-tests-runner",
|
|
"clickhouse/kerberized-hadoop",
|
|
"clickhouse/kerberos-kdc",
|
|
"clickhouse/mysql-golang-client",
|
|
"clickhouse/mysql-java-client",
|
|
"clickhouse/mysql-js-client",
|
|
"clickhouse/mysql-php-client",
|
|
"clickhouse/nginx-dav",
|
|
"clickhouse/postgresql-java-client",
|
|
]
|
|
|
|
def _pre_pull_images(self, repo_path):
|
|
image_cmd = self._get_runner_image_cmd(repo_path)
|
|
|
|
cmd = (
|
|
"cd {repo_path}/tests/integration && "
|
|
"timeout --signal=KILL 1h ./runner {runner_opts} {image_cmd} --pre-pull --command '{command}' ".format(
|
|
repo_path=repo_path,
|
|
runner_opts=self._get_runner_opts(),
|
|
image_cmd=image_cmd,
|
|
command=r""" echo Pre Pull finished """,
|
|
)
|
|
)
|
|
|
|
for i in range(5):
|
|
logging.info("Pulling images before running tests. Attempt %s", i)
|
|
try:
|
|
subprocess.check_output(
|
|
cmd,
|
|
shell=True,
|
|
)
|
|
return
|
|
except subprocess.CalledProcessError as err:
|
|
logging.info("docker-compose pull failed: " + str(err))
|
|
continue
|
|
logging.error("Pulling images failed for 5 attempts. Will fail the worker.")
|
|
# We pass specific retcode to to ci/integration_test_check.py to skip status reporting and restart job
|
|
exit(13)
|
|
|
|
def _can_run_with(self, path, opt):
|
|
with open(path, "r") as script:
|
|
for line in script:
|
|
if opt in line:
|
|
return True
|
|
return False
|
|
|
|
def _install_clickhouse(self, debs_path):
|
|
for package in (
|
|
"clickhouse-common-static_",
|
|
"clickhouse-server_",
|
|
"clickhouse-client",
|
|
"clickhouse-common-static-dbg_",
|
|
): # order matters
|
|
logging.info("Installing package %s", package)
|
|
for f in os.listdir(debs_path):
|
|
if package in f:
|
|
full_path = os.path.join(debs_path, f)
|
|
logging.info("Package found in %s", full_path)
|
|
log_name = "install_" + f + ".log"
|
|
log_path = os.path.join(str(self.path()), log_name)
|
|
with open(log_path, "w") as log:
|
|
cmd = "dpkg -x {} .".format(full_path)
|
|
logging.info("Executing installation cmd %s", cmd)
|
|
retcode = subprocess.Popen(
|
|
cmd, shell=True, stderr=log, stdout=log
|
|
).wait()
|
|
if retcode == 0:
|
|
logging.info("Installation of %s successfull", full_path)
|
|
else:
|
|
raise Exception("Installation of %s failed", full_path)
|
|
break
|
|
else:
|
|
raise Exception("Package with {} not found".format(package))
|
|
# logging.info("Unstripping binary")
|
|
# logging.info(
|
|
# "Unstring %s",
|
|
# subprocess.check_output(
|
|
# "eu-unstrip /usr/bin/clickhouse {}".format(CLICKHOUSE_BINARY_PATH),
|
|
# shell=True,
|
|
# ),
|
|
# )
|
|
|
|
logging.info("All packages installed")
|
|
os.chmod(CLICKHOUSE_BINARY_PATH, 0o777)
|
|
os.chmod(CLICKHOUSE_ODBC_BRIDGE_BINARY_PATH, 0o777)
|
|
os.chmod(CLICKHOUSE_LIBRARY_BRIDGE_BINARY_PATH, 0o777)
|
|
shutil.copy(
|
|
CLICKHOUSE_BINARY_PATH, os.getenv("CLICKHOUSE_TESTS_SERVER_BIN_PATH")
|
|
)
|
|
shutil.copy(
|
|
CLICKHOUSE_ODBC_BRIDGE_BINARY_PATH,
|
|
os.getenv("CLICKHOUSE_TESTS_ODBC_BRIDGE_BIN_PATH"),
|
|
)
|
|
shutil.copy(
|
|
CLICKHOUSE_LIBRARY_BRIDGE_BINARY_PATH,
|
|
os.getenv("CLICKHOUSE_TESTS_LIBRARY_BRIDGE_BIN_PATH"),
|
|
)
|
|
|
|
def _compress_logs(self, dir, relpaths, result_path):
|
|
retcode = subprocess.call( # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL
|
|
"tar --use-compress-program='zstd --threads=0' -cf {} -C {} {}".format(
|
|
result_path, dir, " ".join(relpaths)
|
|
),
|
|
shell=True,
|
|
)
|
|
# tar return 1 when the files are changed on compressing, we ignore it
|
|
if retcode in (0, 1):
|
|
return
|
|
# but even on the fatal errors it's better to retry
|
|
logging.error("Fatal error on compressing %s: %s", result_path, retcode)
|
|
|
|
def _get_runner_opts(self):
|
|
result = []
|
|
if self.use_tmpfs:
|
|
result.append("--tmpfs")
|
|
if self.disable_net_host:
|
|
result.append("--disable-net-host")
|
|
if self.use_analyzer:
|
|
result.append("--analyzer")
|
|
|
|
return " ".join(result)
|
|
|
|
def _get_all_tests(self, repo_path):
|
|
image_cmd = self._get_runner_image_cmd(repo_path)
|
|
out_file = "all_tests.txt"
|
|
out_file_full = os.path.join(self.result_path, "runner_get_all_tests.log")
|
|
cmd = (
|
|
"cd {repo_path}/tests/integration && "
|
|
"timeout --signal=KILL 1h ./runner {runner_opts} {image_cmd} -- --setup-plan "
|
|
"| tee '{out_file_full}' | grep -F '::' | sed -r 's/ \(fixtures used:.*//g; s/^ *//g; s/ *$//g' "
|
|
"| grep -v -F 'SKIPPED' | sort --unique > {out_file}".format(
|
|
repo_path=repo_path,
|
|
runner_opts=self._get_runner_opts(),
|
|
image_cmd=image_cmd,
|
|
out_file=out_file,
|
|
out_file_full=out_file_full,
|
|
)
|
|
)
|
|
|
|
logging.info("Getting all tests with cmd '%s'", cmd)
|
|
subprocess.check_call( # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL
|
|
cmd, shell=True
|
|
)
|
|
|
|
all_tests_file_path = "{repo_path}/tests/integration/{out_file}".format(
|
|
repo_path=repo_path, out_file=out_file
|
|
)
|
|
if (
|
|
not os.path.isfile(all_tests_file_path)
|
|
or os.path.getsize(all_tests_file_path) == 0
|
|
):
|
|
if os.path.isfile(out_file_full):
|
|
# log runner output
|
|
logging.info("runner output:")
|
|
with open(out_file_full, "r") as all_tests_full_file:
|
|
for line in all_tests_full_file:
|
|
line = line.rstrip()
|
|
if line:
|
|
logging.info("runner output: %s", line)
|
|
else:
|
|
logging.info("runner output '%s' is empty", out_file_full)
|
|
|
|
raise Exception(
|
|
"There is something wrong with getting all tests list: file '{}' is empty or does not exist.".format(
|
|
all_tests_file_path
|
|
)
|
|
)
|
|
|
|
all_tests = []
|
|
with open(all_tests_file_path, "r") as all_tests_file:
|
|
for line in all_tests_file:
|
|
all_tests.append(line.strip())
|
|
return list(sorted(all_tests))
|
|
|
|
def _get_parallel_tests_skip_list(self, repo_path):
|
|
skip_list_file_path = "{}/tests/integration/parallel_skip.json".format(
|
|
repo_path
|
|
)
|
|
if (
|
|
not os.path.isfile(skip_list_file_path)
|
|
or os.path.getsize(skip_list_file_path) == 0
|
|
):
|
|
raise Exception(
|
|
"There is something wrong with getting all tests list: file '{}' is empty or does not exist.".format(
|
|
skip_list_file_path
|
|
)
|
|
)
|
|
|
|
skip_list_tests = []
|
|
with open(skip_list_file_path, "r") as skip_list_file:
|
|
skip_list_tests = json.load(skip_list_file)
|
|
return list(sorted(skip_list_tests))
|
|
|
|
def group_test_by_file(self, tests):
|
|
result = {}
|
|
for test in tests:
|
|
test_file = test.split("::")[0]
|
|
if test_file not in result:
|
|
result[test_file] = []
|
|
result[test_file].append(test)
|
|
return result
|
|
|
|
def _update_counters(self, main_counters, current_counters, broken_tests):
|
|
for test in current_counters["PASSED"]:
|
|
if test not in main_counters["PASSED"]:
|
|
if test in main_counters["FAILED"]:
|
|
main_counters["FAILED"].remove(test)
|
|
if test in main_counters["ERROR"]:
|
|
main_counters["ERROR"].remove(test)
|
|
if test in main_counters["BROKEN"]:
|
|
main_counters["BROKEN"].remove(test)
|
|
|
|
if test not in broken_tests:
|
|
main_counters["PASSED"].append(test)
|
|
else:
|
|
main_counters["NOT_FAILED"].append(test)
|
|
|
|
for state in ("ERROR", "FAILED"):
|
|
for test in current_counters[state]:
|
|
if test in main_counters["PASSED"]:
|
|
main_counters["PASSED"].remove(test)
|
|
continue
|
|
if test not in broken_tests:
|
|
if test not in main_counters[state]:
|
|
main_counters[state].append(test)
|
|
else:
|
|
if test not in main_counters["BROKEN"]:
|
|
main_counters["BROKEN"].append(test)
|
|
|
|
for state in ("SKIPPED",):
|
|
for test in current_counters[state]:
|
|
main_counters[state].append(test)
|
|
|
|
def _get_runner_image_cmd(self, repo_path):
|
|
image_cmd = ""
|
|
if self._can_run_with(
|
|
os.path.join(repo_path, "tests/integration", "runner"),
|
|
"--docker-image-version",
|
|
):
|
|
for img in self.get_images_names():
|
|
if img == "clickhouse/integration-tests-runner":
|
|
runner_version = self.get_image_version(img)
|
|
logging.info(
|
|
"Can run with custom docker image version %s", runner_version
|
|
)
|
|
image_cmd += " --docker-image-version={} ".format(runner_version)
|
|
else:
|
|
if self._can_run_with(
|
|
os.path.join(repo_path, "tests/integration", "runner"),
|
|
"--docker-compose-images-tags",
|
|
):
|
|
image_cmd += "--docker-compose-images-tags={} ".format(
|
|
self.get_image_with_version(img)
|
|
)
|
|
else:
|
|
image_cmd = ""
|
|
logging.info("Cannot run with custom docker image version :(")
|
|
return image_cmd
|
|
|
|
def _find_test_data_dirs(self, repo_path, test_names):
|
|
relpaths = {}
|
|
for test_name in test_names:
|
|
if "/" in test_name:
|
|
test_dir = test_name[: test_name.find("/")]
|
|
else:
|
|
test_dir = test_name
|
|
if os.path.isdir(os.path.join(repo_path, "tests/integration", test_dir)):
|
|
for name in os.listdir(
|
|
os.path.join(repo_path, "tests/integration", test_dir)
|
|
):
|
|
relpath = os.path.join(os.path.join(test_dir, name))
|
|
mtime = os.path.getmtime(
|
|
os.path.join(repo_path, "tests/integration", relpath)
|
|
)
|
|
relpaths[relpath] = mtime
|
|
return relpaths
|
|
|
|
def _get_test_data_dirs_difference(self, new_snapshot, old_snapshot):
|
|
res = set()
|
|
for path in new_snapshot:
|
|
if (path not in old_snapshot) or (old_snapshot[path] != new_snapshot[path]):
|
|
res.add(path)
|
|
return res
|
|
|
|
def try_run_test_group(
|
|
self,
|
|
repo_path,
|
|
test_group,
|
|
tests_in_group,
|
|
num_tries,
|
|
num_workers,
|
|
broken_tests,
|
|
):
|
|
try:
|
|
return self.run_test_group(
|
|
repo_path,
|
|
test_group,
|
|
tests_in_group,
|
|
num_tries,
|
|
num_workers,
|
|
broken_tests,
|
|
)
|
|
except Exception as e:
|
|
logging.info("Failed to run {}:\n{}".format(str(test_group), str(e)))
|
|
counters = {
|
|
"ERROR": [],
|
|
"PASSED": [],
|
|
"FAILED": [],
|
|
"SKIPPED": [],
|
|
}
|
|
tests_times = defaultdict(float)
|
|
for test in tests_in_group:
|
|
counters["ERROR"].append(test)
|
|
tests_times[test] = 0
|
|
return counters, tests_times, []
|
|
|
|
def run_test_group(
|
|
self,
|
|
repo_path,
|
|
test_group,
|
|
tests_in_group,
|
|
num_tries,
|
|
num_workers,
|
|
broken_tests,
|
|
):
|
|
counters = {
|
|
"ERROR": [],
|
|
"PASSED": [],
|
|
"FAILED": [],
|
|
"SKIPPED": [],
|
|
"BROKEN": [],
|
|
"NOT_FAILED": [],
|
|
}
|
|
tests_times = defaultdict(float)
|
|
|
|
if self.soft_deadline_time < time.time():
|
|
for test in tests_in_group:
|
|
logging.info("Task timeout exceeded, skipping %s", test)
|
|
counters["SKIPPED"].append(test)
|
|
tests_times[test] = 0
|
|
return counters, tests_times, []
|
|
|
|
image_cmd = self._get_runner_image_cmd(repo_path)
|
|
test_group_str = test_group.replace("/", "_").replace(".", "_")
|
|
|
|
log_paths = []
|
|
test_data_dirs = {}
|
|
|
|
for i in range(num_tries):
|
|
logging.info("Running test group %s for the %s retry", test_group, i)
|
|
clear_ip_tables_and_restart_daemons()
|
|
|
|
test_names = set([])
|
|
for test_name in tests_in_group:
|
|
if test_name not in counters["PASSED"]:
|
|
test_names.add(test_name)
|
|
|
|
if i == 0:
|
|
test_data_dirs = self._find_test_data_dirs(repo_path, test_names)
|
|
|
|
info_basename = test_group_str + "_" + str(i) + ".nfo"
|
|
info_path = os.path.join(repo_path, "tests/integration", info_basename)
|
|
|
|
test_cmd = " ".join([shlex.quote(test) for test in sorted(test_names)])
|
|
parallel_cmd = (
|
|
" --parallel {} ".format(num_workers) if num_workers > 0 else ""
|
|
)
|
|
# -r -- show extra test summary:
|
|
# -f -- (f)ailed
|
|
# -E -- (E)rror
|
|
# -p -- (p)assed
|
|
# -s -- (s)kipped
|
|
cmd = "cd {}/tests/integration && timeout --signal=KILL 1h ./runner {} {} -t {} {} -- -rfEps --run-id={} --color=no --durations=0 {} | tee {}".format(
|
|
repo_path,
|
|
self._get_runner_opts(),
|
|
image_cmd,
|
|
test_cmd,
|
|
parallel_cmd,
|
|
i,
|
|
_get_deselect_option(self.should_skip_tests()),
|
|
info_path,
|
|
)
|
|
|
|
log_basename = test_group_str + "_" + str(i) + ".log"
|
|
log_path = os.path.join(repo_path, "tests/integration", log_basename)
|
|
with open(log_path, "w") as log:
|
|
logging.info("Executing cmd: %s", cmd)
|
|
# ignore retcode, since it meaningful due to pipe to tee
|
|
subprocess.Popen(cmd, shell=True, stderr=log, stdout=log).wait()
|
|
|
|
extra_logs_names = [log_basename]
|
|
log_result_path = os.path.join(
|
|
str(self.path()), "integration_run_" + log_basename
|
|
)
|
|
shutil.copy(log_path, log_result_path)
|
|
log_paths.append(log_result_path)
|
|
|
|
for pytest_log_path in glob.glob(
|
|
os.path.join(repo_path, "tests/integration/pytest*.log")
|
|
):
|
|
new_name = (
|
|
test_group_str
|
|
+ "_"
|
|
+ str(i)
|
|
+ "_"
|
|
+ os.path.basename(pytest_log_path)
|
|
)
|
|
os.rename(
|
|
pytest_log_path,
|
|
os.path.join(repo_path, "tests/integration", new_name),
|
|
)
|
|
extra_logs_names.append(new_name)
|
|
|
|
dockerd_log_path = os.path.join(repo_path, "tests/integration/dockerd.log")
|
|
if os.path.exists(dockerd_log_path):
|
|
new_name = (
|
|
test_group_str
|
|
+ "_"
|
|
+ str(i)
|
|
+ "_"
|
|
+ os.path.basename(dockerd_log_path)
|
|
)
|
|
os.rename(
|
|
dockerd_log_path,
|
|
os.path.join(repo_path, "tests/integration", new_name),
|
|
)
|
|
extra_logs_names.append(new_name)
|
|
|
|
if os.path.exists(info_path):
|
|
extra_logs_names.append(info_basename)
|
|
new_counters = get_counters(info_path)
|
|
for state, tests in new_counters.items():
|
|
logging.info(
|
|
"Tests with %s state (%s): %s", state, len(tests), tests
|
|
)
|
|
times_lines = parse_test_times(info_path)
|
|
new_tests_times = get_test_times(times_lines)
|
|
self._update_counters(counters, new_counters, broken_tests)
|
|
for test_name, test_time in new_tests_times.items():
|
|
tests_times[test_name] = test_time
|
|
|
|
test_data_dirs_new = self._find_test_data_dirs(repo_path, test_names)
|
|
test_data_dirs_diff = self._get_test_data_dirs_difference(
|
|
test_data_dirs_new, test_data_dirs
|
|
)
|
|
test_data_dirs = test_data_dirs_new
|
|
|
|
if extra_logs_names or test_data_dirs_diff:
|
|
extras_result_path = os.path.join(
|
|
str(self.path()),
|
|
"integration_run_" + test_group_str + "_" + str(i) + ".tar.zst",
|
|
)
|
|
self._compress_logs(
|
|
os.path.join(repo_path, "tests/integration"),
|
|
extra_logs_names + list(test_data_dirs_diff),
|
|
extras_result_path,
|
|
)
|
|
log_paths.append(extras_result_path)
|
|
|
|
if len(counters["PASSED"]) == len(tests_in_group):
|
|
logging.info("All tests from group %s passed", test_group)
|
|
break
|
|
if (
|
|
len(counters["PASSED"]) >= 0
|
|
and len(counters["FAILED"]) == 0
|
|
and len(counters["ERROR"]) == 0
|
|
):
|
|
logging.info(
|
|
"Seems like all tests passed but some of them are skipped or "
|
|
"deselected. Ignoring them and finishing group."
|
|
)
|
|
break
|
|
else:
|
|
# Mark all non tried tests as errors, with '::' in name
|
|
# (example test_partition/test.py::test_partition_simple). For flaky check
|
|
# we run whole test dirs like "test_odbc_interaction" and don't
|
|
# want to mark them as error so we filter by '::'.
|
|
for test in tests_in_group:
|
|
if (
|
|
test not in counters["PASSED"]
|
|
and test not in counters["ERROR"]
|
|
and test not in counters["SKIPPED"]
|
|
and test not in counters["FAILED"]
|
|
and test not in counters["BROKEN"]
|
|
and "::" in test
|
|
):
|
|
counters["ERROR"].append(test)
|
|
|
|
return counters, tests_times, log_paths
|
|
|
|
def run_flaky_check(self, repo_path, build_path, should_fail=False):
|
|
pr_info = self.params["pr_info"]
|
|
|
|
tests_to_run = get_changed_tests_to_run(pr_info, repo_path)
|
|
if not tests_to_run:
|
|
logging.info("No integration tests to run found")
|
|
return "success", NO_CHANGES_MSG, [(NO_CHANGES_MSG, "OK")], ""
|
|
|
|
self._install_clickhouse(build_path)
|
|
logging.info("Found '%s' tests to run", " ".join(tests_to_run))
|
|
result_state = "success"
|
|
description_prefix = "No flaky tests: "
|
|
start = time.time()
|
|
logging.info("Starting check with retries")
|
|
final_retry = 0
|
|
logs = []
|
|
tires_num = 1 if should_fail else FLAKY_TRIES_COUNT
|
|
for i in range(tires_num):
|
|
final_retry += 1
|
|
logging.info("Running tests for the %s time", i)
|
|
counters, tests_times, log_paths = self.try_run_test_group(
|
|
repo_path, "bugfix" if should_fail else "flaky", tests_to_run, 1, 1, []
|
|
)
|
|
logs += log_paths
|
|
if counters["FAILED"]:
|
|
logging.info("Found failed tests: %s", " ".join(counters["FAILED"]))
|
|
description_prefix = "Failed tests found: "
|
|
result_state = "failure"
|
|
if not should_fail:
|
|
break
|
|
if counters["ERROR"]:
|
|
description_prefix = "Failed tests found: "
|
|
logging.info("Found error tests: %s", " ".join(counters["ERROR"]))
|
|
# NOTE "error" result state will restart the whole test task,
|
|
# so we use "failure" here
|
|
result_state = "failure"
|
|
if not should_fail:
|
|
break
|
|
logging.info("Try is OK, all tests passed, going to clear env")
|
|
clear_ip_tables_and_restart_daemons()
|
|
logging.info("And going to sleep for some time")
|
|
if time.time() - start > MAX_TIME_SECONDS:
|
|
logging.info("Timeout reached, going to finish flaky check")
|
|
break
|
|
time.sleep(5)
|
|
|
|
test_result = []
|
|
for state in ("ERROR", "FAILED", "PASSED", "SKIPPED"):
|
|
if state == "PASSED":
|
|
text_state = "OK"
|
|
elif state == "FAILED":
|
|
text_state = "FAIL"
|
|
else:
|
|
text_state = state
|
|
test_result += [
|
|
(
|
|
c + " (✕" + str(final_retry) + ")",
|
|
text_state,
|
|
"{:.2f}".format(tests_times[c]),
|
|
)
|
|
for c in counters[state]
|
|
]
|
|
status_text = description_prefix + ", ".join(
|
|
[
|
|
str(n).lower().replace("failed", "fail") + ": " + str(len(c))
|
|
for n, c in counters.items()
|
|
]
|
|
)
|
|
|
|
return result_state, status_text, test_result, logs
|
|
|
|
def run_impl(self, repo_path, build_path):
|
|
if self.flaky_check or self.bugfix_validate_check:
|
|
return self.run_flaky_check(
|
|
repo_path, build_path, should_fail=self.bugfix_validate_check
|
|
)
|
|
|
|
self._install_clickhouse(build_path)
|
|
|
|
logging.info("Pulling images")
|
|
runner._pre_pull_images(repo_path)
|
|
|
|
logging.info(
|
|
"Dump iptables before run %s",
|
|
subprocess.check_output("sudo iptables -nvL", shell=True),
|
|
)
|
|
all_tests = self._get_all_tests(repo_path)
|
|
|
|
if self.run_by_hash_total != 0:
|
|
grouped_tests = self.group_test_by_file(all_tests)
|
|
all_filtered_by_hash_tests = []
|
|
for group, tests_in_group in grouped_tests.items():
|
|
if stringhash(group) % self.run_by_hash_total == self.run_by_hash_num:
|
|
all_filtered_by_hash_tests += tests_in_group
|
|
all_tests = all_filtered_by_hash_tests
|
|
|
|
parallel_skip_tests = self._get_parallel_tests_skip_list(repo_path)
|
|
logging.info(
|
|
"Found %s tests first 3 %s", len(all_tests), " ".join(all_tests[:3])
|
|
)
|
|
filtered_sequential_tests = list(
|
|
filter(lambda test: has_test(all_tests, test), parallel_skip_tests)
|
|
)
|
|
filtered_parallel_tests = list(
|
|
filter(
|
|
lambda test: not has_test(parallel_skip_tests, test),
|
|
all_tests,
|
|
)
|
|
)
|
|
not_found_tests = list(
|
|
filter(
|
|
lambda test: not has_test(all_tests, test),
|
|
parallel_skip_tests,
|
|
)
|
|
)
|
|
logging.info(
|
|
"Found %s tests first 3 %s, parallel %s, other %s",
|
|
len(all_tests),
|
|
" ".join(all_tests[:3]),
|
|
len(filtered_parallel_tests),
|
|
len(filtered_sequential_tests),
|
|
)
|
|
logging.info(
|
|
"Not found %s tests first 3 %s",
|
|
len(not_found_tests),
|
|
" ".join(not_found_tests[:3]),
|
|
)
|
|
|
|
grouped_tests = self.group_test_by_file(filtered_sequential_tests)
|
|
i = 0
|
|
for par_group in chunks(filtered_parallel_tests, PARALLEL_GROUP_SIZE):
|
|
grouped_tests[f"parallel{i}"] = par_group
|
|
i += 1
|
|
logging.info("Found %s tests groups", len(grouped_tests))
|
|
|
|
counters = {
|
|
"ERROR": [],
|
|
"PASSED": [],
|
|
"FAILED": [],
|
|
"SKIPPED": [],
|
|
"BROKEN": [],
|
|
"NOT_FAILED": [],
|
|
}
|
|
tests_times = defaultdict(float)
|
|
tests_log_paths = defaultdict(list)
|
|
|
|
items_to_run = list(grouped_tests.items())
|
|
|
|
logging.info("Total test groups %s", len(items_to_run))
|
|
if self.shuffle_test_groups():
|
|
logging.info("Shuffling test groups")
|
|
random.shuffle(items_to_run)
|
|
|
|
broken_tests = list()
|
|
if self.use_analyzer:
|
|
with open(f"{repo_path}/tests/analyzer_integration_broken_tests.txt") as f:
|
|
broken_tests = f.read().splitlines()
|
|
logging.info(f"Broken tests in the list: {len(broken_tests)}")
|
|
|
|
for group, tests in items_to_run:
|
|
logging.info("Running test group %s containing %s tests", group, len(tests))
|
|
group_counters, group_test_times, log_paths = self.try_run_test_group(
|
|
repo_path, group, tests, MAX_RETRY, NUM_WORKERS, broken_tests
|
|
)
|
|
total_tests = 0
|
|
for counter, value in group_counters.items():
|
|
logging.info(
|
|
"Tests from group %s stats, %s count %s", group, counter, len(value)
|
|
)
|
|
counters[counter] += value
|
|
logging.info(
|
|
"Totally have %s with status %s", len(counters[counter]), counter
|
|
)
|
|
total_tests += len(counters[counter])
|
|
logging.info("Totally finished tests %s/%s", total_tests, len(all_tests))
|
|
|
|
for test_name, test_time in group_test_times.items():
|
|
tests_times[test_name] = test_time
|
|
tests_log_paths[test_name] = log_paths
|
|
|
|
if len(counters["FAILED"]) + len(counters["ERROR"]) >= 20:
|
|
logging.info("Collected more than 20 failed/error tests, stopping")
|
|
break
|
|
|
|
if counters["FAILED"] or counters["ERROR"]:
|
|
logging.info(
|
|
"Overall status failure, because we have tests in FAILED or ERROR state"
|
|
)
|
|
result_state = "failure"
|
|
else:
|
|
logging.info("Overall success!")
|
|
result_state = "success"
|
|
|
|
test_result = []
|
|
for state in (
|
|
"ERROR",
|
|
"FAILED",
|
|
"PASSED",
|
|
"SKIPPED",
|
|
"BROKEN",
|
|
"NOT_FAILED",
|
|
):
|
|
if state == "PASSED":
|
|
text_state = "OK"
|
|
elif state == "FAILED":
|
|
text_state = "FAIL"
|
|
else:
|
|
text_state = state
|
|
test_result += [
|
|
(c, text_state, "{:.2f}".format(tests_times[c]), tests_log_paths[c])
|
|
for c in counters[state]
|
|
]
|
|
|
|
failed_sum = len(counters["FAILED"]) + len(counters["ERROR"])
|
|
status_text = "fail: {}, passed: {}".format(failed_sum, len(counters["PASSED"]))
|
|
|
|
if self.soft_deadline_time < time.time():
|
|
status_text = "Timeout, " + status_text
|
|
result_state = "failure"
|
|
|
|
if not counters or sum(len(counter) for counter in counters.values()) == 0:
|
|
status_text = "No tests found for some reason! It's a bug"
|
|
result_state = "failure"
|
|
|
|
if "(memory)" in self.params["context_name"]:
|
|
result_state = "success"
|
|
|
|
return result_state, status_text, test_result, []
|
|
|
|
|
|
def write_results(results_file, status_file, results, status):
|
|
with open(results_file, "w") as f:
|
|
out = csv.writer(f, delimiter="\t")
|
|
out.writerows(results)
|
|
with open(status_file, "w") as f:
|
|
out = csv.writer(f, delimiter="\t")
|
|
out.writerow(status)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
|
|
|
|
repo_path = os.environ.get("CLICKHOUSE_TESTS_REPO_PATH")
|
|
build_path = os.environ.get("CLICKHOUSE_TESTS_BUILD_PATH")
|
|
result_path = os.environ.get("CLICKHOUSE_TESTS_RESULT_PATH")
|
|
params_path = os.environ.get("CLICKHOUSE_TESTS_JSON_PARAMS_PATH")
|
|
|
|
params = json.loads(open(params_path, "r").read())
|
|
runner = ClickhouseIntegrationTestsRunner(result_path, params)
|
|
|
|
logging.info("Running tests")
|
|
|
|
# Avoid overlaps with previous runs
|
|
logging.info("Clearing dmesg before run")
|
|
subprocess.check_call( # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL
|
|
"dmesg --clear", shell=True
|
|
)
|
|
|
|
state, description, test_results, _ = runner.run_impl(repo_path, build_path)
|
|
logging.info("Tests finished")
|
|
|
|
# Dump dmesg (to capture possible OOMs)
|
|
logging.info("Dumping dmesg")
|
|
subprocess.check_call( # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL
|
|
"dmesg -T", shell=True
|
|
)
|
|
|
|
status = (state, description)
|
|
out_results_file = os.path.join(str(runner.path()), "test_results.tsv")
|
|
out_status_file = os.path.join(str(runner.path()), "check_status.tsv")
|
|
write_results(out_results_file, out_status_file, test_results, status)
|
|
logging.info("Result written")
|