ClickHouse/tests/integration/runner

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import argparse
import glob
import logging
import os
import random
import shlex
import shutil
import signal
import string
import subprocess
import sys
from typing import Any

from integration_test_images import get_docker_env


def random_str(length: int = 6) -> str:
    """Return a random string of *length* lowercase ASCII letters and digits.

    Characters are picked with ``random.SystemRandom``.
    """
    rng = random.SystemRandom()
    symbols = string.ascii_lowercase + string.digits
    picked = [rng.choice(symbols) for _ in range(length)]
    return "".join(picked)


# Directory containing this script, with symlinks resolved.
CUR_FILE_DIR = os.path.dirname(os.path.realpath(__file__))
# Default repository root: two levels above the script directory.
DEFAULT_CLICKHOUSE_ROOT = os.path.abspath(os.path.join(CUR_FILE_DIR, "..", ".."))
# Working directory at start-up; relative CLI paths are resolved against it.
CURRENT_WORK_DIR = os.getcwd()
VOLUME_NAME = "clickhouse_integration_tests"
# A random suffix makes the container name differ between runner invocations.
CONTAINER_NAME = "_".join((VOLUME_NAME, random_str()))

# Locations inside the repository, relative to its root.
CONFIG_DIR_IN_REPO = "programs/server"
INTEGRATION_DIR_IN_REPO = "tests/integration"
UTILS_DIR_IN_REPO = "utils"

# Docker image (without tag) that the tests are run in.
DIND_INTEGRATION_TESTS_IMAGE_NAME = "clickhouse/integration-tests-runner"


def physical_memory() -> int:
    """Return the amount of physical memory of the host, in bytes.

    Tries ``os.sysconf`` first and falls back to ``sysctl -n hw.memsize``
    when the sysconf names are not recognised.
    """
    try:
        # for linux
        page_size = os.sysconf("SC_PAGE_SIZE")
        page_count = os.sysconf("SC_PHYS_PAGES")
    except ValueError:
        # for MacOS
        raw_size = subprocess.check_output(["sysctl", "-n", "hw.memsize"])
        return int(raw_size.strip())
    return page_size * page_count


def check_args_and_update_paths(args: argparse.Namespace) -> None:
    """Normalize the path options in *args* in place and validate them.

    Relative paths given on the command line are made absolute against
    ``CURRENT_WORK_DIR``. Unset directories fall back to their default
    location under the ClickHouse root, and unset bridge binaries are
    expected next to the main ``clickhouse`` binary.

    Raises:
        FileNotFoundError: if a binary, the base configs dir, the cases dir
            or the ClickHouse root does not exist, or if the base configs dir
            has neither ``config.xml``/``config.yaml`` nor
            ``users.xml``/``users.yaml``. ``utils_dir`` and ``dockerd_volume``
            are resolved but not checked for existence.
    """

    def from_cwd(path: str) -> str:
        # Absolute paths pass through; relative ones are anchored at the
        # directory the runner was started from.
        if os.path.isabs(path):
            return path
        return os.path.abspath(os.path.join(CURRENT_WORK_DIR, path))

    # --clickhouse-root is only used to locate defaults for the other paths.
    if not args.clickhouse_root:
        logging.info("ClickHouse root is not set. Will use %s", DEFAULT_CLICKHOUSE_ROOT)
        repo_root = DEFAULT_CLICKHOUSE_ROOT
    elif os.path.isabs(args.clickhouse_root):
        repo_root = args.clickhouse_root
    else:
        repo_root = os.path.abspath(args.clickhouse_root)

    args.binary = from_cwd(args.binary)
    binary_dir = os.path.dirname(args.binary)

    if args.odbc_bridge_binary:
        args.odbc_bridge_binary = from_cwd(args.odbc_bridge_binary)
    else:
        args.odbc_bridge_binary = os.path.join(binary_dir, "clickhouse-odbc-bridge")

    if args.library_bridge_binary:
        args.library_bridge_binary = from_cwd(args.library_bridge_binary)
    else:
        args.library_bridge_binary = os.path.join(
            binary_dir, "clickhouse-library-bridge"
        )

    if args.base_configs_dir:
        args.base_configs_dir = from_cwd(args.base_configs_dir)
    else:
        args.base_configs_dir = os.path.abspath(
            os.path.join(repo_root, CONFIG_DIR_IN_REPO)
        )
        logging.info("Base configs dir is not set. Will use %s", args.base_configs_dir)

    if args.cases_dir:
        args.cases_dir = from_cwd(args.cases_dir)
    else:
        args.cases_dir = os.path.abspath(
            os.path.join(repo_root, INTEGRATION_DIR_IN_REPO)
        )
        logging.info("Cases dir is not set. Will use %s", args.cases_dir)

    if args.utils_dir:
        args.utils_dir = from_cwd(args.utils_dir)
    else:
        args.utils_dir = os.path.abspath(os.path.join(repo_root, UTILS_DIR_IN_REPO))
        logging.info("utils dir is not set. Will use %s", args.utils_dir)

    logging.info(
        "base_configs_dir: %s, binary: %s, cases_dir: %s ",
        args.base_configs_dir,
        args.binary,
        args.cases_dir,
    )

    # Report the first required path that is missing, in this fixed order.
    required_paths = (
        args.binary,
        args.odbc_bridge_binary,
        args.library_bridge_binary,
        args.base_configs_dir,
        args.cases_dir,
        repo_root,
    )
    missing = next((p for p in required_paths if not os.path.exists(p)), None)
    if missing is not None:
        raise FileNotFoundError(f"Path {missing} doesn't exist")

    if args.dockerd_volume:
        args.dockerd_volume = from_cwd(args.dockerd_volume)

    # Both the server config and the users config must be present, each in
    # either XML or YAML form.
    for stem in ("config", "users"):
        candidates = (
            os.path.join(args.base_configs_dir, f"{stem}.{ext}")
            for ext in ("xml", "yaml")
        )
        if not any(os.path.exists(candidate) for candidate in candidates):
            raise FileNotFoundError(
                f"No {stem}.xml or {stem}.yaml in {args.base_configs_dir}"
            )


def _is_legacy_iptables(path: str) -> bool:
    """Tell whether the executable at *path* is a 'legacy' iptables flavour.

    The symlink chain is resolved with ``os.path.realpath`` and only the
    final file name is inspected, so a directory that happens to contain
    "legacy" in its name does not trigger a false positive.

    Raises:
        FileNotFoundError: if *path* does not exist (any more).
    """
    # Fail loudly if the binary vanished after the PATH lookup.
    os.stat(path)
    return "legacy" in os.path.basename(os.path.realpath(path))


def check_iptables_legacy() -> None:
    """Exit with status 1 if the host ``iptables``/``ip6tables`` is in legacy mode.

    ``iptables`` must be present in PATH; a missing ``ip6tables`` is only
    reported and then skipped. The previous implementation searched for
    "legacy" in ``str(os.stat(path))``, which only renders numeric ``st_*``
    fields and therefore never matched; the resolved executable name is
    inspected instead.
    """
    iptables_path = shutil.which("iptables")
    ip6tables_path = shutil.which("ip6tables")

    if iptables_path is None:
        print("Error: 'iptables' not found in PATH")
        sys.exit(1)
    if ip6tables_path is None:
        print("Error: 'ip6tables' not found in PATH, ignoring")

    for tool, path in (("iptables", iptables_path), ("ip6tables", ip6tables_path)):
        if path is None:
            continue

        try:
            legacy = _is_legacy_iptables(path)
        except FileNotFoundError:
            # Name the path that actually failed, not always the iptables one.
            print(f"Error: '{path}' not found")
            sys.exit(1)

        if legacy:
            print(
                f"""
                {tool} on your host machine is in 'legacy' mode. This is not supported.

                Please switch to 'nftables' mode, usualy by installing `iptables-nft` or `nftables`, consult your distribution manual.
                Or, use --ignore-iptables-legacy-check.
                """
            )
            sys.exit(1)


def docker_kill_handler_handler(signum: Any, frame: Any) -> None:
    """Signal handler: kill the runner container, then raise KeyboardInterrupt.

    The previous implementation only listed the matching containers with
    ``docker ps`` and never killed them. A failing ``docker ps`` also
    replaced the intended ``KeyboardInterrupt`` with ``CalledProcessError``.
    Killing is now best-effort and the interrupt is always raised.

    Args:
        signum: signal number (unused).
        frame: interrupted stack frame (unused).

    Raises:
        KeyboardInterrupt: always.
    """
    _, _ = signum, frame
    try:
        # Only running containers can be killed, so `--all` is not needed here.
        container_ids = subprocess.check_output(
            ["docker", "ps", "--quiet", "--filter", f"name={CONTAINER_NAME}"],
            universal_newlines=True,
        ).split()
        if container_ids:
            subprocess.check_call(["docker", "kill", *container_ids])
    except (OSError, subprocess.CalledProcessError) as ex:
        logging.warning("Failed to kill container %s: %s", CONTAINER_NAME, ex)
    raise KeyboardInterrupt("Killed by Ctrl+C")


# Installed at module level, so Ctrl+C (SIGINT) is routed through
# docker_kill_handler_handler, which raises KeyboardInterrupt.
signal.signal(signal.SIGINT, docker_kill_handler_handler)

# Integration tests runner should allow to run tests on several versions of ClickHouse.
# Integration tests should be portable.
# To run integration tests the following artifacts should be sufficient:
#   - clickhouse binaries (env CLICKHOUSE_TESTS_SERVER_BIN_PATH or --binary arg)
#   - clickhouse default configs(config.xml, users.xml) from same version as binary (env CLICKHOUSE_TESTS_BASE_CONFIG_DIR or --base-configs-dir arg)
#   - odbc bridge binary (env CLICKHOUSE_TESTS_ODBC_BRIDGE_BIN_PATH or --odbc-bridge-binary arg)
#   - library bridge binary (env CLICKHOUSE_TESTS_LIBRARY_BRIDGE_BIN_PATH or --library-bridge-binary)
#   - tests/integration directory with all test cases and configs (env CLICKHOUSE_TESTS_INTEGRATION_PATH or --cases-dir)
#
# 1) --clickhouse-root is only used to determine other paths on default places
# 2) path of runner script is used to determine paths for trivial case, when we run it from repository

# Script entry point: parse the command line, validate paths, assemble the
# `docker run` command for the runner image and execute it.
if __name__ == "__main__":
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [ %(process)d ] %(levelname)s : %(message)s (%(filename)s:%(lineno)s, %(funcName)s)",
    )

    parser = argparse.ArgumentParser(
        description="ClickHouse integration tests runner",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    parser.add_argument(
        "--binary",
        default=os.environ.get(
            "CLICKHOUSE_TESTS_SERVER_BIN_PATH",
            os.environ.get("CLICKHOUSE_TESTS_CLIENT_BIN_PATH", "/usr/bin/clickhouse"),
        ),
        help="Path to clickhouse binary. For example /usr/bin/clickhouse",
    )

    parser.add_argument(
        "--odbc-bridge-binary",
        default=os.environ.get("CLICKHOUSE_TESTS_ODBC_BRIDGE_BIN_PATH", ""),
        help="Path to clickhouse-odbc-bridge binary. Defaults to clickhouse-odbc-bridge in the same dir as clickhouse.",
    )

    parser.add_argument(
        "--library-bridge-binary",
        default=os.environ.get("CLICKHOUSE_TESTS_LIBRARY_BRIDGE_BIN_PATH", ""),
        help="Path to clickhouse-library-bridge binary. Defaults to clickhouse-library-bridge in the same dir as clickhouse.",
    )

    parser.add_argument(
        "--base-configs-dir",
        default=os.environ.get("CLICKHOUSE_TESTS_BASE_CONFIG_DIR"),
        help="Path to clickhouse base configs directory with config.xml/users.xml",
    )

    parser.add_argument(
        "--cases-dir",
        default=os.environ.get("CLICKHOUSE_TESTS_INTEGRATION_PATH"),
        help="Path to integration tests cases and configs directory. For example tests/integration in repository",
    )

    parser.add_argument(
        "--utils-dir",
        default=os.environ.get("CLICKHOUSE_UTILS_DIR"),
        help="Path to the 'utils' directory in repository. Used to provide Python modules for grpc protocol schemas which are located in the 'utils' directory",
    )

    parser.add_argument(
        "--clickhouse-root",
        help="Path to repository root folder. Used to take configuration from repository default paths.",
    )

    parser.add_argument(
        "--command",
        default="",
        help="Set it to run some other command in container (for example bash)",
    )

    parser.add_argument(
        "--disable-net-host",
        action="store_true",
        default=False,
        help="Don't use net host in parent docker container",
    )

    parser.add_argument(
        "--network",
        help="Set network driver for runnner container (defaults to `host`)",
    )

    parser.add_argument(
        "--memory",
        # Default is host memory - 2GiB
        default=physical_memory() - 2 * 1024**3,
        help="Set the memory limit for the docker container, "
        "accepts the same argument as `docker run --memory`",
    )

    parser.add_argument(
        "--docker-image-version",
        default="latest",
        help="Version of docker image which runner will use to run tests",
    )

    parser.add_argument(
        "--docker-compose-images-tags",
        action="append",
        help="Set non-default tags for images used in docker compose recipes(yandex/my_container:my_tag)",
    )

    parser.add_argument(
        "-n", "--parallel", action="store", dest="parallel", help="Parallelism"
    )

    parser.add_argument(
        "--count", action="store", type=int, dest="count", help="Repeat count"
    )

    parser.add_argument(
        "--no-random",
        action="store",
        dest="no_random",
        help="Disable tests order randomization",
    )

    parser.add_argument(
        "--pre-pull",
        action="store_true",
        default=False,
        dest="pre_pull",
        help="Pull images for docker_compose before all other actions",
    )

    parser.add_argument(
        "-t",
        "--tests_list",
        action="store",
        nargs="+",
        default=[],
        dest="tests_list",
        help="List of tests to run",
    )

    parser.add_argument(
        "-k",
        "--keyword_expression",
        action="store",
        dest="keyword_expression",
        help="pytest keyword expression",
    )

    parser.add_argument(
        "--tmpfs",
        action="store_true",
        default=False,
        dest="tmpfs",
        help="Use tmpfs for dockerd files",
    )

    parser.add_argument(
        "--old-analyzer",
        action="store_true",
        default=False,
        dest="old_analyzer",
        help="Use old analyzer infrastructure",
    )

    parser.add_argument(
        "--cleanup-containers",
        action="store_true",
        default=False,
        dest="cleanup_containers",
        help="Remove all running containers on test session start",
    )

    parser.add_argument(
        "--dockerd-volume-dir",
        action="store",
        dest="dockerd_volume",
        help="Bind volume to this dir to use for dockerd files",
    )

    parser.add_argument(
        "--ignore-iptables-legacy-check",
        action="store_true",
        default=False,
        help="Ignore iptables-legacy usage check",
    )

    parser.add_argument("pytest_args", nargs="*", help="args for pytest command")

    args = parser.parse_args()

    check_args_and_update_paths(args)

    if not args.ignore_iptables_legacy_check:
        check_iptables_legacy()
    else:
        logging.warning("Skipping iptables-legacy check")

    parallel_args = ""
    if args.parallel:
        parallel_args += "--dist=loadfile"
        # A plain f-string: the former trailing `.format()` re-interpreted the
        # value and raised if it contained braces.
        parallel_args += f" -n {args.parallel}"

    repeat_args = (
        f" --count {args.count}" if args.count is not None and args.count > 0 else ""
    )

    rand_args = ""
    # if not args.no_random:
    #     rand_args += f"--random-seed={os.getpid()}"

    net = ""
    if args.network:
        net = f"--net={args.network}"
    elif not args.disable_net_host:
        net = "--net=host"

    env_tags = ""

    if args.docker_compose_images_tags is not None:
        for img_tag in args.docker_compose_images_tags:
            # Split on the LAST colon only, so references that carry a
            # registry port (host:5000/image:tag) do not raise on unpacking.
            image, tag = img_tag.rsplit(":", 1)
            env_tag = get_docker_env(image, tag)
            if env_tag:
                env_tags += env_tag
            else:
                logging.info("Unknown image %s", image)

    # create named volume which will be used inside to store images and other docker related files,
    # to avoid redownloading it every time
    #
    # should be removed manually when not needed
    dockerd_internal_volume = ""
    if args.tmpfs:
        dockerd_internal_volume = "--tmpfs /var/lib/docker -e DOCKER_RAMDISK=true"
    elif args.dockerd_volume:
        dockerd_internal_volume = (
            f"--mount type=bind,source={args.dockerd_volume},target=/var/lib/docker"
        )
    else:
        # Both docker calls below are deliberately best-effort: a failure is
        # reported and the run continues.
        try:
            subprocess.check_call(
                f"docker volume create {VOLUME_NAME}_volume", shell=True
            )
        except Exception as ex:
            print("Volume creation failed, probably it already exists, exception", ex)
        # TODO: this part cleans out stale volumes produced by container name
        # randomizer, we should remove it after Sep 2022
        try:
            subprocess.check_call(
                f"docker volume ls -q | grep '{VOLUME_NAME}_.*_volume' | xargs --no-run-if-empty docker volume rm",
                shell=True,
            )
        except Exception as ex:
            print("Probably, some stale volumes still there, just continue:", ex)
        # TODO END
        dockerd_internal_volume = f"--volume={VOLUME_NAME}_volume:/var/lib/docker"

    # If enabled we kill and remove containers before pytest session run.
    env_cleanup = ""
    if args.cleanup_containers:
        env_cleanup = "-e PYTEST_CLEANUP_CONTAINERS=1"
    # enable tty mode & interactive for docker if we have real tty
    tty = ""
    if sys.stdout.isatty() and sys.stdin.isatty():
        tty = "-it"

    # Remove old logs.
    for old_log_path in glob.glob(args.cases_dir + "/pytest*.log"):
        os.remove(old_log_path)

    if args.keyword_expression:
        args.pytest_args += ["-k", args.keyword_expression]

    use_old_analyzer = ""
    if args.old_analyzer:
        use_old_analyzer = "-e CLICKHOUSE_USE_OLD_ANALYZER=1"

    # NOTE: since pytest options is in the argument value already we need to additionally escape '"'
    pytest_opts = " ".join(
        map(lambda x: shlex.quote(x).replace('"', '\\"'), args.pytest_args)
    )
    tests_list = " ".join(
        map(lambda x: shlex.quote(x).replace('"', '\\"'), args.tests_list)
    )

    cmd_base = (
        f"docker run {net} {tty} --rm --name {CONTAINER_NAME} "
        "--privileged --dns-search='.' "  # since recent dns search leaks from host
        f"--memory={args.memory} "
        f"--volume={args.odbc_bridge_binary}:/clickhouse-odbc-bridge "
        f"--volume={args.binary}:/clickhouse "
        f"--volume={args.library_bridge_binary}:/clickhouse-library-bridge "
        f"--volume={args.base_configs_dir}:/clickhouse-config "
        f"--volume={args.cases_dir}:/ClickHouse/tests/integration "
        f"--volume={args.utils_dir}/backupview:/ClickHouse/utils/backupview "
        f"--volume={args.utils_dir}/grpc-client/pb2:/ClickHouse/utils/grpc-client/pb2 "
        f"--volume=/run:/run/host:ro {dockerd_internal_volume} {env_tags} {env_cleanup} "
        f"-e DOCKER_CLIENT_TIMEOUT=300 -e COMPOSE_HTTP_TIMEOUT=600 {use_old_analyzer} -e PYTHONUNBUFFERED=1 "
        f'-e PYTEST_ADDOPTS="{parallel_args} {repeat_args} {pytest_opts} {tests_list} {rand_args} -vvv"'
        f" {DIND_INTEGRATION_TESTS_IMAGE_NAME}:{args.docker_image_version}"
    )

    cmd = cmd_base + " " + args.command
    # The -name pattern is quoted so the host shell cannot glob-expand it
    # against files in the current directory before `find` sees it.
    cmd_pre_pull = (
        f"{cmd_base} find /ClickHouse/tests/integration/compose -name 'docker_compose_*.yml' "
        r"-exec docker compose -f '{}' pull \;"
    )

    # Kill any container that already matches the name filter before starting.
    containers = subprocess.check_output(
        f"docker ps --all --quiet --filter name={CONTAINER_NAME}",
        shell=True,
        universal_newlines=True,
    ).splitlines()
    if containers:
        print(f"Trying to kill containers name={CONTAINER_NAME} ids={containers}")
        subprocess.check_call(f"docker kill {' '.join(containers)}", shell=True)
        print(f"Containers {containers} killed")

    if args.pre_pull:
        print(f"Running pre pull as: '{cmd_pre_pull}'.")
        subprocess.check_call(cmd_pre_pull, shell=True)

    print(f"Running pytest container as: '{cmd}'.")
    subprocess.check_call(cmd, shell=True)