ClickHouse/tests/ci/stress.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""This script is used in docker images for stress tests and upgrade tests"""
import argparse
import logging
import random
import time
from multiprocessing import cpu_count
from pathlib import Path
from subprocess import PIPE, STDOUT, Popen, call, check_output
from typing import List


def get_options(i: int, upgrade_check: bool) -> str:
    """Compose the extra clickhouse-test arguments for the runner with index ``i``.

    Most choices are derived deterministically from ``i`` so that different
    parallel runners exercise different settings; a few settings are toggled
    using the ``random`` module, so two calls with the same arguments may
    return different strings.

    Args:
        i: zero-based index of the parallel test runner.
        upgrade_check: True when running the upgrade check; several settings
            are only added when this is False.

    Returns:
        A single string with runner options followed (when there are any
        client settings) by ``--client-option`` and the space-separated
        settings.
    """
    odd_runner = i % 2 == 1
    stress_only = not upgrade_check

    runner_args = ["--order=random"] if i > 0 else []
    settings = []

    if stress_only and i % 3 == 2:
        runner_args.append(
            f'''--db-engine="Replicated('/test/db/test_{i}', 's1', 'r1')"'''
        )
        settings += ["enable_deflate_qpl_codec=1", "enable_zstd_qat_codec=1"]

    # Without an explicit database name a new database is created for each
    # functional test; odd runners share one database across all their tests.
    if odd_runner:
        runner_args.append(f" --database=test_{i}")

    if i % 3 == 1:
        settings.append("join_use_nulls=1")

    if odd_runner:
        # Odd runners cycle through the join algorithms.
        join_settings_by_slot = {
            0: ["join_algorithm='parallel_hash'"],
            1: ["join_algorithm='partial_merge'"],
            2: ["join_algorithm='full_sorting_merge'"],
            # Some crashes are not fixed in 23.2 yet, so ignore the setting in Upgrade check
            3: ["join_algorithm='grace_hash'"] if stress_only else [],
            4: ["join_algorithm='auto'", "max_rows_in_join=1000"],
        }
        settings += join_settings_by_slot[(i // 2) % 5]

    # The first runner never draws here (short-circuit), the others enable
    # the query cache with probability 1/3.
    if i > 0 and random.random() < 1 / 3:
        settings += [
            "use_query_cache=1",
            "query_cache_nondeterministic_function_handling='ignore'",
            "query_cache_system_table_handling='ignore'",
        ]

    if i % 5 == 1:
        settings += [
            "memory_tracker_fault_probability=0.001",
            "merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0.05",
        ]

    if odd_runner and stress_only:
        settings.append("group_by_use_nulls=1")

    # 12 % 3 == 0, so it's Atomic database
    if i == 12 and stress_only:
        settings += [
            "implicit_transaction=1",
            "throw_on_unsupported_query_inside_transaction=0",
        ]

    if random.random() < 0.1:
        settings.append("optimize_trivial_approximate_count_query=1")

    if random.random() < 0.3:
        settings.append(f"http_make_head_request={random.randint(0, 1)}")

    # TODO: After release 24.3 use ignore_drop_queries_probability for both
    #       stress test and upgrade check
    if stress_only:
        settings.append("ignore_drop_queries_probability=0.5")

    if random.random() < 0.2:
        settings += [
            "enable_parallel_replicas=1",
            "max_parallel_replicas=3",
            "cluster_for_parallel_replicas='parallel_replicas'",
            "parallel_replicas_for_non_replicated_merge_tree=1",
        ]

    if settings:
        runner_args.append(" --client-option " + " ".join(settings))

    return " ".join(runner_args)


def run_func_test(
    cmd: str,
    output_prefix: Path,
    num_processes: int,
    skip_tests_option: str,
    global_time_limit: int,
    upgrade_check: bool,
) -> List[Popen]:
    """Start ``num_processes`` functional-test runners in the background.

    Each runner is launched through the shell with its own options (see
    ``get_options``) and writes both stdout and stderr to
    ``<output_prefix>/stress_test_run_<index>.txt``.

    Args:
        cmd: the test command to launch.
        output_prefix: directory that receives the per-runner output files.
        num_processes: how many runners to start.
        skip_tests_option: option string appended verbatim to every command.
        global_time_limit: passed as ``--global_time_limit``; omitted when falsy.
        upgrade_check: adds ``--upgrade-check`` and is forwarded to ``get_options``.

    Returns:
        The started processes, in launch order. They are not waited for.
    """
    time_limit_arg = ""
    if global_time_limit:
        time_limit_arg = f"--global_time_limit={global_time_limit}"

    upgrade_arg = ""
    if upgrade_check:
        upgrade_arg = "--upgrade-check"

    log_paths = [
        output_prefix / f"stress_test_run_{idx}.txt" for idx in range(num_processes)
    ]

    started = []
    for idx, log_path in enumerate(log_paths):
        with open(log_path, "w", encoding="utf-8") as log_file:
            command_line = " ".join(
                (
                    cmd,
                    get_options(idx, upgrade_check),
                    time_limit_arg,
                    skip_tests_option,
                    upgrade_arg,
                )
            )
            logging.info("Run func tests '%s'", command_line)
            # The child keeps its own copy of the descriptor, so the file can
            # be closed on our side right after the process is spawned.
            # pylint:disable-next=consider-using-with
            runner = Popen(command_line, shell=True, stdout=log_file, stderr=log_file)
            started.append(runner)
            # Stagger the launches a little.
            time.sleep(0.5)
    return started


def compress_stress_logs(output_path: Path, files_prefix: str) -> None:
    """Pack the runner logs into ``stress_run_logs.tar.zst`` and delete them.

    The work is done by a shell pipeline executed inside ``output_path``:
    every file matching ``<files_prefix>*`` is added to the archive and, if
    archiving succeeded, removed.

    Args:
        output_path: directory containing the log files.
        files_prefix: common file-name prefix of the logs to archive.

    Raises:
        subprocess.CalledProcessError: if any step of the pipeline fails.
    """
    pattern = f"{files_prefix}*"
    steps = (
        f"cd {output_path}",
        f"tar --zstd --create --file=stress_run_logs.tar.zst {pattern}",
        f"rm {pattern}",
    )
    check_output(" && ".join(steps), shell=True)


def call_with_retry(query: str, timeout: int = 30, retry_count: int = 5) -> None:
    """Run a shell command, retrying while it exits with a non-zero status.

    The command is executed through the shell at most ``retry_count`` times.
    Between attempts the function sleeps for a number of seconds equal to the
    index of the failed attempt (0, 1, 2, ...). A persistent failure is only
    logged, never raised.

    Args:
        query: the shell command line to execute.
        timeout: per-attempt timeout in seconds, passed to ``subprocess.call``.
        retry_count: maximum number of attempts.

    Raises:
        subprocess.TimeoutExpired: if an attempt exceeds ``timeout``; a timed
            out attempt is not retried.
    """
    logging.info("Running command: %s", query)
    for attempt in range(retry_count):
        code = call(query, shell=True, stderr=STDOUT, timeout=timeout)
        if code == 0:
            return
        if attempt + 1 < retry_count:
            logging.info("Command returned %s, retrying", code)
            time.sleep(attempt)
        else:
            # No attempt follows, so neither claim a retry nor waste time sleeping.
            logging.info(
                "Command returned %s, giving up after %s attempts", code, retry_count
            )


def make_query_command(query: str) -> str:
    """Return a ``clickhouse client`` shell command that executes ``query``.

    The query is placed inside double quotes without any escaping, so it must
    not itself contain characters that are special inside a double-quoted
    shell string. A fixed set of memory-related client options is appended.
    """
    pieces = [
        "clickhouse client",
        f'-q "{query}"',
        "--max_untracked_memory=1Gi",
        "--memory_profiler_step=1Gi",
        "--max_memory_usage_for_user=0",
        "--max_memory_usage_in_client=1000000000",
        "--enable-progress-table-toggle=0",
    ]
    return " ".join(pieces)


def _terminate_gdb() -> None:
    """Stop gdb (if it is attached) and let the server continue running."""
    # We attach gdb to clickhouse-server before running tests
    # to print stacktraces of all crashes even if clickhouse cannot print it for some reason.
    # However, it obstructs checking for hung queries.
    logging.info("Will terminate gdb (if any)")
    call_with_retry("kill -TERM $(pidof gdb)")
    call_with_retry(
        "timeout 50s tail --pid=$(pidof gdb) -f /dev/null || kill -9 $(pidof gdb) ||:",
        timeout=60,
    )
    # Sometimes there is a message `Child process was stopped by signal 19` in logs after stopping gdb
    call_with_retry(
        "kill -CONT $(cat /var/run/clickhouse-server/clickhouse-server.pid) && clickhouse client -q 'SELECT 1 FORMAT Null'"
    )


def _resume_background_activity() -> None:
    """Undo server-wide switches that tests may have left behind."""
    statements = (
        # ThreadFuzzer significantly slows down server and causes false-positive hung check failures
        "SYSTEM STOP THREAD FUZZER",
        # Some tests execute SYSTEM STOP MERGES or similar queries.
        # It may cause some ALTERs to hang.
        # Possibly we should fix tests and forbid to use such queries without specifying table.
        "SYSTEM START MERGES",
        "SYSTEM START DISTRIBUTED SENDS",
        "SYSTEM START TTL MERGES",
        "SYSTEM START MOVES",
        "SYSTEM START FETCHES",
        "SYSTEM START REPLICATED SENDS",
        "SYSTEM START REPLICATION QUEUES",
        "SYSTEM DROP MARK CACHE",
    )
    for statement in statements:
        call_with_retry(make_query_command(statement))


def _kill_known_slow_queries() -> None:
    """Kill queries that are known to be slow, so they are not reported as hung."""
    kill_statements = (
        # Issue #21004, live views are experimental, so let's just suppress it
        "KILL QUERY WHERE upper(query) LIKE 'WATCH %'",
        # It's query from 01232_preparing_sets_race_condition_long,
        # it may take up to 1000 seconds in slow builds
        "KILL QUERY WHERE query LIKE 'insert into tableB select %'",
        # Long query from 00084_external_agregation
        "KILL QUERY WHERE query LIKE 'SELECT URL, uniq(SearchPhrase) AS u FROM "
        "test.hits GROUP BY URL ORDER BY u %'",
        # Long query from 02136_kill_scalar_queries
        "KILL QUERY WHERE query LIKE "
        "'SELECT (SELECT number FROM system.numbers WHERE number = 1000000000000)%'",
    )
    for statement in kill_statements:
        call_with_retry(make_query_command(statement))


def _detach_all_databases() -> None:
    """Start detaching every database except ``system`` without waiting for completion.

    Raises:
        RuntimeError: if listing or detaching the databases failed 5 times in a row.
    """
    for attempt in range(5):
        try:
            # Here we try to detach all databases in async mode.
            # If some queries really hung, then detach will hang too.
            # Otherwise we will get rid of queries which wait for background pool.
            # It can take a long time on slow builds (more than 900 seconds).
            #
            # make_query_command also specifies max_untracked_memory to allow
            # 1GiB of memory to overcommit.
            databases = (
                check_output(
                    make_query_command("SHOW DATABASES"), shell=True, timeout=30
                )
                .decode("utf-8")
                .strip()
                .split()
            )
            for db in databases:
                if db == "system":
                    continue
                command = make_query_command(f"DETACH DATABASE {db}")
                # we don't wait for detach
                # pylint:disable-next=consider-using-with
                Popen(command, shell=True)
            return
        except Exception as ex:
            logging.error("Failed to SHOW or DETACH databases, will retry %s", ex)
            time.sleep(attempt)
    raise RuntimeError(
        "Cannot drop databases after stress tests. Probably server consumed "
        "too much memory and cannot execute simple queries"
    )


def prepare_for_hung_check(drop_databases: bool) -> bool:
    """Bring the server into a state where leftover queries can be told apart from hung ones.

    The function terminates gdb, re-enables background activity that tests
    may have stopped, kills queries that are known to be slow, optionally
    detaches all non-system databases, and then waits (bounded by a 330 second
    timeout) for the remaining queries to finish.

    Args:
        drop_databases: when true, every database except ``system`` is
            detached asynchronously before waiting.

    Returns:
        False only if the server reported that no query has been running for
        more than 300 seconds. True otherwise, including the case where the
        server could not be asked.

    Raises:
        RuntimeError: if the databases could not be listed/detached after 5 attempts.
        subprocess.TimeoutExpired: if one of the helper commands run through
            ``call``/``call_with_retry`` exceeds its timeout.
    """
    # FIXME this function should not exist, but...

    _terminate_gdb()
    _resume_background_activity()
    _kill_known_slow_queries()

    if drop_databases:
        _detach_all_databases()

    # Wait for last queries to finish if any, not longer than 300 seconds
    call(
        make_query_command(
            """
    SELECT sleepEachRow((
        SELECT maxOrDefault(300 - elapsed) + 1
        FROM system.processes
        WHERE query NOT LIKE '%FROM system.processes%' AND elapsed < 300
    ) / 300)
    FROM numbers(300)
    FORMAT Null
    SETTINGS function_sleep_max_microseconds_per_block = 0
    """
        ),
        shell=True,
        stderr=STDOUT,
        timeout=330,
    )

    # Even if all clickhouse-test processes are finished, there are probably some sh scripts,
    # which still run some new queries. Let's ignore them.
    try:
        query = 'clickhouse client -q "SELECT count() FROM system.processes where elapsed > 300" '
        output = (
            check_output(query, shell=True, stderr=STDOUT, timeout=30)
            .decode("utf-8")
            .strip()
        )
        if int(output) == 0:
            return False
    except Exception as ex:
        # Best effort: when the server cannot answer we conservatively assume
        # that long-running queries exist, but leave a trace of the reason.
        # KeyboardInterrupt/SystemExit are deliberately not swallowed.
        logging.info(
            "Failed to count long running queries, assuming there are some: %s", ex
        )
    return True


def is_ubsan_build() -> bool:
    """Tell whether the server reports ``-fsanitize=undefined`` in its CXX_FLAGS.

    The flags are read from ``system.build_options`` through
    ``clickhouse client``. Any failure to obtain them is logged and treated
    as "not an UBSan build".
    """
    flags_query = (
        "clickhouse client -q \"SELECT value FROM system.build_options "
        "WHERE name = 'CXX_FLAGS'\" "
    )
    try:
        raw_output = check_output(flags_query, shell=True, stderr=STDOUT, timeout=30)
        cxx_flags = raw_output.decode("utf-8").strip()
    except Exception as e:
        logging.info("Failed to get build flags: %s", str(e))
        return False
    return "-fsanitize=undefined" in cxx_flags


def parse_args() -> argparse.Namespace:
    """Parse the command line of the stress-test script.

    ``--output-folder`` is mandatory: the rest of the script joins file names
    onto it unconditionally, so a missing value is rejected here with a usage
    error instead of failing later with a ``TypeError``.

    Returns:
        The parsed arguments as an ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser(
        description="ClickHouse script for running stresstest"
    )
    parser.add_argument(
        "--test-cmd",
        default="/usr/bin/clickhouse-test",
        help="command used to launch the functional tests",
    )
    parser.add_argument(
        "--skip-func-tests",
        default="",
        help="option string appended verbatim to every functional test command",
    )
    parser.add_argument(
        "--server-log-folder",
        default="/var/log/clickhouse-server",
        type=Path,
        help="folder with the server logs",
    )
    parser.add_argument(
        "--output-folder",
        type=Path,
        required=True,
        help="folder that receives the test output, logs archive and results",
    )
    parser.add_argument(
        "--global-time-limit",
        type=int,
        default=1800,
        help="value forwarded as --global_time_limit to the test command; "
        "0 omits the option",
    )
    parser.add_argument(
        "--num-parallel",
        type=int,
        default=cpu_count(),
        help="number of functional test processes to run in parallel",
    )
    parser.add_argument(
        "--upgrade-check",
        action="store_true",
        help="run in upgrade check mode",
    )
    parser.add_argument(
        "--hung-check",
        action="store_true",
        default=False,
        help="check for hung queries after the tests have finished",
    )
    # make sense only for hung check
    parser.add_argument(
        "--drop-databases",
        action="store_true",
        default=False,
        help="detach all databases before the hung check (requires --hung-check)",
    )
    return parser.parse_args()


def main():
    """Run the stress test: start the runners, wait for them, then optionally hung-check.

    After all functional-test processes have exited, their logs are
    compressed. With ``--hung-check`` the server is prepared and the
    ``00001_select_1`` test is run in hung-check mode; unless that run fails
    while long-running queries are present (and the build is not UBSan), an
    "OK" line is written to ``test_results.tsv`` and the hung check log is
    removed.
    """
    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
    args = parse_args()

    if args.drop_databases and not args.hung_check:
        raise argparse.ArgumentTypeError(
            "--drop-databases only used in hung check (--hung-check)"
        )

    # FIXME Hung check with ubsan is temporarily disabled due to
    # https://github.com/ClickHouse/ClickHouse/issues/45372
    suppress_hung_check = is_ubsan_build()

    runners = run_func_test(
        args.test_cmd,
        args.output_folder,
        args.num_parallel,
        args.skip_func_tests,
        args.global_time_limit,
        args.upgrade_check,
    )

    logging.info("Will wait functests to finish")
    total = len(runners)
    while True:
        # Poll every runner on each pass; exit codes are not inspected here.
        finished = sum(1 for runner in runners if runner.poll() is not None)
        if finished == total:
            break
        logging.info("Finished %s from %s processes", finished, total)
        time.sleep(5)

    logging.info("All processes finished")

    logging.info("Compressing stress logs")
    compress_stress_logs(args.output_folder, "stress_test_run_")
    logging.info("Logs compressed")

    if not args.hung_check:
        logging.info("Stress test finished")
        return

    try:
        have_long_running_queries = prepare_for_hung_check(args.drop_databases)
    except Exception as ex:
        logging.error("Failed to prepare for hung check: %s", str(ex))
        # Be conservative: without preparation we cannot rule out hung queries.
        have_long_running_queries = True

    logging.info("Checking if some queries hung")
    hung_check_args = (
        args.test_cmd,
        # Do not track memory allocations up to 1Gi,
        # this will allow to ignore server memory limit (max_server_memory_usage) for this query.
        #
        # NOTE: memory_profiler_step should be also adjusted, because:
        #
        #     untracked_memory_limit = min(settings.max_untracked_memory, settings.memory_profiler_step)
        #
        # NOTE: that if there will be queries with GROUP BY, this trick
        # will not work due to CurrentMemoryTracker::check() from
        # Aggregator code.
        # But right now it should work, since neither hung check, nor 00001_select_1 has GROUP BY.
        "--client-option",
        "max_untracked_memory=1Gi",
        "max_memory_usage_for_user=0",
        "memory_profiler_step=1Gi",
        # Use system database to avoid CREATE/DROP DATABASE queries
        "--database=system",
        "--hung-check",
        "--report-logs-stats",
        "00001_select_1",
    )
    cmd = " ".join(hung_check_args)

    hung_check_log: Path = args.output_folder / "hung_check.log"
    # Duplicate the hung check output into the log file via tee.
    with Popen(["/usr/bin/tee", hung_check_log], stdin=PIPE) as tee:
        res = call(cmd, shell=True, stdout=tee.stdin, stderr=STDOUT, timeout=600)
        if tee.stdin is not None:
            tee.stdin.close()

    queries_hung = res != 0 and have_long_running_queries and not suppress_hung_check
    if queries_hung:
        logging.info("Hung check failed with exit code %d", res)
    else:
        results_path = args.output_folder / "test_results.tsv"
        with open(results_path, "w+", encoding="utf-8") as results:
            results.write("No queries hung\tOK\t\\N\t\n")
            hung_check_log.unlink()

    logging.info("Stress test finished")


# Run the stress test only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()