Clean up libfuzzer CI check; fix TeePopen to escape non-decodable bytes

2024-11-25 17:12:03 +00:00 · 2023-09-14 20:06:53 +00:00 · 2023-09-14 20:06:53 +00:00 · 3a14bde95a
commit 3a14bde95a
parent cd0c775355
3 changed files with 17 additions and 331 deletions
--- a/docker/test/libfuzzer/run_libfuzzer.sh
+++ b/docker/test/libfuzzer/run_libfuzzer.sh
@ -1,77 +0,0 @@
-#!/bin/bash -eu
-
-# Fuzzer runner. Appends .options arguments and seed corpus to users args.
-# Usage: $0 <fuzzer_name> <fuzzer_args>
-
-# export PATH=$OUT:$PATH
-# cd $OUT
-
-DEBUGGER=${DEBUGGER:-}
-FUZZER_ARGS=${FUZZER_ARGS:-}
-
-function run_fuzzer() {
-    FUZZER=$1
-
-    echo Running fuzzer "$FUZZER"
-
-    CORPUS_DIR=""
-    if [ -d "${FUZZER}.in" ]; then
-        CORPUS_DIR="${FUZZER}.in"
-    fi
-
-    OPTIONS_FILE="${FUZZER}.options"
-    CUSTOM_LIBFUZZER_OPTIONS=""
-
-    if [ -f "$OPTIONS_FILE" ]; then
-        custom_asan_options=$(/parse_options.py "$OPTIONS_FILE" asan)
-        if [ -n "$custom_asan_options" ]; then
-            export ASAN_OPTIONS="$ASAN_OPTIONS:$custom_asan_options"
-        fi
-
-        custom_msan_options=$(/parse_options.py "$OPTIONS_FILE" msan)
-        if [ -n "$custom_msan_options" ]; then
-            export MSAN_OPTIONS="$MSAN_OPTIONS:$custom_msan_options"
-        fi
-
-        custom_ubsan_options=$(/parse_options.py "$OPTIONS_FILE" ubsan)
-        if [ -n "$custom_ubsan_options" ]; then
-            export UBSAN_OPTIONS="$UBSAN_OPTIONS:$custom_ubsan_options"
-        fi
-
-        CUSTOM_LIBFUZZER_OPTIONS=$(/parse_options.py "$OPTIONS_FILE" libfuzzer)
-    fi
-
-    CMD_LINE="./$FUZZER $FUZZER_ARGS"
-    CMD_LINE="$CMD_LINE $CORPUS_DIR"
-
-    if [[ -n "$CUSTOM_LIBFUZZER_OPTIONS" ]]; then
-        CMD_LINE="$CMD_LINE $CUSTOM_LIBFUZZER_OPTIONS"
-    fi
-
-    if [[ ! "$CMD_LINE" =~ "-dict=" ]]; then
-        if [ -f "$FUZZER.dict" ]; then
-            CMD_LINE="$CMD_LINE -dict=$FUZZER.dict"
-        fi
-    fi
-
-    CMD_LINE="$CMD_LINE < /dev/null"
-
-    echo "$CMD_LINE"
-
-    # Unset OUT so the fuzz target can't rely on it.
-    # unset OUT
-
-    if [ -n "$DEBUGGER" ]; then
-        CMD_LINE="$DEBUGGER $CMD_LINE"
-    fi
-
-    bash -c "$CMD_LINE"
-}
-
-ls -al
-
-for fuzzer in *_fuzzer; do
-    if [ -f "$fuzzer" ] && [ -x "$fuzzer" ]; then
-        run_fuzzer "$fuzzer"
-    fi
-done
--- a/tests/ci/libfuzzer_test_check.py
+++ b/tests/ci/libfuzzer_test_check.py
@ -1,49 +1,37 @@
 #!/usr/bin/env python3

 import argparse
-import csv
 import logging
 import os
-import re
 import subprocess
 import sys
 import atexit
 import zipfile
 from pathlib import Path
-from typing import List, Tuple
+from typing import List

 from github import Github

-# from build_download_helper import download_all_deb_packages
 from build_download_helper import download_fuzzers
 from clickhouse_helper import (
    CiLogsCredentials,
-    # ClickHouseHelper,
-    # prepare_tests_results_for_clickhouse,
 )
 from commit_status_helper import (
-    # NotSet,
    RerunHelper,
    get_commit,
-    # override_status,
-    # post_commit_status,
-    # post_commit_status_to_file,
    update_mergeable_check,
 )
 from docker_pull_helper import DockerImage, get_image_with_version

-# from download_release_packages import download_last_release
 from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH
 from get_robot_token import get_best_robot_token
-from pr_info import PRInfo  # , FORCE_TESTS_LABEL
-from report import TestResults, read_test_results
+from pr_info import PRInfo
+from report import TestResults

-# from s3_helper import S3Helper
 from stopwatch import Stopwatch

 from tee_popen import TeePopen

-# from upload_result_helper import upload_results

 NO_CHANGES_MSG = "Nothing to run"

@ -70,34 +58,18 @@ def get_additional_envs(check_name, run_by_hash_num, run_by_hash_total):
    return result


-# def get_image_name(check_name):
-#     if "stateless" in check_name.lower():
-#         return "clickhouse/stateless-test"
-#     if "stateful" in check_name.lower():
-#         return "clickhouse/stateful-test"
-#     else:
-#         raise Exception(f"Cannot deduce image name based on check name {check_name}")
-
-
 def get_run_command(
-    #    check_name: str,
-    fuzzers_path: str,
-    repo_path: str,
-    result_path: str,
-    #    server_log_path: str,
+    fuzzers_path: Path,
+    repo_path: Path,
+    result_path: Path,
    kill_timeout: int,
    additional_envs: List[str],
    ci_logs_args: str,
    image: DockerImage,
-    #    flaky_check: bool,
-    #    tests_to_run: List[str],
 ) -> str:
    additional_options = ["--hung-check"]
    additional_options.append("--print-time")

-    # if tests_to_run:
-    #     additional_options += tests_to_run
-
    additional_options_str = (
        '-e ADDITIONAL_OPTIONS="' + " ".join(additional_options) + '"'
    )
@ -108,18 +80,9 @@ def get_run_command(
        '-e S3_URL="https://s3.amazonaws.com/clickhouse-datasets"',
    ]

-    # if flaky_check:
-    #     envs.append("-e NUM_TRIES=100")
-    #     envs.append("-e MAX_RUN_TIME=1800")
-
    envs += [f"-e {e}" for e in additional_envs]

    env_str = " ".join(envs)
-    # volume_with_broken_test = (
-    #     f"--volume={repo_path}/tests/analyzer_tech_debt.txt:/analyzer_tech_debt.txt "
-    #     if "analyzer" in check_name
-    #     else ""
-    # )

    return (
        f"docker run "
@ -127,96 +90,11 @@ def get_run_command(
        f"--workdir=/fuzzers "
        f"--volume={fuzzers_path}:/fuzzers "
        f"--volume={repo_path}/tests:/usr/share/clickhouse-test "
-        #        f"{volume_with_broken_test}"
        f"--volume={result_path}:/test_output "
-        #        f"--volume={server_log_path}:/var/log/clickhouse-server "
        f"--cap-add=SYS_PTRACE {env_str} {additional_options_str} {image}"
    )


-def get_tests_to_run(pr_info: PRInfo) -> List[str]:
-    result = set()
-
-    if pr_info.changed_files is None:
-        return []
-
-    for fpath in pr_info.changed_files:
-        if re.match(r"tests/queries/0_stateless/[0-9]{5}", fpath):
-            logging.info("File '%s' is changed and seems like a test", fpath)
-            fname = fpath.split("/")[3]
-            fname_without_ext = os.path.splitext(fname)[0]
-            # add '.' to the end of the test name not to run all tests with the same prefix
-            # e.g. we changed '00001_some_name.reference'
-            # and we have ['00001_some_name.sh', '00001_some_name_2.sql']
-            # so we want to run only '00001_some_name.sh'
-            result.add(fname_without_ext + ".")
-        elif "tests/queries/" in fpath:
-            # log suspicious changes from tests/ for debugging in case of any problems
-            logging.info("File '%s' is changed, but it doesn't look like a test", fpath)
-    return list(result)
-
-
-def process_results(
-    result_folder: str,
-    server_log_path: str,
-) -> Tuple[str, str, TestResults, List[str]]:
-    test_results = []  # type: TestResults
-    additional_files = []
-    # Just upload all files from result_folder.
-    # If task provides processed results, then it's responsible for content of result_folder.
-    if os.path.exists(result_folder):
-        test_files = [
-            f
-            for f in os.listdir(result_folder)
-            if os.path.isfile(os.path.join(result_folder, f))
-        ]
-        additional_files = [os.path.join(result_folder, f) for f in test_files]
-
-    if os.path.exists(server_log_path):
-        server_log_files = [
-            f
-            for f in os.listdir(server_log_path)
-            if os.path.isfile(os.path.join(server_log_path, f))
-        ]
-        additional_files = additional_files + [
-            os.path.join(server_log_path, f) for f in server_log_files
-        ]
-
-    status = []
-    status_path = os.path.join(result_folder, "check_status.tsv")
-    if os.path.exists(status_path):
-        logging.info("Found test_results.tsv")
-        with open(status_path, "r", encoding="utf-8") as status_file:
-            status = list(csv.reader(status_file, delimiter="\t"))
-
-    if len(status) != 1 or len(status[0]) != 2:
-        logging.info("Files in result folder %s", os.listdir(result_folder))
-        return "error", "Invalid check_status.tsv", test_results, additional_files
-    state, description = status[0][0], status[0][1]
-
-    try:
-        results_path = Path(result_folder) / "test_results.tsv"
-
-        if results_path.exists():
-            logging.info("Found test_results.tsv")
-        else:
-            logging.info("Files in result folder %s", os.listdir(result_folder))
-            return "error", "Not found test_results.tsv", test_results, additional_files
-
-        test_results = read_test_results(results_path)
-        if len(test_results) == 0:
-            return "error", "Empty test_results.tsv", test_results, additional_files
-    except Exception as e:
-        return (
-            "error",
-            f"Cannot parse test_results.tsv ({e})",
-            test_results,
-            additional_files,
-        )
-
-    return state, description, test_results, additional_files
-
-
 def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("check_name")
@ -255,16 +133,12 @@ def main():
    temp_path = TEMP_PATH
    repo_path = REPO_COPY
    reports_path = REPORTS_PATH
-    # post_commit_path = os.path.join(temp_path, "functional_commit_status.tsv")

    args = parse_args()
    check_name = args.check_name
    kill_timeout = args.kill_timeout
    validate_bugfix_check = args.validate_bugfix

-    # flaky_check = "flaky" in check_name.lower()
-
-    # run_changed_tests = flaky_check or validate_bugfix_check
    run_changed_tests = validate_bugfix_check
    gh = Github(get_best_robot_token(), per_page=100)

@ -276,20 +150,9 @@ def main():
    commit = get_commit(gh, pr_info.sha)
    atexit.register(update_mergeable_check, gh, pr_info, check_name)

-    if not os.path.exists(temp_path):
+    if not Path(temp_path).exists():
        os.makedirs(temp_path)

-    # if validate_bugfix_check and "pr-bugfix" not in pr_info.labels:
-    #     if args.post_commit_status == "file":
-    #         post_commit_status_to_file(
-    #             post_commit_path,
-    #             f"Skipped (no pr-bugfix in {pr_info.labels})",
-    #             "success",
-    #             "null",
-    #         )
-    #     logging.info("Skipping '%s' (no pr-bugfix in %s)", check_name, pr_info.labels)
-    #     sys.exit(0)
-
    if "RUN_BY_HASH_NUM" in os.environ:
        run_by_hash_num = int(os.getenv("RUN_BY_HASH_NUM", "0"))
        run_by_hash_total = int(os.getenv("RUN_BY_HASH_TOTAL", "0"))
@ -306,67 +169,30 @@ def main():
        logging.info("Check is already finished according to github status, exiting")
        sys.exit(0)

-    # tests_to_run = []
-    # if run_changed_tests:
-    #     tests_to_run = get_tests_to_run(pr_info)
-    #     if not tests_to_run:
-    #         state = override_status("success", check_name, validate_bugfix_check)
-    #         if args.post_commit_status == "commit_status":
-    #             post_commit_status(
-    #                 commit,
-    #                 state,
-    #                 NotSet,
-    #                 NO_CHANGES_MSG,
-    #                 check_name_with_group,
-    #                 pr_info,
-    #             )
-    #         elif args.post_commit_status == "file":
-    #             post_commit_status_to_file(
-    #                 post_commit_path,
-    #                 description=NO_CHANGES_MSG,
-    #                 state=state,
-    #                 report_url="null",
-    #             )
-    #         sys.exit(0)
-
-    # image_name = "clickhouse/libfuzzer-test"  # get_image_name(check_name)
-    # docker_image = docker_build_image(
-    #     image_name, Path("../../docker/test/libfuzzer/")
-    # )
    docker_image = get_image_with_version(reports_path, "clickhouse/libfuzzer")

-    fuzzers_path = os.path.join(temp_path, "fuzzers")
-    if not os.path.exists(fuzzers_path):
+    fuzzers_path = Path(temp_path) / "fuzzers"
+    if not fuzzers_path.exists():
        os.makedirs(fuzzers_path)

    download_fuzzers(check_name, reports_path, fuzzers_path)

    for file in os.listdir(fuzzers_path):
        if file.endswith("_fuzzer"):
-            os.chmod(os.path.join(fuzzers_path, file), 0o777)
+            os.chmod(fuzzers_path / file, 0o777)
        elif file.endswith("_seed_corpus.zip"):
-            corpus_path = os.path.join(
-                fuzzers_path, file.removesuffix("_seed_corpus.zip") + ".in"
-            )
-            zipfile.ZipFile(os.path.join(fuzzers_path, file), "r").extractall(
-                corpus_path
-            )
+            corpus_path = fuzzers_path / (file.removesuffix("_seed_corpus.zip") + ".in")
+            zipfile.ZipFile(fuzzers_path / file, "r").extractall(corpus_path)

-    # server_log_path = os.path.join(temp_path, "server_log")
-    # if not os.path.exists(server_log_path):
-    #     os.makedirs(server_log_path)
-
-    result_path = os.path.join(temp_path, "result_path")
-    if not os.path.exists(result_path):
+    result_path = Path(temp_path) / "result_path"
+    if not result_path.exists():
        os.makedirs(result_path)

-    run_log_path = os.path.join(result_path, "run.log")
+    run_log_path = result_path / "run.log"

    additional_envs = get_additional_envs(
        check_name, run_by_hash_num, run_by_hash_total
    )
-    # if validate_bugfix_check:
-    #     additional_envs.append("GLOBAL_TAGS=no-random-settings")

    ci_logs_credentials = CiLogsCredentials(Path(temp_path) / "export-logs-config.sh")
    ci_logs_args = ci_logs_credentials.get_docker_arguments(
@ -374,22 +200,16 @@ def main():
    )

    run_command = get_run_command(
-        #        check_name,
        fuzzers_path,
-        repo_path,
+        Path(repo_path),
        result_path,
-        #        server_log_path,
        kill_timeout,
        additional_envs,
        ci_logs_args,
        docker_image,
-        #        flaky_check,
-        #        tests_to_run,
    )
    logging.info("Going to run libFuzzer tests: %s", run_command)

-    # sys.exit(0)
-
    with TeePopen(run_command, run_log_path) as process:
        retcode = process.wait()
        if retcode == 0:
@ -399,64 +219,6 @@ def main():

    sys.exit(0)

-    # try:
-    #     subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
-    # except subprocess.CalledProcessError:
-    #     logging.warning("Failed to change files owner in %s, ignoring it", temp_path)
-
-    # ci_logs_credentials.clean_ci_logs_from_credentials(Path(run_log_path))
-    # s3_helper = S3Helper()
-
-    # state, description, test_results, additional_logs = process_results(
-    #     result_path, server_log_path
-    # )
-    # state = override_status(state, check_name, invert=validate_bugfix_check)
-
-    # ch_helper = ClickHouseHelper()
-
-    # report_url = upload_results(
-    #     s3_helper,
-    #     pr_info.number,
-    #     pr_info.sha,
-    #     test_results,
-    #     [run_log_path] + additional_logs,
-    #     check_name_with_group,
-    # )
-
-    # print(f"::notice:: {check_name} Report url: {report_url}")
-    # if args.post_commit_status == "commit_status":
-    #     post_commit_status(
-    #         commit, state, report_url, description, check_name_with_group, pr_info
-    #     )
-    # elif args.post_commit_status == "file":
-    #     post_commit_status_to_file(
-    #         post_commit_path,
-    #         description,
-    #         state,
-    #         report_url,
-    #     )
-    # else:
-    #     raise Exception(
-    #         f'Unknown post_commit_status option "{args.post_commit_status}"'
-    #     )
-
-    # prepared_events = prepare_tests_results_for_clickhouse(
-    #     pr_info,
-    #     test_results,
-    #     state,
-    #     stopwatch.duration_seconds,
-    #     stopwatch.start_time_str,
-    #     report_url,
-    #     check_name_with_group,
-    # )
-    # ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
-
-    # if state != "success":
-    #     if FORCE_TESTS_LABEL in pr_info.labels:
-    #         print(f"'{FORCE_TESTS_LABEL}' enabled, will report success")
-    #     else:
-    #         sys.exit(1)
-

 if __name__ == "__main__":
    main()
--- a/tests/ci/tee_popen.py
+++ b/tests/ci/tee_popen.py
@ -55,6 +55,7 @@ class TeePopen:
            stderr=STDOUT,
            stdout=PIPE,
            bufsize=1,
+            errors="backslashreplace",
        )
        if self.timeout is not None and self.timeout > 0:
            t = Thread(target=self._check_timeout)