Merge pull request #70052 from ClickHouse/backport/24.9/69864

Backport #69864 to 24.9: Improve stateless test runner
Author: robot-ch-test-poll4, 2024-09-27 05:54:59 +02:00 (committed by GitHub)
Commit: 4ed0de28c6
Signature: No known key found for this signature in database (GPG Key ID: B5690EEEBB952194)
3 changed files with 51 additions and 16 deletions

View File

@@ -135,15 +135,18 @@ def process_test_log(log_path, broken_tests):
     )


-def process_result(result_path, broken_tests):
+def process_result(result_path, broken_tests, in_test_result_file, in_results_file):
     test_results = []
     state = "success"
     description = ""
     files = os.listdir(result_path)
+    test_results_path = result_path
     if files:
         logging.info("Find files in result folder %s", ",".join(files))
-        result_path = os.path.join(result_path, "test_result.txt")
+        test_results_path = os.path.join(result_path, in_results_file)
+        result_path = os.path.join(result_path, in_test_result_file)
     else:
+        test_results_path = None
         result_path = None
         description = "No output log"
         state = "error"
@@ -161,6 +164,27 @@ def process_result(result_path, broken_tests):
         retries,
         test_results,
     ) = process_test_log(result_path, broken_tests)
+
+    # Check test_results.tsv for sanitizer asserts, crashes and other critical errors.
+    # If the file is present, it's expected to be generated by stress_test.lib check for critical errors
+    # In the end this file will be fully regenerated, including both results from critical errors check and
+    # functional test results.
+    if test_results_path and os.path.exists(test_results_path):
+        with open(test_results_path, "r", encoding="utf-8") as test_results_file:
+            existing_test_results = list(
+                csv.reader(test_results_file, delimiter="\t")
+            )
+            for test in existing_test_results:
+                if len(test) < 2:
+                    unknown += 1
+                else:
+                    test_results.append(test)
+
+                    if test[1] != "OK":
+                        failed += 1
+                    else:
+                        success += 1
+
     is_flaky_check = 1 < int(os.environ.get("NUM_TRIES", 1))
     logging.info("Is flaky check: %s", is_flaky_check)
     # If no tests were run (success == 0) it indicates an error (e.g. server did not start or crashed immediately)
@@ -219,6 +243,8 @@ if __name__ == "__main__":
         description="ClickHouse script for parsing results of functional tests"
     )
     parser.add_argument("--in-results-dir", default="/test_output/")
+    parser.add_argument("--in-test-result-file", default="test_result.txt")
+    parser.add_argument("--in-results-file", default="test_results.tsv")
     parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
     parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
     parser.add_argument("--broken-tests", default="/repo/tests/analyzer_tech_debt.txt")
@@ -231,7 +257,12 @@ if __name__ == "__main__":
            broken_tests = f.read().splitlines()
            print(f"Broken tests in the list: {len(broken_tests)}")

-    state, description, test_results = process_result(args.in_results_dir, broken_tests)
+    state, description, test_results = process_result(
+        args.in_results_dir,
+        broken_tests,
+        args.in_test_result_file,
+        args.in_results_file,
+    )
     logging.info("Result parsed")
     status = (state, description)
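
Note: a minimal sketch of how the new arguments can be exercised, assuming the default paths from the argparse definitions above; the pre-seeded TSV row is purely illustrative and stands in for what the critical-error check writes before the parser runs:

# Illustrative only: pre-seed the results file that the critical-error check
# would normally produce, then let the parser merge it with test_result.txt.
# Rows whose second column is not "OK" are counted as failures; the file is
# fully regenerated with the combined results afterwards.
mkdir -p /test_output
printf 'Sanitizer assert (in stderr.log)\tFAIL\n' >> /test_output/test_results.tsv
/repo/tests/docker_scripts/process_functional_tests_result.py \
    --in-results-dir /test_output/ \
    --in-test-result-file test_result.txt \
    --in-results-file test_results.tsv \
    || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv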

View File

@@ -350,31 +350,31 @@ ls -la ./
 echo "Files in root directory"
 ls -la /

-/repo/tests/docker_scripts/process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv
-
 clickhouse-client -q "system flush logs" ||:

 # stop logs replication to make it possible to dump logs tables via clickhouse-local
 stop_logs_replication

+logs_saver_client_options="--max_block_size 8192 --max_memory_usage 10G --max_threads 1 --max_result_rows 0 --max_result_bytes 0 --max_bytes_to_read 0"
+
 # Try to get logs while server is running
 failed_to_save_logs=0
 for table in query_log zookeeper_log trace_log transactions_info_log metric_log blob_storage_log error_log
 do
-    if ! clickhouse-client -q "select * from system.$table into outfile '/test_output/$table.tsv.zst' format TSVWithNamesAndTypes"; then
+    if ! clickhouse-client ${logs_saver_client_options} -q "select * from system.$table into outfile '/test_output/$table.tsv.zst' format TSVWithNamesAndTypes"; then
         failed_to_save_logs=1
     fi
     if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
-        if ! clickhouse-client --port 19000 -q "select * from system.$table into outfile '/test_output/$table.1.tsv.zst' format TSVWithNamesAndTypes"; then
+        if ! clickhouse-client ${logs_saver_client_options} --port 19000 -q "select * from system.$table into outfile '/test_output/$table.1.tsv.zst' format TSVWithNamesAndTypes"; then
             failed_to_save_logs=1
         fi
-        if ! clickhouse-client --port 29000 -q "select * from system.$table into outfile '/test_output/$table.2.tsv.zst' format TSVWithNamesAndTypes"; then
+        if ! clickhouse-client ${logs_saver_client_options} --port 29000 -q "select * from system.$table into outfile '/test_output/$table.2.tsv.zst' format TSVWithNamesAndTypes"; then
             failed_to_save_logs=1
         fi
     fi
     if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then
-        if ! clickhouse-client --port 29000 -q "select * from system.$table into outfile '/test_output/$table.2.tsv.zst' format TSVWithNamesAndTypes"; then
+        if ! clickhouse-client ${logs_saver_client_options} --port 29000 -q "select * from system.$table into outfile '/test_output/$table.2.tsv.zst' format TSVWithNamesAndTypes"; then
             failed_to_save_logs=1
         fi
     fi
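
Note: the shared option string caps block size, memory, threads, and result limits for every log-dump query. For reference, after shell word-splitting one loop iteration expands to roughly the following (query_log chosen as an example table):

# Equivalent expanded form of one iteration of the log-dump loop above.
clickhouse-client --max_block_size 8192 --max_memory_usage 10G --max_threads 1 \
    --max_result_rows 0 --max_result_bytes 0 --max_bytes_to_read 0 \
    -q "select * from system.query_log into outfile '/test_output/query_log.tsv.zst' format TSVWithNamesAndTypes"

The expansion is left unquoted on purpose so each option becomes a separate argument; this is safe because none of the option values contain whitespace.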
@@ -385,8 +385,8 @@ done
 # wait for minio to flush its batch if it has any
 sleep 1
 clickhouse-client -q "SYSTEM FLUSH ASYNC INSERT QUEUE"
-clickhouse-client --max_block_size 8192 --max_memory_usage 10G --max_threads 1 --max_result_bytes 0 --max_result_rows 0 --max_rows_to_read 0 --max_bytes_to_read 0 -q "SELECT log FROM minio_audit_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_audit_logs.jsonl.zst' FORMAT JSONEachRow"
-clickhouse-client --max_block_size 8192 --max_memory_usage 10G --max_threads 1 --max_result_bytes 0 --max_result_rows 0 --max_rows_to_read 0 --max_bytes_to_read 0 -q "SELECT log FROM minio_server_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_server_logs.jsonl.zst' FORMAT JSONEachRow"
+clickhouse-client ${logs_saver_client_options} -q "SELECT log FROM minio_audit_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_audit_logs.jsonl.zst' FORMAT JSONEachRow"
+clickhouse-client ${logs_saver_client_options} -q "SELECT log FROM minio_server_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_server_logs.jsonl.zst' FORMAT JSONEachRow"

 # Stop server so we can safely read data with clickhouse-local.
 # Why do we read data with clickhouse-local?
@@ -428,15 +428,15 @@ if [ $failed_to_save_logs -ne 0 ]; then
     # for files >64MB, we want this files to be compressed explicitly
     for table in query_log zookeeper_log trace_log transactions_info_log metric_log blob_storage_log error_log
     do
-        clickhouse-local "$data_path_config" --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||:
+        clickhouse-local ${logs_saver_client_options} "$data_path_config" --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||:

         if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
-            clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||:
-            clickhouse-local --path /var/lib/clickhouse2/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst ||:
+            clickhouse-local ${logs_saver_client_options} --path /var/lib/clickhouse1/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||:
+            clickhouse-local ${logs_saver_client_options} --path /var/lib/clickhouse2/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst ||:
         fi

         if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then
-            clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||:
+            clickhouse-local ${logs_saver_client_options} --path /var/lib/clickhouse1/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||:
         fi
     done
 fi
@@ -460,6 +460,10 @@ done
 # Grep logs for sanitizer asserts, crashes and other critical errors
 check_logs_for_critical_errors

+# Check test_result.txt with test results and test_results.tsv generated by grepping logs before
+/repo/tests/docker_scripts/process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv
+
 # Compressed (FIXME: remove once only github actions will be left)
 rm /var/log/clickhouse-server/clickhouse-server.log
 mv /var/log/clickhouse-server/stderr.log /test_output/ ||:

View File

@@ -288,7 +288,7 @@ function check_logs_for_critical_errors()
     # Remove file fatal_messages.txt if it's empty
     [ -s /test_output/fatal_messages.txt ] || rm /test_output/fatal_messages.txt

-    rg -Faz "########################################" /test_output/* > /dev/null \
+    rg -Faz "########################################" /test_output/* | rg -v "rg -Faz " > /dev/null \
         && echo -e "Killed by signal (output files)$FAIL" >> /test_output/test_results.tsv

 function get_gdb_log_context()
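
Note: the added "rg -v" presumably filters out matches where the hit is the check's own command line echoed into one of the output files, which would otherwise produce a false "Killed by signal" entry. A hedged sketch of that false positive (the log file name is hypothetical):

# Hypothetical reproduction: a captured script log contains the grep command
# line itself, so the "####..." pattern matches its own invocation.
printf 'rg -Faz "########################################" /test_output/*\n' > /test_output/script_run.log
# The second rg drops lines containing the command text, so only genuine
# crash markers from server output make it through the pipeline.
rg -Faz "########################################" /test_output/* | rg -v "rg -Faz " > /dev/null \
    && echo "genuine crash marker present" \
    || echo "only the logged command line matched; ignored"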