Merge pull request #70052 from ClickHouse/backport/24.9/69864

Backport #69864 to 24.9: Improve stateless test runner
Author: robot-ch-test-poll4, 2024-09-27 05:54:59 +02:00 (committed by GitHub)
Commit: 4ed0de28c6
Signature: No known key found for this signature in database (GPG Key ID: B5690EEEBB952194)
3 changed files with 51 additions and 16 deletions

View File

@@ -135,15 +135,18 @@ def process_test_log(log_path, broken_tests):
     )


-def process_result(result_path, broken_tests):
+def process_result(result_path, broken_tests, in_test_result_file, in_results_file):
     test_results = []
     state = "success"
     description = ""
     files = os.listdir(result_path)
+    test_results_path = result_path
     if files:
         logging.info("Find files in result folder %s", ",".join(files))
-        result_path = os.path.join(result_path, "test_result.txt")
+        test_results_path = os.path.join(result_path, in_results_file)
+        result_path = os.path.join(result_path, in_test_result_file)
     else:
+        test_results_path = None
         result_path = None
         description = "No output log"
         state = "error"
@@ -161,6 +164,27 @@ def process_result(result_path, broken_tests):
         retries,
         test_results,
     ) = process_test_log(result_path, broken_tests)
+
+    # Check test_results.tsv for sanitizer asserts, crashes and other critical errors.
+    # If the file is present, it's expected to be generated by stress_test.lib check for critical errors
+    # In the end this file will be fully regenerated, including both results from critical errors check and
+    # functional test results.
+    if test_results_path and os.path.exists(test_results_path):
+        with open(test_results_path, "r", encoding="utf-8") as test_results_file:
+            existing_test_results = list(
+                csv.reader(test_results_file, delimiter="\t")
+            )
+            for test in existing_test_results:
+                if len(test) < 2:
+                    unknown += 1
+                else:
+                    test_results.append(test)
+
+                    if test[1] != "OK":
+                        failed += 1
+                    else:
+                        success += 1
+
     is_flaky_check = 1 < int(os.environ.get("NUM_TRIES", 1))
     logging.info("Is flaky check: %s", is_flaky_check)
     # If no tests were run (success == 0) it indicates an error (e.g. server did not start or crashed immediately)
@@ -219,6 +243,8 @@ if __name__ == "__main__":
         description="ClickHouse script for parsing results of functional tests"
     )
     parser.add_argument("--in-results-dir", default="/test_output/")
+    parser.add_argument("--in-test-result-file", default="test_result.txt")
+    parser.add_argument("--in-results-file", default="test_results.tsv")
     parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
     parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
     parser.add_argument("--broken-tests", default="/repo/tests/analyzer_tech_debt.txt")
@@ -231,7 +257,12 @@ if __name__ == "__main__":
            broken_tests = f.read().splitlines()
            print(f"Broken tests in the list: {len(broken_tests)}")

-    state, description, test_results = process_result(args.in_results_dir, broken_tests)
+    state, description, test_results = process_result(
+        args.in_results_dir,
+        broken_tests,
+        args.in_test_result_file,
+        args.in_results_file,
+    )
     logging.info("Result parsed")
     status = (state, description)
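
Note: a minimal sketch of how the new arguments can be exercised, assuming the default paths from the argparse definitions above; the pre-seeded TSV row is purely illustrative and stands in for what the critical-error check writes before the parser runs:

# Illustrative only: pre-seed the results file that the critical-error check
# would normally produce, then let the parser merge it with test_result.txt.
# Rows whose second column is not "OK" are counted as failures; the file is
# fully regenerated with the combined results afterwards.
mkdir -p /test_output
printf 'Sanitizer assert (in stderr.log)\tFAIL\n' >> /test_output/test_results.tsv
/repo/tests/docker_scripts/process_functional_tests_result.py \
    --in-results-dir /test_output/ \
    --in-test-result-file test_result.txt \
    --in-results-file test_results.tsv \
    || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv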

View File

@@ -350,31 +350,31 @@ ls -la ./
 echo "Files in root directory"
 ls -la /

-/repo/tests/docker_scripts/process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv
-
 clickhouse-client -q "system flush logs" ||:

 # stop logs replication to make it possible to dump logs tables via clickhouse-local
 stop_logs_replication

+logs_saver_client_options="--max_block_size 8192 --max_memory_usage 10G --max_threads 1 --max_result_rows 0 --max_result_bytes 0 --max_bytes_to_read 0"
+
 # Try to get logs while server is running
 failed_to_save_logs=0
 for table in query_log zookeeper_log trace_log transactions_info_log metric_log blob_storage_log error_log
 do
-    if ! clickhouse-client -q "select * from system.$table into outfile '/test_output/$table.tsv.zst' format TSVWithNamesAndTypes"; then
+    if ! clickhouse-client ${logs_saver_client_options} -q "select * from system.$table into outfile '/test_output/$table.tsv.zst' format TSVWithNamesAndTypes"; then
         failed_to_save_logs=1
     fi
     if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
-        if ! clickhouse-client --port 19000 -q "select * from system.$table into outfile '/test_output/$table.1.tsv.zst' format TSVWithNamesAndTypes"; then
+        if ! clickhouse-client ${logs_saver_client_options} --port 19000 -q "select * from system.$table into outfile '/test_output/$table.1.tsv.zst' format TSVWithNamesAndTypes"; then
             failed_to_save_logs=1
         fi
-        if ! clickhouse-client --port 29000 -q "select * from system.$table into outfile '/test_output/$table.2.tsv.zst' format TSVWithNamesAndTypes"; then
+        if ! clickhouse-client ${logs_saver_client_options} --port 29000 -q "select * from system.$table into outfile '/test_output/$table.2.tsv.zst' format TSVWithNamesAndTypes"; then
             failed_to_save_logs=1
         fi
     fi
     if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then
-        if ! clickhouse-client --port 29000 -q "select * from system.$table into outfile '/test_output/$table.2.tsv.zst' format TSVWithNamesAndTypes"; then
+        if ! clickhouse-client ${logs_saver_client_options} --port 29000 -q "select * from system.$table into outfile '/test_output/$table.2.tsv.zst' format TSVWithNamesAndTypes"; then
             failed_to_save_logs=1
         fi
     fi
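
Note: the shared option string caps block size, memory, threads, and result limits for every log-dump query. For reference, after shell word-splitting one loop iteration expands to roughly the following (query_log chosen as an example table):

# Equivalent expanded form of one iteration of the log-dump loop above.
clickhouse-client --max_block_size 8192 --max_memory_usage 10G --max_threads 1 \
    --max_result_rows 0 --max_result_bytes 0 --max_bytes_to_read 0 \
    -q "select * from system.query_log into outfile '/test_output/query_log.tsv.zst' format TSVWithNamesAndTypes"

The expansion is left unquoted on purpose so each option becomes a separate argument; this is safe because none of the option values contain whitespace.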
@@ -385,8 +385,8 @@ done
 # wait for minio to flush its batch if it has any
 sleep 1
 clickhouse-client -q "SYSTEM FLUSH ASYNC INSERT QUEUE"
-clickhouse-client --max_block_size 8192 --max_memory_usage 10G --max_threads 1 --max_result_bytes 0 --max_result_rows 0 --max_rows_to_read 0 --max_bytes_to_read 0 -q "SELECT log FROM minio_audit_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_audit_logs.jsonl.zst' FORMAT JSONEachRow"
-clickhouse-client --max_block_size 8192 --max_memory_usage 10G --max_threads 1 --max_result_bytes 0 --max_result_rows 0 --max_rows_to_read 0 --max_bytes_to_read 0 -q "SELECT log FROM minio_server_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_server_logs.jsonl.zst' FORMAT JSONEachRow"
+clickhouse-client ${logs_saver_client_options} -q "SELECT log FROM minio_audit_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_audit_logs.jsonl.zst' FORMAT JSONEachRow"
+clickhouse-client ${logs_saver_client_options} -q "SELECT log FROM minio_server_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_server_logs.jsonl.zst' FORMAT JSONEachRow"

 # Stop server so we can safely read data with clickhouse-local.
 # Why do we read data with clickhouse-local?
@@ -428,15 +428,15 @@ if [ $failed_to_save_logs -ne 0 ]; then
     # for files >64MB, we want this files to be compressed explicitly
     for table in query_log zookeeper_log trace_log transactions_info_log metric_log blob_storage_log error_log
     do
-        clickhouse-local "$data_path_config" --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||:
+        clickhouse-local ${logs_saver_client_options} "$data_path_config" --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||:

         if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
-            clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||:
-            clickhouse-local --path /var/lib/clickhouse2/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst ||:
+            clickhouse-local ${logs_saver_client_options} --path /var/lib/clickhouse1/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||:
+            clickhouse-local ${logs_saver_client_options} --path /var/lib/clickhouse2/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst ||:
         fi

         if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then
-            clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||:
+            clickhouse-local ${logs_saver_client_options} --path /var/lib/clickhouse1/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||:
         fi
     done
 fi
@@ -460,6 +460,10 @@ done
 # Grep logs for sanitizer asserts, crashes and other critical errors
 check_logs_for_critical_errors

+# Check test_result.txt with test results and test_results.tsv generated by grepping logs before
+/repo/tests/docker_scripts/process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv
+
 # Compressed (FIXME: remove once only github actions will be left)
 rm /var/log/clickhouse-server/clickhouse-server.log
 mv /var/log/clickhouse-server/stderr.log /test_output/ ||:

View File

@@ -288,7 +288,7 @@ function check_logs_for_critical_errors()
     # Remove file fatal_messages.txt if it's empty
     [ -s /test_output/fatal_messages.txt ] || rm /test_output/fatal_messages.txt

-    rg -Faz "########################################" /test_output/* > /dev/null \
+    rg -Faz "########################################" /test_output/* | rg -v "rg -Faz " > /dev/null \
         && echo -e "Killed by signal (output files)$FAIL" >> /test_output/test_results.tsv

 function get_gdb_log_context()
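
Note: the added "rg -v" presumably filters out matches where the hit is the check's own command line echoed into one of the output files, which would otherwise produce a false "Killed by signal" entry. A hedged sketch of that false positive (the log file name is hypothetical):

# Hypothetical reproduction: a captured script log contains the grep command
# line itself, so the "####..." pattern matches its own invocation.
printf 'rg -Faz "########################################" /test_output/*\n' > /test_output/script_run.log
# The second rg drops lines containing the command text, so only genuine
# crash markers from server output make it through the pipeline.
rg -Faz "########################################" /test_output/* | rg -v "rg -Faz " > /dev/null \
    && echo "genuine crash marker present" \
    || echo "only the logged command line matched; ignored"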