mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 00:30:49 +00:00
CI: Functional tests to store artifacts on timeout
This commit is contained in:
parent
8d0d870326
commit
427016a450
@ -256,22 +256,6 @@ function configure
|
|||||||
rm -f "$FASTTEST_DATA/config.d/secure_ports.xml"
|
rm -f "$FASTTEST_DATA/config.d/secure_ports.xml"
|
||||||
}
|
}
|
||||||
|
|
||||||
function timeout_with_logging() {
|
|
||||||
local exit_code=0
|
|
||||||
|
|
||||||
timeout -s TERM --preserve-status "${@}" || exit_code="${?}"
|
|
||||||
|
|
||||||
echo "Checking if it is a timeout. The code 124 will indicate a timeout."
|
|
||||||
if [[ "${exit_code}" -eq "124" ]]
|
|
||||||
then
|
|
||||||
echo "The command 'timeout ${*}' has been killed by timeout."
|
|
||||||
else
|
|
||||||
echo "No, it isn't a timeout."
|
|
||||||
fi
|
|
||||||
|
|
||||||
return $exit_code
|
|
||||||
}
|
|
||||||
|
|
||||||
function run_tests
|
function run_tests
|
||||||
{
|
{
|
||||||
clickhouse-server --version
|
clickhouse-server --version
|
||||||
@ -340,7 +324,7 @@ case "$stage" in
|
|||||||
configure 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/install_log.txt"
|
configure 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/install_log.txt"
|
||||||
;&
|
;&
|
||||||
"run_tests")
|
"run_tests")
|
||||||
timeout_with_logging 35m bash -c run_tests ||:
|
run_tests ||:
|
||||||
/process_functional_tests_result.py --in-results-dir "$FASTTEST_OUTPUT/" \
|
/process_functional_tests_result.py --in-results-dir "$FASTTEST_OUTPUT/" \
|
||||||
--out-results-file "$FASTTEST_OUTPUT/test_results.tsv" \
|
--out-results-file "$FASTTEST_OUTPUT/test_results.tsv" \
|
||||||
--out-status-file "$FASTTEST_OUTPUT/check_status.tsv" || echo -e "failure\tCannot parse results" > "$FASTTEST_OUTPUT/check_status.tsv"
|
--out-status-file "$FASTTEST_OUTPUT/check_status.tsv" || echo -e "failure\tCannot parse results" > "$FASTTEST_OUTPUT/check_status.tsv"
|
||||||
|
@ -35,7 +35,6 @@ RUN mkdir -p /tmp/clickhouse-odbc-tmp \
|
|||||||
|
|
||||||
|
|
||||||
ENV TZ=Europe/Amsterdam
|
ENV TZ=Europe/Amsterdam
|
||||||
ENV MAX_RUN_TIME=9000
|
|
||||||
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
|
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
|
||||||
|
|
||||||
ARG sqllogic_test_repo="https://github.com/gregrahn/sqllogictest.git"
|
ARG sqllogic_test_repo="https://github.com/gregrahn/sqllogictest.git"
|
||||||
|
@ -94,7 +94,7 @@ function run_tests()
|
|||||||
|
|
||||||
export -f run_tests
|
export -f run_tests
|
||||||
|
|
||||||
timeout "${MAX_RUN_TIME:-9000}" bash -c run_tests || echo "timeout reached" >&2
|
run_tests
|
||||||
|
|
||||||
#/process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv
|
#/process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv
|
||||||
|
|
||||||
|
@ -22,7 +22,6 @@ ARG sqltest_repo="https://github.com/elliotchance/sqltest/"
|
|||||||
RUN git clone ${sqltest_repo}
|
RUN git clone ${sqltest_repo}
|
||||||
|
|
||||||
ENV TZ=UTC
|
ENV TZ=UTC
|
||||||
ENV MAX_RUN_TIME=900
|
|
||||||
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
|
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
|
||||||
|
|
||||||
COPY run.sh /
|
COPY run.sh /
|
||||||
|
@ -4,9 +4,6 @@
|
|||||||
source /setup_export_logs.sh
|
source /setup_export_logs.sh
|
||||||
set -e -x
|
set -e -x
|
||||||
|
|
||||||
MAX_RUN_TIME=${MAX_RUN_TIME:-3600}
|
|
||||||
MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 3600 : MAX_RUN_TIME))
|
|
||||||
|
|
||||||
# Choose random timezone for this test run
|
# Choose random timezone for this test run
|
||||||
TZ="$(rg -v '#' /usr/share/zoneinfo/zone.tab | awk '{print $3}' | shuf | head -n1)"
|
TZ="$(rg -v '#' /usr/share/zoneinfo/zone.tab | awk '{print $3}' | shuf | head -n1)"
|
||||||
echo "Choosen random timezone $TZ"
|
echo "Choosen random timezone $TZ"
|
||||||
@ -123,9 +120,6 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]
|
|||||||
|
|
||||||
clickhouse-client --query "DROP TABLE datasets.hits_v1"
|
clickhouse-client --query "DROP TABLE datasets.hits_v1"
|
||||||
clickhouse-client --query "DROP TABLE datasets.visits_v1"
|
clickhouse-client --query "DROP TABLE datasets.visits_v1"
|
||||||
|
|
||||||
MAX_RUN_TIME=$((MAX_RUN_TIME < 9000 ? MAX_RUN_TIME : 9000)) # min(MAX_RUN_TIME, 2.5 hours)
|
|
||||||
MAX_RUN_TIME=$((MAX_RUN_TIME != 0 ? MAX_RUN_TIME : 9000)) # set to 2.5 hours if 0 (unlimited)
|
|
||||||
else
|
else
|
||||||
clickhouse-client --query "CREATE DATABASE test"
|
clickhouse-client --query "CREATE DATABASE test"
|
||||||
clickhouse-client --query "SHOW TABLES FROM test"
|
clickhouse-client --query "SHOW TABLES FROM test"
|
||||||
@ -257,24 +251,7 @@ function run_tests()
|
|||||||
|
|
||||||
export -f run_tests
|
export -f run_tests
|
||||||
|
|
||||||
function timeout_with_logging() {
|
run_tests ||:
|
||||||
local exit_code=0
|
|
||||||
|
|
||||||
timeout -s TERM --preserve-status "${@}" || exit_code="${?}"
|
|
||||||
|
|
||||||
echo "Checking if it is a timeout. The code 124 will indicate a timeout."
|
|
||||||
if [[ "${exit_code}" -eq "124" ]]
|
|
||||||
then
|
|
||||||
echo "The command 'timeout ${*}' has been killed by timeout."
|
|
||||||
else
|
|
||||||
echo "No, it isn't a timeout."
|
|
||||||
fi
|
|
||||||
|
|
||||||
return $exit_code
|
|
||||||
}
|
|
||||||
|
|
||||||
TIMEOUT=$((MAX_RUN_TIME - 700))
|
|
||||||
timeout_with_logging "$TIMEOUT" bash -c run_tests ||:
|
|
||||||
|
|
||||||
echo "Files in current directory"
|
echo "Files in current directory"
|
||||||
ls -la ./
|
ls -la ./
|
||||||
|
@ -65,7 +65,6 @@ ENV TZ=Europe/Amsterdam
|
|||||||
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
|
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
|
||||||
|
|
||||||
ENV NUM_TRIES=1
|
ENV NUM_TRIES=1
|
||||||
ENV MAX_RUN_TIME=0
|
|
||||||
|
|
||||||
# Unrelated to vars in setup_minio.sh, but should be the same there
|
# Unrelated to vars in setup_minio.sh, but should be the same there
|
||||||
# to have the same binaries for local running scenario
|
# to have the same binaries for local running scenario
|
||||||
|
@ -12,9 +12,6 @@ dmesg --clear
|
|||||||
# fail on errors, verbose and export all env variables
|
# fail on errors, verbose and export all env variables
|
||||||
set -e -x -a
|
set -e -x -a
|
||||||
|
|
||||||
MAX_RUN_TIME=${MAX_RUN_TIME:-9000}
|
|
||||||
MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 9000 : MAX_RUN_TIME))
|
|
||||||
|
|
||||||
USE_DATABASE_REPLICATED=${USE_DATABASE_REPLICATED:=0}
|
USE_DATABASE_REPLICATED=${USE_DATABASE_REPLICATED:=0}
|
||||||
USE_SHARED_CATALOG=${USE_SHARED_CATALOG:=0}
|
USE_SHARED_CATALOG=${USE_SHARED_CATALOG:=0}
|
||||||
|
|
||||||
@ -308,8 +305,6 @@ function run_tests()
|
|||||||
|
|
||||||
try_run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')"
|
try_run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')"
|
||||||
|
|
||||||
TIMEOUT=$((MAX_RUN_TIME - 800 > 8400 ? 8400 : MAX_RUN_TIME - 800))
|
|
||||||
START_TIME=${SECONDS}
|
|
||||||
set +e
|
set +e
|
||||||
|
|
||||||
TEST_ARGS=(
|
TEST_ARGS=(
|
||||||
@ -324,32 +319,22 @@ function run_tests()
|
|||||||
--test-runs "$NUM_TRIES"
|
--test-runs "$NUM_TRIES"
|
||||||
"${ADDITIONAL_OPTIONS[@]}"
|
"${ADDITIONAL_OPTIONS[@]}"
|
||||||
)
|
)
|
||||||
timeout --preserve-status --signal TERM --kill-after 60m ${TIMEOUT}s clickhouse-test "${TEST_ARGS[@]}" 2>&1 \
|
clickhouse-test "${TEST_ARGS[@]}" 2>&1 \
|
||||||
| ts '%Y-%m-%d %H:%M:%S' \
|
| ts '%Y-%m-%d %H:%M:%S' \
|
||||||
| tee -a test_output/test_result.txt
|
| tee -a test_output/test_result.txt
|
||||||
set -e
|
set -e
|
||||||
DURATION=$((SECONDS - START_TIME))
|
|
||||||
|
|
||||||
echo "Elapsed ${DURATION} seconds."
|
|
||||||
if [[ $DURATION -ge $TIMEOUT ]]
|
|
||||||
then
|
|
||||||
echo "It looks like the command is terminated by the timeout, which is ${TIMEOUT} seconds."
|
|
||||||
fi
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export -f run_tests
|
export -f run_tests
|
||||||
|
|
||||||
|
|
||||||
# This should be enough to setup job and collect artifacts
|
|
||||||
TIMEOUT=$((MAX_RUN_TIME - 700))
|
|
||||||
if [ "$NUM_TRIES" -gt "1" ]; then
|
if [ "$NUM_TRIES" -gt "1" ]; then
|
||||||
# We don't run tests with Ordinary database in PRs, only in master.
|
# We don't run tests with Ordinary database in PRs, only in master.
|
||||||
# So run new/changed tests with Ordinary at least once in flaky check.
|
# So run new/changed tests with Ordinary at least once in flaky check.
|
||||||
timeout_with_logging "$TIMEOUT" bash -c 'NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests' \
|
NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests \
|
||||||
| sed 's/All tests have finished/Redacted: a message about tests finish is deleted/' | sed 's/No tests were run/Redacted: a message about no tests run is deleted/' ||:
|
| sed 's/All tests have finished/Redacted: a message about tests finish is deleted/' | sed 's/No tests were run/Redacted: a message about no tests run is deleted/' ||:
|
||||||
fi
|
fi
|
||||||
|
|
||||||
timeout_with_logging "$TIMEOUT" bash -c run_tests ||:
|
run_tests ||:
|
||||||
|
|
||||||
echo "Files in current directory"
|
echo "Files in current directory"
|
||||||
ls -la ./
|
ls -la ./
|
||||||
|
@ -40,22 +40,6 @@ function fn_exists() {
|
|||||||
declare -F "$1" > /dev/null;
|
declare -F "$1" > /dev/null;
|
||||||
}
|
}
|
||||||
|
|
||||||
function timeout_with_logging() {
|
|
||||||
local exit_code=0
|
|
||||||
|
|
||||||
timeout -s TERM --preserve-status "${@}" || exit_code="${?}"
|
|
||||||
|
|
||||||
echo "Checking if it is a timeout. The code 124 will indicate a timeout."
|
|
||||||
if [[ "${exit_code}" -eq "124" ]]
|
|
||||||
then
|
|
||||||
echo "The command 'timeout ${*}' has been killed by timeout."
|
|
||||||
else
|
|
||||||
echo "No, it isn't a timeout."
|
|
||||||
fi
|
|
||||||
|
|
||||||
return $exit_code
|
|
||||||
}
|
|
||||||
|
|
||||||
function collect_core_dumps()
|
function collect_core_dumps()
|
||||||
{
|
{
|
||||||
find . -type f -maxdepth 1 -name 'core.*' | while read -r core; do
|
find . -type f -maxdepth 1 -name 'core.*' | while read -r core; do
|
||||||
|
@ -50,7 +50,6 @@ from github_helper import GitHub
|
|||||||
from pr_info import PRInfo
|
from pr_info import PRInfo
|
||||||
from report import (
|
from report import (
|
||||||
ERROR,
|
ERROR,
|
||||||
FAILURE,
|
|
||||||
PENDING,
|
PENDING,
|
||||||
SUCCESS,
|
SUCCESS,
|
||||||
BuildResult,
|
BuildResult,
|
||||||
@ -62,11 +61,11 @@ from report import (
|
|||||||
FAIL,
|
FAIL,
|
||||||
)
|
)
|
||||||
from s3_helper import S3Helper
|
from s3_helper import S3Helper
|
||||||
from stopwatch import Stopwatch
|
|
||||||
from tee_popen import TeePopen
|
from tee_popen import TeePopen
|
||||||
from ci_cache import CiCache
|
from ci_cache import CiCache
|
||||||
from ci_settings import CiSettings
|
from ci_settings import CiSettings
|
||||||
from ci_buddy import CIBuddy
|
from ci_buddy import CIBuddy
|
||||||
|
from stopwatch import Stopwatch
|
||||||
from version_helper import get_version_from_repo
|
from version_helper import get_version_from_repo
|
||||||
|
|
||||||
# pylint: disable=too-many-lines
|
# pylint: disable=too-many-lines
|
||||||
@ -370,8 +369,8 @@ def _pre_action(s3, job_name, batch, indata, pr_info):
|
|||||||
# skip_status = SUCCESS already there
|
# skip_status = SUCCESS already there
|
||||||
GH.print_in_group("Commit Status Data", job_status)
|
GH.print_in_group("Commit Status Data", job_status)
|
||||||
|
|
||||||
# create pre report
|
# create dummy report
|
||||||
jr = JobReport.create_pre_report(status=skip_status, job_skipped=to_be_skipped)
|
jr = JobReport.create_dummy(status=skip_status, job_skipped=to_be_skipped)
|
||||||
jr.dump()
|
jr.dump()
|
||||||
|
|
||||||
if not to_be_skipped:
|
if not to_be_skipped:
|
||||||
@ -990,19 +989,20 @@ def _run_test(job_name: str, run_command: str) -> int:
|
|||||||
stopwatch = Stopwatch()
|
stopwatch = Stopwatch()
|
||||||
job_log = Path(TEMP_PATH) / "job_log.txt"
|
job_log = Path(TEMP_PATH) / "job_log.txt"
|
||||||
with TeePopen(run_command, job_log, env, timeout) as process:
|
with TeePopen(run_command, job_log, env, timeout) as process:
|
||||||
|
print(f"Job process started, pid [{process.process.pid}]")
|
||||||
retcode = process.wait()
|
retcode = process.wait()
|
||||||
if retcode != 0:
|
if retcode != 0:
|
||||||
print(f"Run action failed for: [{job_name}] with exit code [{retcode}]")
|
print(f"Run action failed for: [{job_name}] with exit code [{retcode}]")
|
||||||
if timeout and process.timeout_exceeded:
|
if process.timeout_exceeded:
|
||||||
print(f"Timeout {timeout} exceeded, dumping the job report")
|
print(f"Job timed out: [{job_name}] exit code [{retcode}]")
|
||||||
JobReport(
|
assert JobReport.exist(), "JobReport real or dummy must be present"
|
||||||
status=FAILURE,
|
jr = JobReport.load()
|
||||||
description=f"Timeout {timeout} exceeded",
|
if jr.dummy:
|
||||||
test_results=[TestResult.create_check_timeout_expired(timeout)],
|
print(
|
||||||
start_time=stopwatch.start_time_str,
|
f"ERROR: Run action failed with timeout and did not generate JobReport - update dummy report with execution time"
|
||||||
duration=stopwatch.duration_seconds,
|
)
|
||||||
additional_files=[job_log],
|
jr.test_results = [TestResult.create_check_timeout_expired()]
|
||||||
).dump()
|
jr.duration = stopwatch.duration_seconds
|
||||||
|
|
||||||
print(f"Run action done for: [{job_name}]")
|
print(f"Run action done for: [{job_name}]")
|
||||||
return retcode
|
return retcode
|
||||||
@ -1205,7 +1205,7 @@ def main() -> int:
|
|||||||
job_report
|
job_report
|
||||||
), "BUG. There must be job report either real report, or pre-report if job was killed"
|
), "BUG. There must be job report either real report, or pre-report if job was killed"
|
||||||
error_description = ""
|
error_description = ""
|
||||||
if not job_report.pre_report:
|
if not job_report.dummy:
|
||||||
# it's a real job report
|
# it's a real job report
|
||||||
ch_helper = ClickHouseHelper()
|
ch_helper = ClickHouseHelper()
|
||||||
check_url = ""
|
check_url = ""
|
||||||
|
@ -332,7 +332,7 @@ class JobConfig:
|
|||||||
# will be triggered for the job if omitted in CI workflow yml
|
# will be triggered for the job if omitted in CI workflow yml
|
||||||
run_command: str = ""
|
run_command: str = ""
|
||||||
# job timeout, seconds
|
# job timeout, seconds
|
||||||
timeout: Optional[int] = None
|
timeout: int = 7200
|
||||||
# sets number of batches for a multi-batch job
|
# sets number of batches for a multi-batch job
|
||||||
num_batches: int = 1
|
num_batches: int = 1
|
||||||
# label that enables job in CI, if set digest isn't used
|
# label that enables job in CI, if set digest isn't used
|
||||||
@ -421,7 +421,6 @@ class CommonJobConfigs:
|
|||||||
),
|
),
|
||||||
run_command='functional_test_check.py "$CHECK_NAME"',
|
run_command='functional_test_check.py "$CHECK_NAME"',
|
||||||
runner_type=Runners.FUNC_TESTER,
|
runner_type=Runners.FUNC_TESTER,
|
||||||
timeout=9000,
|
|
||||||
)
|
)
|
||||||
STATEFUL_TEST = JobConfig(
|
STATEFUL_TEST = JobConfig(
|
||||||
job_name_keyword="stateful",
|
job_name_keyword="stateful",
|
||||||
|
@ -5,10 +5,11 @@ import csv
|
|||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
import signal
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import List, Tuple
|
from typing import List, Tuple, Optional
|
||||||
|
|
||||||
from build_download_helper import download_all_deb_packages
|
from build_download_helper import download_all_deb_packages
|
||||||
from clickhouse_helper import CiLogsCredentials
|
from clickhouse_helper import CiLogsCredentials
|
||||||
@ -25,11 +26,12 @@ from report import (
|
|||||||
TestResults,
|
TestResults,
|
||||||
read_test_results,
|
read_test_results,
|
||||||
FAILURE,
|
FAILURE,
|
||||||
|
TestResult,
|
||||||
)
|
)
|
||||||
from stopwatch import Stopwatch
|
from stopwatch import Stopwatch
|
||||||
from tee_popen import TeePopen
|
from tee_popen import TeePopen
|
||||||
from ci_config import CI
|
from ci_config import CI
|
||||||
from ci_utils import Utils
|
from ci_utils import Utils, Shell
|
||||||
|
|
||||||
NO_CHANGES_MSG = "Nothing to run"
|
NO_CHANGES_MSG = "Nothing to run"
|
||||||
|
|
||||||
@ -113,10 +115,6 @@ def get_run_command(
|
|||||||
|
|
||||||
if flaky_check:
|
if flaky_check:
|
||||||
envs.append("-e NUM_TRIES=50")
|
envs.append("-e NUM_TRIES=50")
|
||||||
envs.append("-e MAX_RUN_TIME=2800")
|
|
||||||
else:
|
|
||||||
max_run_time = os.getenv("MAX_RUN_TIME", "0")
|
|
||||||
envs.append(f"-e MAX_RUN_TIME={max_run_time}")
|
|
||||||
|
|
||||||
envs += [f"-e {e}" for e in additional_envs]
|
envs += [f"-e {e}" for e in additional_envs]
|
||||||
|
|
||||||
@ -128,7 +126,7 @@ def get_run_command(
|
|||||||
)
|
)
|
||||||
|
|
||||||
return (
|
return (
|
||||||
f"docker run --volume={builds_path}:/package_folder "
|
f"docker run --rm --name func-tester --volume={builds_path}:/package_folder "
|
||||||
# For dmesg and sysctl
|
# For dmesg and sysctl
|
||||||
"--privileged "
|
"--privileged "
|
||||||
f"{ci_logs_args}"
|
f"{ci_logs_args}"
|
||||||
@ -198,7 +196,7 @@ def process_results(
|
|||||||
state, description = status[0][0], status[0][1]
|
state, description = status[0][0], status[0][1]
|
||||||
if ret_code != 0:
|
if ret_code != 0:
|
||||||
state = ERROR
|
state = ERROR
|
||||||
description += " (but script exited with an error)"
|
description = f"Job failed, exit code: {ret_code}. " + description
|
||||||
|
|
||||||
try:
|
try:
|
||||||
results_path = result_directory / "test_results.tsv"
|
results_path = result_directory / "test_results.tsv"
|
||||||
@ -240,7 +238,19 @@ def parse_args():
|
|||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
test_process = None # type: Optional[TeePopen]
|
||||||
|
timeout_expired = False
|
||||||
|
|
||||||
|
|
||||||
|
def handle_sigterm(signum, _frame):
|
||||||
|
print(f"WARNING: Received signal {signum}")
|
||||||
|
global timeout_expired
|
||||||
|
timeout_expired = True
|
||||||
|
Shell.check(f"docker exec func-tester pkill -f clickhouse-test", verbose=True)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
signal.signal(signal.SIGTERM, handle_sigterm)
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
for handler in logging.root.handlers:
|
for handler in logging.root.handlers:
|
||||||
# pylint: disable=protected-access
|
# pylint: disable=protected-access
|
||||||
@ -328,11 +338,13 @@ def main():
|
|||||||
logging.info("Going to run func tests: %s", run_command)
|
logging.info("Going to run func tests: %s", run_command)
|
||||||
|
|
||||||
with TeePopen(run_command, run_log_path) as process:
|
with TeePopen(run_command, run_log_path) as process:
|
||||||
|
global test_process
|
||||||
|
test_process = process
|
||||||
retcode = process.wait()
|
retcode = process.wait()
|
||||||
if retcode == 0:
|
if retcode == 0:
|
||||||
logging.info("Run successfully")
|
logging.info("Run successfully")
|
||||||
else:
|
else:
|
||||||
logging.info("Run failed")
|
logging.info("Run failed, exit code %s", retcode)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
subprocess.check_call(
|
subprocess.check_call(
|
||||||
@ -348,6 +360,13 @@ def main():
|
|||||||
state, description, test_results, additional_logs = process_results(
|
state, description, test_results, additional_logs = process_results(
|
||||||
retcode, result_path, server_log_path
|
retcode, result_path, server_log_path
|
||||||
)
|
)
|
||||||
|
if timeout_expired:
|
||||||
|
description = "Timeout expired"
|
||||||
|
state = FAILURE
|
||||||
|
test_results.insert(
|
||||||
|
0, TestResult.create_check_timeout_expired(stopwatch.duration_seconds)
|
||||||
|
)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
print(
|
print(
|
||||||
"This is validate bugfix or flaky check run, but no changes test to run - skip with success"
|
"This is validate bugfix or flaky check run, but no changes test to run - skip with success"
|
||||||
|
@ -249,6 +249,7 @@ JOB_REPORT_FILE = Path(GITHUB_WORKSPACE) / "job_report.json"
|
|||||||
|
|
||||||
JOB_STARTED_TEST_NAME = "STARTED"
|
JOB_STARTED_TEST_NAME = "STARTED"
|
||||||
JOB_FINISHED_TEST_NAME = "COMPLETED"
|
JOB_FINISHED_TEST_NAME = "COMPLETED"
|
||||||
|
JOB_TIMEOUT_TEST_NAME = "Job Timeout Expired"
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@ -277,8 +278,8 @@ class TestResult:
|
|||||||
self.log_files.append(log_path)
|
self.log_files.append(log_path)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def create_check_timeout_expired(timeout: float) -> "TestResult":
|
def create_check_timeout_expired(duration: Optional[float] = None) -> "TestResult":
|
||||||
return TestResult("Check timeout expired", "FAIL", timeout)
|
return TestResult(JOB_TIMEOUT_TEST_NAME, "FAIL", time=duration)
|
||||||
|
|
||||||
|
|
||||||
TestResults = List[TestResult]
|
TestResults = List[TestResult]
|
||||||
@ -303,7 +304,7 @@ class JobReport:
|
|||||||
# indicates that this is not real job report but report for the job that was skipped by rerun check
|
# indicates that this is not real job report but report for the job that was skipped by rerun check
|
||||||
job_skipped: bool = False
|
job_skipped: bool = False
|
||||||
# indicates that report generated by CI script in order to check later if job was killed before real report is generated
|
# indicates that report generated by CI script in order to check later if job was killed before real report is generated
|
||||||
pre_report: bool = False
|
dummy: bool = False
|
||||||
exit_code: int = -1
|
exit_code: int = -1
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@ -311,7 +312,7 @@ class JobReport:
|
|||||||
return datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
|
return datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def create_pre_report(cls, status: str, job_skipped: bool) -> "JobReport":
|
def create_dummy(cls, status: str, job_skipped: bool) -> "JobReport":
|
||||||
return JobReport(
|
return JobReport(
|
||||||
status=status,
|
status=status,
|
||||||
description="",
|
description="",
|
||||||
@ -320,7 +321,7 @@ class JobReport:
|
|||||||
duration=0.0,
|
duration=0.0,
|
||||||
additional_files=[],
|
additional_files=[],
|
||||||
job_skipped=job_skipped,
|
job_skipped=job_skipped,
|
||||||
pre_report=True,
|
dummy=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
def update_duration(self):
|
def update_duration(self):
|
||||||
|
@ -2,6 +2,8 @@
|
|||||||
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
import signal
|
||||||
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
from io import TextIOWrapper
|
from io import TextIOWrapper
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@ -30,20 +32,35 @@ class TeePopen:
|
|||||||
self._process = None # type: Optional[Popen]
|
self._process = None # type: Optional[Popen]
|
||||||
self.timeout = timeout
|
self.timeout = timeout
|
||||||
self.timeout_exceeded = False
|
self.timeout_exceeded = False
|
||||||
|
self.terminated_by_sigterm = False
|
||||||
|
self.terminated_by_sigkill = False
|
||||||
|
self.pid = 0
|
||||||
|
|
||||||
def _check_timeout(self) -> None:
|
def _check_timeout(self) -> None:
|
||||||
if self.timeout is None:
|
if self.timeout is None:
|
||||||
return
|
return
|
||||||
sleep(self.timeout)
|
sleep(self.timeout)
|
||||||
|
logging.warning(
|
||||||
|
"Timeout exceeded. Send SIGTERM to process %s, timeout %s",
|
||||||
|
self.process.pid,
|
||||||
|
self.timeout,
|
||||||
|
)
|
||||||
|
self.send_signal(signal.SIGTERM)
|
||||||
|
time_wait = 0
|
||||||
|
self.terminated_by_sigterm = True
|
||||||
self.timeout_exceeded = True
|
self.timeout_exceeded = True
|
||||||
|
while self.process.poll() is None and time_wait < 100:
|
||||||
|
print("wait...")
|
||||||
|
wait = 5
|
||||||
|
sleep(wait)
|
||||||
|
time_wait += wait
|
||||||
while self.process.poll() is None:
|
while self.process.poll() is None:
|
||||||
logging.warning(
|
logging.error(
|
||||||
"Killing process %s, timeout %s exceeded",
|
"Process is still running. Send SIGKILL",
|
||||||
self.process.pid,
|
|
||||||
self.timeout,
|
|
||||||
)
|
)
|
||||||
os.killpg(self.process.pid, 9)
|
self.send_signal(signal.SIGKILL)
|
||||||
sleep(10)
|
self.terminated_by_sigkill = True
|
||||||
|
sleep(5)
|
||||||
|
|
||||||
def __enter__(self) -> "TeePopen":
|
def __enter__(self) -> "TeePopen":
|
||||||
self.process = Popen(
|
self.process = Popen(
|
||||||
@ -57,6 +74,9 @@ class TeePopen:
|
|||||||
bufsize=1,
|
bufsize=1,
|
||||||
errors="backslashreplace",
|
errors="backslashreplace",
|
||||||
)
|
)
|
||||||
|
sleep(1)
|
||||||
|
self.pid = self._get_child_pid()
|
||||||
|
print(f"Subprocess started, pid [{self.process.pid}], child pid [{self.pid}]")
|
||||||
if self.timeout is not None and self.timeout > 0:
|
if self.timeout is not None and self.timeout > 0:
|
||||||
t = Thread(target=self._check_timeout)
|
t = Thread(target=self._check_timeout)
|
||||||
t.daemon = True # does not block the program from exit
|
t.daemon = True # does not block the program from exit
|
||||||
@ -77,6 +97,22 @@ class TeePopen:
|
|||||||
|
|
||||||
self.log_file.close()
|
self.log_file.close()
|
||||||
|
|
||||||
|
def _get_child_pid(self):
|
||||||
|
# linux only
|
||||||
|
ps_command = f"ps --ppid {self.process.pid} -o pid="
|
||||||
|
res = "NA"
|
||||||
|
try:
|
||||||
|
result = subprocess.run(
|
||||||
|
ps_command, shell=True, capture_output=True, text=True
|
||||||
|
)
|
||||||
|
res = result.stdout.strip()
|
||||||
|
pid = int(res)
|
||||||
|
return pid
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Failed to get child's pid, command [{ps_command}], result [{res}]")
|
||||||
|
print(f"ERROR: getting Python subprocess PID: {e}")
|
||||||
|
return self.process.pid
|
||||||
|
|
||||||
def wait(self) -> int:
|
def wait(self) -> int:
|
||||||
if self.process.stdout is not None:
|
if self.process.stdout is not None:
|
||||||
for line in self.process.stdout:
|
for line in self.process.stdout:
|
||||||
@ -85,6 +121,15 @@ class TeePopen:
|
|||||||
|
|
||||||
return self.process.wait()
|
return self.process.wait()
|
||||||
|
|
||||||
|
def poll(self):
|
||||||
|
return self.process.poll()
|
||||||
|
|
||||||
|
def send_signal(self, signal_num):
|
||||||
|
if self.pid:
|
||||||
|
os.kill(self.pid, signal_num)
|
||||||
|
else:
|
||||||
|
print("ERROR: no process to send signal")
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def process(self) -> Popen:
|
def process(self) -> Popen:
|
||||||
if self._process is not None:
|
if self._process is not None:
|
||||||
|
Loading…
Reference in New Issue
Block a user