Fix clickhouse-test --no-drop-if-fail on reference mismatch

Previusly it worked only for the case when the clickhouse-client/script
exited with non 0, because the check was done before checking the
reference file.

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
This commit is contained in:
Azat Khuzhin 2023-09-04 16:16:52 +02:00
parent e192d4c624
commit f2ef2ec012

View File

@ -1238,15 +1238,6 @@ class TestCase:
).total_seconds() < args.timeout and proc.poll() is None:
sleep(0.01)
need_drop_database = not args.database
if need_drop_database and args.no_drop_if_fail:
maybe_passed = (
(proc.returncode == 0)
and (proc.stderr is None)
and (proc.stdout is None or "Exception" not in proc.stdout)
)
need_drop_database = maybe_passed
debug_log = ""
if os.path.exists(self.testcase_args.debug_log_file):
with open(self.testcase_args.debug_log_file, "rb") as stream:
@ -1254,65 +1245,6 @@ class TestCase:
debug_log += str(stream.read(), errors="replace", encoding="utf-8")
debug_log += "\n"
if need_drop_database:
seconds_left = max(
args.timeout - (datetime.now() - start_time).total_seconds(), 20
)
# Check if the test does not cleanup its tables.
# Only for newly added tests. Please extend this check to the old tests as well.
if self.case_file >= "02800":
leftover_tables = (
clickhouse_execute(
args,
f"SHOW TABLES FROM {database}",
timeout=seconds_left,
settings={
"log_comment": args.testcase_basename,
},
)
.decode()
.replace("\n", ", ")
)
if len(leftover_tables) != 0:
raise Exception(
f"The test should cleanup its tables ({leftover_tables}), otherwise it is inconvenient for running it locally."
)
drop_database_query = f"DROP DATABASE IF EXISTS {database}"
if args.replicated_database:
drop_database_query += " ON CLUSTER test_cluster_database_replicated"
try:
# It's possible to get an error "New table appeared in database being dropped or detached. Try again."
for _ in range(1, 60):
try:
clickhouse_execute(
args,
drop_database_query,
timeout=seconds_left,
settings={
"log_comment": args.testcase_basename,
},
)
except HTTPError as e:
if need_retry(args, e.message, e.message, 0):
continue
raise
break
except socket.timeout:
total_time = (datetime.now() - start_time).total_seconds()
return (
None,
"",
f"Timeout dropping database {database} after test",
debug_log,
total_time,
)
shutil.rmtree(args.test_tmp_dir)
total_time = (datetime.now() - start_time).total_seconds()
# Normalize randomized database names in stdout, stderr files.
@ -1341,6 +1273,8 @@ class TestCase:
return proc, stdout, stderr, debug_log, total_time
def run(self, args, suite, client_options, server_logs_level):
start_time = datetime.now()
try:
skip_reason = self.should_skip_test(suite)
if skip_reason is not None:
@ -1376,40 +1310,118 @@ class TestCase:
if result.status == TestStatus.FAIL:
result.description = self.add_info_about_settings(result.description)
self._cleanup(result.status == TestStatus.OK)
return result
except KeyboardInterrupt as e:
raise e
except HTTPError:
total_time = (datetime.now() - start_time).total_seconds()
return TestResult(
self.name,
TestStatus.FAIL,
FailureReason.INTERNAL_QUERY_FAIL,
0.0,
total_time,
self.add_info_about_settings(
self.get_description_from_exception_info(sys.exc_info())
),
)
except socket.timeout:
total_time = (datetime.now() - start_time).total_seconds()
return TestResult(
self.name,
TestStatus.FAIL,
FailureReason.INTERNAL_QUERY_FAIL,
total_time,
self.add_info_about_settings(
self.get_description_from_exception_info(sys.exc_info())
),
)
except (ConnectionError, http.client.ImproperConnectionState):
total_time = (datetime.now() - start_time).total_seconds()
return TestResult(
self.name,
TestStatus.FAIL,
FailureReason.SERVER_DIED,
0.0,
total_time,
self.add_info_about_settings(
self.get_description_from_exception_info(sys.exc_info())
),
)
except Exception:
total_time = (datetime.now() - start_time).total_seconds()
return TestResult(
self.name,
TestStatus.UNKNOWN,
FailureReason.INTERNAL_ERROR,
0.0,
total_time,
self.get_description_from_exception_info(sys.exc_info()),
)
finally:
self.remove_random_settings_from_env()
def _cleanup(self, passed):
args = self.testcase_args
need_cleanup = not args.database
if need_cleanup and args.no_drop_if_fail:
need_cleanup = passed
if not need_cleanup:
return
time_passed = (datetime.now() - args.testcase_start_time).total_seconds()
timeout = max(args.timeout - time_passed, 20)
self._cleanup_database(args, timeout)
shutil.rmtree(args.test_tmp_dir)
def _cleanup_database(self, args, timeout):
database = args.testcase_database
# Check if the test does not cleanup its tables.
# Only for newly added tests. Please extend this check to the old tests as well.
if self.case_file >= "02800":
leftover_tables = (
clickhouse_execute(
args,
f"SHOW TABLES FROM {database}",
timeout=timeout,
settings={
"log_comment": args.testcase_basename,
},
)
.decode()
.replace("\n", ", ")
)
if len(leftover_tables) != 0:
raise Exception(
f"The test should cleanup its tables ({leftover_tables}), otherwise it is inconvenient for running it locally."
)
drop_database_query = f"DROP DATABASE IF EXISTS {database}"
if args.replicated_database:
drop_database_query += " ON CLUSTER test_cluster_database_replicated"
# It's possible to get an error "New table appeared in database being dropped or detached. Try again."
for _ in range(1, 60):
try:
clickhouse_execute(
args,
drop_database_query,
timeout=timeout,
settings={
"log_comment": args.testcase_basename,
},
)
except HTTPError as e:
if need_retry(args, e.message, e.message, 0):
continue
raise
break
class TestSuite:
@staticmethod
@ -2505,7 +2517,7 @@ def parse_args():
parser.add_argument(
"--no-drop-if-fail",
action="store_true",
help="Do not drop database for test if test has failed (does not work if reference file mismatch)",
help="Do not drop database for test if test has failed",
)
parser.add_argument(
"--hide-db-name",