From f0c930bf9dd7a980cbbb83f78c3f5288d11c2985 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 17 Mar 2021 22:28:18 +0300 Subject: [PATCH] remove query timeouts --- tests/integration/ci-runner.py | 37 ++++++++++++++----- tests/integration/helpers/cluster.py | 8 ++-- .../test_dictionaries_postgresql/test.py | 5 ++- tests/integration/test_drop_replica/test.py | 1 - 4 files changed, 35 insertions(+), 16 deletions(-) diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py index b2d60f7dc19..eb82d86b38b 100755 --- a/tests/integration/ci-runner.py +++ b/tests/integration/ci-runner.py @@ -19,8 +19,8 @@ CLICKHOUSE_ODBC_BRIDGE_BINARY_PATH = "/usr/bin/clickhouse-odbc-bridge" TRIES_COUNT = 10 MAX_TIME_SECONDS = 3600 -# NOTE it must be less then timeout in Sandbox -TASK_TIMEOUT = 7.5 * 60 * 60 +MAX_TIME_IN_SANDBOX = 20 * 60 # 20 minutes +TASK_TIMEOUT = 8 * 60 * 60 # 8 hours def get_tests_to_run(pr_info): result = set([]) @@ -167,7 +167,7 @@ class ClickhouseIntegrationTestsRunner: self.shuffle_groups = self.params['shuffle_test_groups'] self.flaky_check = 'flaky check' in self.params['context_name'] self.start_time = time.time() - self.soft_deadline_time = self.start_time + TASK_TIMEOUT + self.soft_deadline_time = self.start_time + (TASK_TIMEOUT - MAX_TIME_IN_SANDBOX) def path(self): return self.result_path @@ -274,16 +274,27 @@ class ClickhouseIntegrationTestsRunner: def _update_counters(self, main_counters, current_counters): for test in current_counters["PASSED"]: - if test not in main_counters["PASSED"]: + if test not in main_counters["PASSED"] and test not in main_counters["FLAKY"]: + is_flaky = False if test in main_counters["FAILED"]: main_counters["FAILED"].remove(test) + is_flaky = True if test in main_counters["ERROR"]: main_counters["ERROR"].remove(test) - main_counters["PASSED"].append(test) + is_flaky = True + + if is_flaky: + main_counters["FLAKY"].append(test) + else: + main_counters["PASSED"].append(test) for state in ("ERROR", "FAILED"): for test in current_counters[state]: + if test in main_counters["FLAKY"]: + continue if test in main_counters["PASSED"]: + main_counters["PASSED"].remove(test) + main_counters["FLAKY"].append(test) continue if test not in main_counters[state]: main_counters[state].append(test) @@ -309,12 +320,15 @@ class ClickhouseIntegrationTestsRunner: "ERROR": [], "PASSED": [], "FAILED": [], + "SKIPPED": [], + "FLAKY": [], } tests_times = defaultdict(float) if self.soft_deadline_time < time.time(): for test in tests_in_group: - counters["ERROR"].append(test) + logging.info("Task timeout exceeded, skipping %s", test) + counters["SKIPPED"].append(test) tests_times[test] = 0 log_name = None log_path = None @@ -361,10 +375,10 @@ class ClickhouseIntegrationTestsRunner: for test_name, test_time in new_tests_times.items(): tests_times[test_name] = test_time os.remove(output_path) - if len(counters["PASSED"]) == len(tests_in_group): + if len(counters["PASSED"]) + len(counters["FLAKY"]) == len(tests_in_group): logging.info("All tests from group %s passed", test_group) break - if len(counters["PASSED"]) >= 0 and len(counters["FAILED"]) == 0 and len(counters["ERROR"]) == 0: + if len(counters["PASSED"]) + len(counters["FLAKY"]) >= 0 and len(counters["FAILED"]) == 0 and len(counters["ERROR"]) == 0: logging.info("Seems like all tests passed but some of them are skipped or deselected. Ignoring them and finishing group.") break else: @@ -407,6 +421,7 @@ class ClickhouseIntegrationTestsRunner: # NOTE "error" result state will restart the whole test task, so we use "failure" here result_state = "failure" break + assert len(counters["FLAKY"]) == 0 logging.info("Try is OK, all tests passed, going to clear env") clear_ip_tables_and_restart_daemons() logging.info("And going to sleep for some time") @@ -448,6 +463,8 @@ class ClickhouseIntegrationTestsRunner: "ERROR": [], "PASSED": [], "FAILED": [], + "SKIPPED": [], + "FLAKY": [], } tests_times = defaultdict(float) @@ -499,12 +516,14 @@ class ClickhouseIntegrationTestsRunner: text_state = state test_result += [(c, text_state, "{:.2f}".format(tests_times[c])) for c in counters[state]] - status_text = "fail: {}, passed: {}, error: {}".format(len(counters['FAILED']), len(counters['PASSED']), len(counters['ERROR'])) + failed_sum = len(counters['FAILED']) + len(counters['ERROR']) + status_text = "fail: {}, passed: {}, flaky: {}".format(failed_sum, len(counters['PASSED']), len(counters['FLAKY'])) if self.soft_deadline_time < time.time(): status_text = "Timeout, " + status_text result_state = "failure" + counters['FLAKY'] = [] if not counters or sum(len(counter) for counter in counters.values()) == 0: status_text = "No tests found for some reason! It's a bug" result_state = "failure" diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 4adde53b6b8..3872234d36c 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -969,12 +969,12 @@ class ClickHouseInstance: return "-fsanitize=address" in build_opts # Connects to the instance via clickhouse-client, sends a query (1st argument) and returns the answer - def query(self, sql, stdin=None, timeout=60, settings=None, user=None, password=None, database=None, + def query(self, sql, stdin=None, timeout=None, settings=None, user=None, password=None, database=None, ignore_error=False): return self.client.query(sql, stdin=stdin, timeout=timeout, settings=settings, user=user, password=password, database=database, ignore_error=ignore_error) - def query_with_retry(self, sql, stdin=None, timeout=10, settings=None, user=None, password=None, database=None, + def query_with_retry(self, sql, stdin=None, timeout=None, settings=None, user=None, password=None, database=None, ignore_error=False, retry_count=20, sleep_time=0.5, check_callback=lambda x: True): result = None @@ -998,13 +998,13 @@ class ClickHouseInstance: return self.client.get_query_request(*args, **kwargs) # Connects to the instance via clickhouse-client, sends a query (1st argument), expects an error and return its code - def query_and_get_error(self, sql, stdin=None, timeout=60, settings=None, user=None, password=None, + def query_and_get_error(self, sql, stdin=None, timeout=None, settings=None, user=None, password=None, database=None): return self.client.query_and_get_error(sql, stdin=stdin, timeout=timeout, settings=settings, user=user, password=password, database=database) # The same as query_and_get_error but ignores successful query. - def query_and_get_answer_with_error(self, sql, stdin=None, timeout=60, settings=None, user=None, password=None, + def query_and_get_answer_with_error(self, sql, stdin=None, timeout=None, settings=None, user=None, password=None, database=None): return self.client.query_and_get_answer_with_error(sql, stdin=stdin, timeout=timeout, settings=settings, user=user, password=password, database=database) diff --git a/tests/integration/test_dictionaries_postgresql/test.py b/tests/integration/test_dictionaries_postgresql/test.py index b83c00409af..0e83cc28085 100644 --- a/tests/integration/test_dictionaries_postgresql/test.py +++ b/tests/integration/test_dictionaries_postgresql/test.py @@ -82,11 +82,11 @@ def test_load_dictionaries(started_cluster): def test_invalidate_query(started_cluster): conn = get_postgres_conn(True) cursor = conn.cursor() - table_name = 'test0' + table_name = 'test1' create_and_fill_postgres_table(table_name) # invalidate query: SELECT value FROM test0 WHERE id = 0 - dict_name = 'dict0' + dict_name = 'dict1' create_dict(table_name) node1.query("SYSTEM RELOAD DICTIONARY {}".format(dict_name)) assert node1.query("SELECT dictGetUInt32('{}', 'value', toUInt64(0))".format(dict_name)) == "0\n" @@ -111,6 +111,7 @@ def test_invalidate_query(started_cluster): time.sleep(5) assert node1.query("SELECT dictGetUInt32('{}', 'value', toUInt64(0))".format(dict_name)) == '2\n' assert node1.query("SELECT dictGetUInt32('{}', 'value', toUInt64(1))".format(dict_name)) == '2\n' + cursor.execute("DROP TABLE IF EXISTS {}".format(table_name)) if __name__ == '__main__': diff --git a/tests/integration/test_drop_replica/test.py b/tests/integration/test_drop_replica/test.py index 7d7ad784166..947eaa2dfa1 100644 --- a/tests/integration/test_drop_replica/test.py +++ b/tests/integration/test_drop_replica/test.py @@ -105,7 +105,6 @@ def test_drop_replica(start_cluster): with PartitionManager() as pm: ## make node_1_1 dead pm.drop_instance_zk_connections(node_1_1) - time.sleep(10) assert "doesn't exist" in node_1_3.query_and_get_error( "SYSTEM DROP REPLICA 'node_1_1' FROM TABLE test.test_table")