Merge remote-tracking branch 'origin' into integration-2

This commit is contained in:
Yatsishin Ilya 2021-05-19 14:15:52 +03:00
commit 8932f794aa
2 changed files with 46 additions and 17 deletions

View File

@@ -99,9 +99,17 @@ public:
/// Free memory range.
void free(void * buf, size_t size)
{
checkSize(size);
freeNoTrack(buf, size);
CurrentMemoryTracker::free(size);
try
{
checkSize(size);
freeNoTrack(buf, size);
CurrentMemoryTracker::free(size);
}
catch (...)
{
DB::tryLogCurrentException("Allocator::free");
throw;
}
}
/** Enlarge memory range.

View File

@@ -760,8 +760,7 @@ class ClickHouseCluster:
run_and_check(self.base_cmd + ["up", "--force-recreate", "--no-deps", "-d", node.name])
node.ip_address = self.get_instance_ip(node.name)
node.client = Client(node.ip_address, command=self.client_bin_path)
start_deadline = time.time() + 180.0 # seconds
node.wait_for_start(start_deadline)
node.wait_for_start(start_timeout=20.0, connection_timeout=600.0) # seconds
return node
def restart_service(self, service_name):
@@ -1273,13 +1272,13 @@ class ClickHouseCluster:
subprocess_check_call(clickhouse_start_cmd)
logging.debug("ClickHouse instance created")
start_deadline = time.time() + 180.0 # seconds
start_timeout = 60.0 # seconds
for instance in self.instances.values():
instance.docker_client = self.docker_client
instance.ip_address = self.get_instance_ip(instance.name)
logging.debug("Waiting for ClickHouse start...")
instance.wait_for_start(start_deadline)
instance.wait_for_start(start_timeout)
logging.debug("ClickHouse started")
instance.client = Client(instance.ip_address, command=self.client_bin_path)
@@ -1758,32 +1757,54 @@ class ClickHouseInstance:
def start(self):
self.get_docker_handle().start()
def wait_for_start(self, deadline=None, timeout=None):
start_time = time.time()
def wait_for_start(self, start_timeout=None, connection_timeout=None):
if timeout is not None:
deadline = start_time + timeout
if start_timeout is None or start_timeout <= 0:
raise Exception("Invalid timeout: {}".format(start_timeout))
if connection_timeout is not None and connection_timeout < start_timeout:
raise Exception("Connection timeout {} should be grater then start timeout {}"
.format(connection_timeout, start_timeout))
start_time = time.time()
prev_rows_in_log = 0
def has_new_rows_in_log():
nonlocal prev_rows_in_log
try:
rows_in_log = int(self.count_in_log(".*").strip())
res = rows_in_log > prev_rows_in_log
prev_rows_in_log = rows_in_log
return res
except ValueError:
return False
while True:
handle = self.get_docker_handle()
status = handle.status
if status == 'exited':
raise Exception(
"Instance `{}' failed to start. Container status: {}, logs: {}".format(self.name, status,
handle.logs().decode('utf-8')))
raise Exception("Instance `{}' failed to start. Container status: {}, logs: {}"
.format(self.name, status, handle.logs().decode('utf-8')))
deadline = start_time + start_timeout
# It is possible that server starts slowly.
# If container is running, and there is some progress in log, check connection_timeout.
if connection_timeout and status == 'running' and has_new_rows_in_log():
deadline = start_time + connection_timeout
current_time = time.time()
time_left = deadline - current_time
if deadline is not None and current_time >= deadline:
if current_time >= deadline:
raise Exception("Timed out while waiting for instance `{}' with ip address {} to start. "
"Container status: {}, logs: {}".format(self.name, self.ip_address, status,
handle.logs().decode('utf-8')))
socket_timeout = min(start_timeout, deadline - current_time)
# Repeatedly poll the instance address until there is something that listens there.
# Usually it means that ClickHouse is ready to accept queries.
try:
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.settimeout(time_left)
sock.settimeout(socket_timeout)
sock.connect((self.ip_address, 9000))
self.is_up = True
return