Merge pull request #14578 from ClickHouse/retries_in_s3_downloader

Retries in s3 downloader
2024-11-22 07:31:57 +00:00 · 2020-09-08 12:53:09 +03:00 · 2020-09-08 12:53:09 +03:00 · 4aad57de87
commit 4aad57de87
parent 11a247d2f4 c40ba48822
2 changed files with 68 additions and 38 deletions
--- a/docker/test/stateful/s3downloader
+++ b/docker/test/stateful/s3downloader
@@ -2,6 +2,7 @@
 # -*- coding: utf-8 -*-
 import os
 import sys
+import time
 import tarfile
 import logging
 import argparse
@@ -16,6 +17,8 @@ AVAILABLE_DATASETS = {
    'visits': 'visits_v1.tar',
 }

+RETRIES_COUNT = 5
+
 def _get_temp_file_name():
    return os.path.join(tempfile._get_default_tempdir(), next(tempfile._get_candidate_names()))

@@ -24,25 +27,37 @@ def build_url(base_url, dataset):

 def dowload_with_progress(url, path):
    logging.info("Downloading from %s to temp path %s", url, path)
-    with open(path, 'w') as f:
-        response = requests.get(url, stream=True)
-        response.raise_for_status()
-        total_length = response.headers.get('content-length')
-        if total_length is None or int(total_length) == 0:
-            logging.info("No content-length, will download file without progress")
-            f.write(response.content)
-        else:
-            dl = 0
-            total_length = int(total_length)
-            logging.info("Content length is %ld bytes", total_length)
-            for data in response.iter_content(chunk_size=4096):
-                dl += len(data)
-                f.write(data)
-                if sys.stdout.isatty():
-                    done = int(50 * dl / total_length)
-                    percent = int(100 * float(dl) / total_length)
-                    sys.stdout.write("\r[{}{}] {}%".format('=' * done, ' ' * (50-done), percent))
-                    sys.stdout.flush()
+    for i in range(RETRIES_COUNT):
+        try:
+            with open(path, 'w') as f:
+                response = requests.get(url, stream=True)
+                response.raise_for_status()
+                total_length = response.headers.get('content-length')
+                if total_length is None or int(total_length) == 0:
+                    logging.info("No content-length, will download file without progress")
+                    f.write(response.content)
+                else:
+                    dl = 0
+                    total_length = int(total_length)
+                    logging.info("Content length is %ld bytes", total_length)
+                    for data in response.iter_content(chunk_size=4096):
+                        dl += len(data)
+                        f.write(data)
+                        if sys.stdout.isatty():
+                            done = int(50 * dl / total_length)
+                            percent = int(100 * float(dl) / total_length)
+                            sys.stdout.write("\r[{}{}] {}%".format('=' * done, ' ' * (50-done), percent))
+                            sys.stdout.flush()
+            break
+        except Exception as ex:
+            sys.stdout.write("\n")
+            time.sleep(3)
+            logging.info("Exception while downloading %s, retry %s", ex, i + 1)
+            if os.path.exists(path):
+                os.remove(path)
+    else:
+        raise Exception("Cannot download dataset from {}, all retries exceeded".format(url))
+
    sys.stdout.write("\n")
    logging.info("Downloading finished")

--- a/docker/test/stateful_with_coverage/s3downloader
+++ b/docker/test/stateful_with_coverage/s3downloader
@@ -2,6 +2,7 @@
 # -*- coding: utf-8 -*-
 import os
 import sys
+import time
 import tarfile
 import logging
 import argparse
@@ -16,6 +17,8 @@ AVAILABLE_DATASETS = {
    'visits': 'visits_v1.tar',
 }

+RETRIES_COUNT = 5
+
 def _get_temp_file_name():
    return os.path.join(tempfile._get_default_tempdir(), next(tempfile._get_candidate_names()))

@@ -24,25 +27,37 @@ def build_url(base_url, dataset):

 def dowload_with_progress(url, path):
    logging.info("Downloading from %s to temp path %s", url, path)
-    with open(path, 'w') as f:
-        response = requests.get(url, stream=True)
-        response.raise_for_status()
-        total_length = response.headers.get('content-length')
-        if total_length is None or int(total_length) == 0:
-            logging.info("No content-length, will download file without progress")
-            f.write(response.content)
-        else:
-            dl = 0
-            total_length = int(total_length)
-            logging.info("Content length is %ld bytes", total_length)
-            for data in response.iter_content(chunk_size=4096):
-                dl += len(data)
-                f.write(data)
-                if sys.stdout.isatty():
-                    done = int(50 * dl / total_length)
-                    percent = int(100 * float(dl) / total_length)
-                    sys.stdout.write("\r[{}{}] {}%".format('=' * done, ' ' * (50-done), percent))
-                    sys.stdout.flush()
+    for i in range(RETRIES_COUNT):
+        try:
+            with open(path, 'w') as f:
+                response = requests.get(url, stream=True)
+                response.raise_for_status()
+                total_length = response.headers.get('content-length')
+                if total_length is None or int(total_length) == 0:
+                    logging.info("No content-length, will download file without progress")
+                    f.write(response.content)
+                else:
+                    dl = 0
+                    total_length = int(total_length)
+                    logging.info("Content length is %ld bytes", total_length)
+                    for data in response.iter_content(chunk_size=4096):
+                        dl += len(data)
+                        f.write(data)
+                        if sys.stdout.isatty():
+                            done = int(50 * dl / total_length)
+                            percent = int(100 * float(dl) / total_length)
+                            sys.stdout.write("\r[{}{}] {}%".format('=' * done, ' ' * (50-done), percent))
+                            sys.stdout.flush()
+            break
+        except Exception as ex:
+            sys.stdout.write("\n")
+            time.sleep(3)
+            logging.info("Exception while downloading %s, retry %s", ex, i + 1)
+            if os.path.exists(path):
+                os.remove(path)
+    else:
+        raise Exception("Cannot download dataset from {}, all retries exceeded".format(url))
+
    sys.stdout.write("\n")
    logging.info("Downloading finished")