From 0193a132d4a1dc368a39b9c0522af809ae8c0b3a Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 8 Sep 2020 11:43:02 +0300 Subject: [PATCH 1/4] Add retries to s3 downloader --- docker/test/stateful/s3downloader | 49 ++++++++++++------- .../test/stateful_with_coverage/s3downloader | 49 ++++++++++++------- 2 files changed, 60 insertions(+), 38 deletions(-) diff --git a/docker/test/stateful/s3downloader b/docker/test/stateful/s3downloader index f8e2bf3cbe4..ca1947e5c17 100755 --- a/docker/test/stateful/s3downloader +++ b/docker/test/stateful/s3downloader @@ -16,6 +16,8 @@ AVAILABLE_DATASETS = { 'visits': 'visits_v1.tar', } +RETRIES_COUNT = 5 + def _get_temp_file_name(): return os.path.join(tempfile._get_default_tempdir(), next(tempfile._get_candidate_names())) @@ -24,25 +26,34 @@ def build_url(base_url, dataset): def dowload_with_progress(url, path): logging.info("Downloading from %s to temp path %s", url, path) - with open(path, 'w') as f: - response = requests.get(url, stream=True) - response.raise_for_status() - total_length = response.headers.get('content-length') - if total_length is None or int(total_length) == 0: - logging.info("No content-length, will download file without progress") - f.write(response.content) - else: - dl = 0 - total_length = int(total_length) - logging.info("Content length is %ld bytes", total_length) - for data in response.iter_content(chunk_size=4096): - dl += len(data) - f.write(data) - if sys.stdout.isatty(): - done = int(50 * dl / total_length) - percent = int(100 * float(dl) / total_length) - sys.stdout.write("\r[{}{}] {}%".format('=' * done, ' ' * (50-done), percent)) - sys.stdout.flush() + for i in range(RETRIES_COUNT): + try: + with open(path, 'w') as f: + response = requests.get(url, stream=True) + response.raise_for_status() + total_length = response.headers.get('content-length') + if total_length is None or int(total_length) == 0: + logging.info("No content-length, will download file without progress") + f.write(response.content) + else: + dl = 0 + total_length = int(total_length) + logging.info("Content length is %ld bytes", total_length) + for data in response.iter_content(chunk_size=4096): + dl += len(data) + f.write(data) + if sys.stdout.isatty(): + done = int(50 * dl / total_length) + percent = int(100 * float(dl) / total_length) + sys.stdout.write("\r[{}{}] {}%".format('=' * done, ' ' * (50-done), percent)) + sys.stdout.flush() + break + except Exception as ex: + sys.stdout.write("\n") + logging.info("Exception while downloading %s, retry %s", ex, i + 1) + if os.path.exists(path): + os.remove(path) + sys.stdout.write("\n") logging.info("Downloading finished") diff --git a/docker/test/stateful_with_coverage/s3downloader b/docker/test/stateful_with_coverage/s3downloader index f8e2bf3cbe4..ca1947e5c17 100755 --- a/docker/test/stateful_with_coverage/s3downloader +++ b/docker/test/stateful_with_coverage/s3downloader @@ -16,6 +16,8 @@ AVAILABLE_DATASETS = { 'visits': 'visits_v1.tar', } +RETRIES_COUNT = 5 + def _get_temp_file_name(): return os.path.join(tempfile._get_default_tempdir(), next(tempfile._get_candidate_names())) @@ -24,25 +26,34 @@ def build_url(base_url, dataset): def dowload_with_progress(url, path): logging.info("Downloading from %s to temp path %s", url, path) - with open(path, 'w') as f: - response = requests.get(url, stream=True) - response.raise_for_status() - total_length = response.headers.get('content-length') - if total_length is None or int(total_length) == 0: - logging.info("No content-length, will download file without progress") - f.write(response.content) - else: - dl = 0 - total_length = int(total_length) - logging.info("Content length is %ld bytes", total_length) - for data in response.iter_content(chunk_size=4096): - dl += len(data) - f.write(data) - if sys.stdout.isatty(): - done = int(50 * dl / total_length) - percent = int(100 * float(dl) / total_length) - sys.stdout.write("\r[{}{}] {}%".format('=' * done, ' ' * (50-done), percent)) - sys.stdout.flush() + for i in range(RETRIES_COUNT): + try: + with open(path, 'w') as f: + response = requests.get(url, stream=True) + response.raise_for_status() + total_length = response.headers.get('content-length') + if total_length is None or int(total_length) == 0: + logging.info("No content-length, will download file without progress") + f.write(response.content) + else: + dl = 0 + total_length = int(total_length) + logging.info("Content length is %ld bytes", total_length) + for data in response.iter_content(chunk_size=4096): + dl += len(data) + f.write(data) + if sys.stdout.isatty(): + done = int(50 * dl / total_length) + percent = int(100 * float(dl) / total_length) + sys.stdout.write("\r[{}{}] {}%".format('=' * done, ' ' * (50-done), percent)) + sys.stdout.flush() + break + except Exception as ex: + sys.stdout.write("\n") + logging.info("Exception while downloading %s, retry %s", ex, i + 1) + if os.path.exists(path): + os.remove(path) + sys.stdout.write("\n") logging.info("Downloading finished") From 73253f058dafed81659fbb127cf60dbd1cb41163 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 8 Sep 2020 11:45:01 +0300 Subject: [PATCH 2/4] Add sleep --- docker/test/stateful/s3downloader | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/test/stateful/s3downloader b/docker/test/stateful/s3downloader index ca1947e5c17..26155d2e6a9 100755 --- a/docker/test/stateful/s3downloader +++ b/docker/test/stateful/s3downloader @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- import os import sys +import time import tarfile import logging import argparse @@ -50,6 +51,7 @@ def dowload_with_progress(url, path): break except Exception as ex: sys.stdout.write("\n") + time.sleep(3) logging.info("Exception while downloading %s, retry %s", ex, i + 1) if os.path.exists(path): os.remove(path) From 1a1bccf41631b7beb637ad8291e3e44b3dcfa0a3 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 8 Sep 2020 11:45:22 +0300 Subject: [PATCH 3/4] Add sleeps --- docker/test/stateful_with_coverage/s3downloader | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/test/stateful_with_coverage/s3downloader b/docker/test/stateful_with_coverage/s3downloader index ca1947e5c17..26155d2e6a9 100755 --- a/docker/test/stateful_with_coverage/s3downloader +++ b/docker/test/stateful_with_coverage/s3downloader @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- import os import sys +import time import tarfile import logging import argparse @@ -50,6 +51,7 @@ def dowload_with_progress(url, path): break except Exception as ex: sys.stdout.write("\n") + time.sleep(3) logging.info("Exception while downloading %s, retry %s", ex, i + 1) if os.path.exists(path): os.remove(path) From c40ba48822f9d676695865970c059992529e4585 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 8 Sep 2020 11:49:24 +0300 Subject: [PATCH 4/4] Raise on error --- docker/test/stateful/s3downloader | 2 ++ docker/test/stateful_with_coverage/s3downloader | 2 ++ 2 files changed, 4 insertions(+) diff --git a/docker/test/stateful/s3downloader b/docker/test/stateful/s3downloader index 26155d2e6a9..fb49931f022 100755 --- a/docker/test/stateful/s3downloader +++ b/docker/test/stateful/s3downloader @@ -55,6 +55,8 @@ def dowload_with_progress(url, path): logging.info("Exception while downloading %s, retry %s", ex, i + 1) if os.path.exists(path): os.remove(path) + else: + raise Exception("Cannot download dataset from {}, all retries exceeded".format(url)) sys.stdout.write("\n") logging.info("Downloading finished") diff --git a/docker/test/stateful_with_coverage/s3downloader b/docker/test/stateful_with_coverage/s3downloader index 26155d2e6a9..fb49931f022 100755 --- a/docker/test/stateful_with_coverage/s3downloader +++ b/docker/test/stateful_with_coverage/s3downloader @@ -55,6 +55,8 @@ def dowload_with_progress(url, path): logging.info("Exception while downloading %s, retry %s", ex, i + 1) if os.path.exists(path): os.remove(path) + else: + raise Exception("Cannot download dataset from {}, all retries exceeded".format(url)) sys.stdout.write("\n") logging.info("Downloading finished")