ClickHouse/tests/ci/ccache_utils.py

137 lines
5.1 KiB
Python
Raw Normal View History

2021-11-10 09:08:43 +00:00
#!/usr/bin/env python3
import logging
import time
import sys
import os
2021-11-10 11:08:23 +00:00
import shutil
2021-11-10 11:13:34 +00:00
from pathlib import Path
2021-11-10 09:08:43 +00:00
2022-08-10 13:22:04 +00:00
import requests # type: ignore
2021-11-10 13:09:23 +00:00
2021-11-10 09:08:43 +00:00
from compress_files import decompress_fast, compress_fast
2022-08-11 13:01:32 +00:00
from env_helper import S3_DOWNLOAD, S3_BUILDS_BUCKET
from s3_helper import S3Helper
2021-11-10 09:08:43 +00:00
# Maximum number of attempts dowload_file_with_progress makes before giving up.
DOWNLOAD_RETRIES_COUNT = 5
2021-11-10 09:08:43 +00:00
def dowload_file_with_progress(url, path):
    """Download ``url`` into the file ``path``, retrying on any failure.

    Up to ``DOWNLOAD_RETRIES_COUNT`` attempts are made. When the server
    reports a non-zero ``content-length`` the body is streamed in 4 KiB chunks
    and, if stdout is a terminal, a 50-character progress bar is redrawn after
    every chunk. Otherwise the whole body is written in one go.

    After a failed attempt the partially written file is removed and, unless
    it was the last attempt, the function waits 3 seconds before retrying.

    Args:
        url: Address to fetch with an HTTP GET request.
        path: Local file path the body is written to (opened in ``"wb"`` mode).

    Raises:
        Exception: If every attempt failed; the last underlying error is
            attached as ``__cause__``.
    """
    logging.info("Downloading from %s to temp path %s", url, path)
    last_error = None
    for i in range(DOWNLOAD_RETRIES_COUNT):
        try:
            # The response is used as a context manager so the streamed
            # connection is released even when the download fails midway.
            with open(path, "wb") as f, requests.get(url, stream=True) as response:
                response.raise_for_status()
                total_length = response.headers.get("content-length")
                if total_length is None or int(total_length) == 0:
                    logging.info(
                        "No content-length, will download file without progress"
                    )
                    f.write(response.content)
                else:
                    dl = 0
                    total_length = int(total_length)
                    logging.info("Content length is %ld bytes", total_length)
                    for data in response.iter_content(chunk_size=4096):
                        dl += len(data)
                        f.write(data)
                        if sys.stdout.isatty():
                            done = int(50 * dl / total_length)
                            percent = int(100 * float(dl) / total_length)
                            eq_str = "=" * done
                            space_str = " " * (50 - done)
                            sys.stdout.write(f"\r[{eq_str}{space_str}] {percent}%")
                            sys.stdout.flush()
            break
        except Exception as ex:
            last_error = ex
            # Terminate a possibly half-drawn progress bar line.
            sys.stdout.write("\n")
            logging.info("Exception while downloading %s, retry %s", ex, i + 1)
            # Never leave a truncated file behind for the caller to pick up.
            if os.path.exists(path):
                os.remove(path)
            # Back off only when another attempt will actually follow.
            if i + 1 < DOWNLOAD_RETRIES_COUNT:
                time.sleep(3)
    else:
        # Reached only when no attempt hit ``break``, i.e. all of them failed.
        raise Exception(
            f"Cannot download dataset from {url}, all retries exceeded"
        ) from last_error

    sys.stdout.write("\n")
    logging.info("Downloading finished")
def get_ccache_if_not_exists(
    path_to_ccache_dir: str,
    s3_helper: S3Helper,
    current_pr_number: int,
    temp_path: str,
    release_pr: int,
) -> int:
    """Fetch a ccache archive from S3 and unpack it next to the ccache dir.

    Candidate PR numbers are tried in this order: ``current_pr_number``,
    then ``release_pr`` (only if non-zero), then ``0`` (only if the current
    PR is not already ``0``). The first PR that has an object under
    ``<pr>/ccaches`` whose key contains the ccache directory name wins; its
    archive is downloaded into ``temp_path``, any existing local ccache
    directory is deleted, and the archive is decompressed into the parent of
    ``path_to_ccache_dir``.

    Returns:
        The PR number whose cache was downloaded, or -1 if none was found.
    """
    cache_dir_name = os.path.basename(path_to_ccache_dir)

    candidate_prs = [current_pr_number]
    # Release PR is either 0 or defined
    if release_pr:
        candidate_prs.append(release_pr)
    if current_pr_number != 0:
        candidate_prs.append(0)

    downloaded_from_pr = -1
    for candidate in candidate_prs:
        logging.info("Searching cache for pr %s", candidate)
        listed = s3_helper.list_prefix(str(candidate) + "/ccaches")
        logging.info("Found %s objects for pr %s", len(listed), candidate)

        matching = [key for key in listed if cache_dir_name in key]
        if not matching:
            continue
        logging.info(
            "Found ccache archives for pr %s: %s", candidate, ", ".join(matching)
        )

        # There are multiple possible caches, the newest one ends with .tar.zst;
        # fall back to the first match when no such archive exists.
        chosen = next(
            (key for key in matching if key.endswith(".tar.zst")), matching[0]
        )
        logging.info("Found ccache on path %s", chosen)

        archive_path = os.path.join(temp_path, os.path.basename(chosen))
        dowload_file_with_progress(
            f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/{chosen}", archive_path
        )

        unpack_dir = str(Path(path_to_ccache_dir).parent)
        if not os.path.exists(unpack_dir):
            os.makedirs(unpack_dir)

        if os.path.exists(path_to_ccache_dir):
            shutil.rmtree(path_to_ccache_dir)
            logging.info("Ccache already exists, removing it")

        logging.info("Decompressing cache to path %s", unpack_dir)
        decompress_fast(archive_path, unpack_dir)
        logging.info("Files on path %s", os.listdir(unpack_dir))

        downloaded_from_pr = candidate
        logging.info("ccache downloaded")
        break
    else:
        # No candidate PR had a matching archive.
        logging.info("ccache not found anywhere, cannot download anything :(")
        if os.path.exists(path_to_ccache_dir):
            logging.info("But at least we have some local cache")

    return downloaded_from_pr
2021-11-10 09:08:43 +00:00
def upload_ccache(path_to_ccache_dir, s3_helper, current_pr_number, temp_path):
    """Compress the ccache directory and upload the archive to S3.

    The directory is packed into ``<temp_path>/<dir name>.tar.zst`` and then
    uploaded under the key ``<current_pr_number>/ccaches/<archive name>``
    through ``s3_helper.upload_build_file_to_s3``.
    """
    logging.info("Uploading cache %s for pr %s", path_to_ccache_dir, current_pr_number)

    archive_name = os.path.basename(path_to_ccache_dir) + ".tar.zst"
    archive_path = os.path.join(temp_path, archive_name)
    compress_fast(path_to_ccache_dir, archive_path)

    destination_key = f"{current_pr_number}/ccaches/{os.path.basename(archive_path)}"
    logging.info("Will upload %s to path %s", archive_path, destination_key)
    s3_helper.upload_build_file_to_s3(archive_path, destination_key)
    logging.info("Upload finished")