Merge pull request #40127 from ClickHouse/s3-helper

Rework S3Helper a little bit
Commit 22c53e7f7b by Mikhail f. Shiryaev, 2022-08-13 16:27:00 +02:00 (committed by GitHub)
23 changed files with 72 additions and 55 deletions
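The change is mechanical across the CI scripts: instead of importing S3_URL and passing it to every S3Helper(...) call, the helper now reads its defaults from env_helper, and a separate S3_DOWNLOAD endpoint is introduced for the links it hands back. A minimal before/after sketch of the call-site pattern (module names as in the diff):

from env_helper import S3_URL
from s3_helper import S3Helper

# Before: every script passed the endpoint explicitly.
s3_helper = S3Helper(S3_URL)

# After: the constructor defaults to S3_URL for the boto3 client and to
# S3_DOWNLOAD for the URLs it returns, so call sites need no arguments.
s3_helper = S3Helper()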

View File

@@ -12,7 +12,6 @@ from env_helper import (
     GITHUB_RUN_URL,
     REPORTS_PATH,
     REPO_COPY,
-    S3_URL,
     TEMP_PATH,
 )
 from s3_helper import S3Helper
@@ -118,7 +117,7 @@ if __name__ == "__main__":
         "core.gz": os.path.join(workspace_path, "core.gz"),
     }
 
-    s3_helper = S3Helper(S3_URL)
+    s3_helper = S3Helper()
     for f in paths:
         try:
             paths[f] = s3_helper.upload_test_report_to_s3(paths[f], s3_prefix + "/" + f)

View File

@@ -15,7 +15,7 @@ from env_helper import (
     IMAGES_PATH,
     REPO_COPY,
     S3_BUILDS_BUCKET,
-    S3_URL,
+    S3_DOWNLOAD,
     TEMP_PATH,
 )
 from s3_helper import S3Helper
@@ -143,9 +143,9 @@ def check_for_success_run(
     for url in build_results:
         url_escaped = url.replace("+", "%2B").replace(" ", "%20")
         if BUILD_LOG_NAME in url:
-            log_url = f"{S3_URL}/{S3_BUILDS_BUCKET}/{url_escaped}"
+            log_url = f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/{url_escaped}"
         else:
-            build_urls.append(f"{S3_URL}/{S3_BUILDS_BUCKET}/{url_escaped}")
+            build_urls.append(f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/{url_escaped}")
     if not log_url:
         # log is uploaded the last, so if there's no log we need to rerun the build
         return
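The gate above relies on upload ordering: the build log is uploaded last, so a missing log URL means the previous run never finished and its artifacts cannot be reused. A compressed, self-contained sketch of that logic; the BUILD_LOG_NAME value and the helper name are assumptions for illustration:

from env_helper import S3_BUILDS_BUCKET, S3_DOWNLOAD

BUILD_LOG_NAME = "build_log.log"  # assumed value of the real constant

def previous_run_urls(build_results):
    """Return (log_url, build_urls), or None when the build must be rerun."""
    log_url, build_urls = "", []
    for url in build_results:
        key = url.replace("+", "%2B").replace(" ", "%20")
        if BUILD_LOG_NAME in url:
            log_url = f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/{key}"
        else:
            build_urls.append(f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/{key}")
    if not log_url:
        return None  # the log is uploaded last; no log means an unfinished run
    return log_url, build_urls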
@@ -249,7 +249,7 @@ def main():
     logging.info("Repo copy path %s", REPO_COPY)
 
-    s3_helper = S3Helper(S3_URL)
+    s3_helper = S3Helper()
 
     version = get_version_from_repo(git=Git(True))
     release_or_pr, performance_pr = get_release_or_pr(pr_info, version)
 

View File

@@ -14,7 +14,6 @@ from env_helper import (
     GITHUB_RUN_URL,
     GITHUB_SERVER_URL,
     REPORTS_PATH,
-    S3_URL,
     TEMP_PATH,
 )
 from report import create_build_html_report
@@ -245,7 +244,7 @@ def main():
         logging.error("No success builds, failing check")
         sys.exit(1)
 
-    s3_helper = S3Helper(S3_URL)
+    s3_helper = S3Helper()
 
     branch_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commits/master"
     branch_name = "master"

View File

@@ -10,7 +10,7 @@ from pathlib import Path
 
 import requests  # type: ignore
 
 from compress_files import decompress_fast, compress_fast
-from env_helper import S3_URL, S3_BUILDS_BUCKET
+from env_helper import S3_DOWNLOAD, S3_BUILDS_BUCKET
 
 DOWNLOAD_RETRIES_COUNT = 5
@@ -74,7 +74,7 @@ def get_ccache_if_not_exists(
     for obj in objects:
         if ccache_name in obj:
             logging.info("Found ccache on path %s", obj)
-            url = f"{S3_URL}/{S3_BUILDS_BUCKET}/{obj}"
+            url = f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/{obj}"
             compressed_cache = os.path.join(temp_path, os.path.basename(obj))
             dowload_file_with_progress(url, compressed_cache)
 

View File

@@ -7,7 +7,7 @@ import logging
 
 from github import Github
 
-from env_helper import IMAGES_PATH, REPO_COPY, S3_TEST_REPORTS_BUCKET, S3_URL
+from env_helper import IMAGES_PATH, REPO_COPY, S3_TEST_REPORTS_BUCKET, S3_DOWNLOAD
 from stopwatch import Stopwatch
 from upload_result_helper import upload_results
 from s3_helper import S3Helper
@@ -23,7 +23,7 @@ def get_run_command(repo_path, output_path, image):
     cmd = (
         "docker run " + f"--volume={repo_path}:/repo_folder "
         f"--volume={output_path}:/test_output "
-        f"-e 'DATA={S3_URL}/{S3_TEST_REPORTS_BUCKET}/codebrowser/data' {image}"
+        f"-e 'DATA={S3_DOWNLOAD}/{S3_TEST_REPORTS_BUCKET}/codebrowser/data' {image}"
     )
     return cmd
 
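For orientation, with the default env_helper values the command assembled by get_run_command comes out roughly as follows (paths and image tag are illustrative; the output is one line, wrapped here for readability):

from codebrowser_check import get_run_command

cmd = get_run_command("/repo", "/out", "clickhouse/codebrowser:latest")
print(cmd)
# docker run --volume=/repo:/repo_folder --volume=/out:/test_output \
#   -e 'DATA=https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/data' \
#   clickhouse/codebrowser:latest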
@@ -41,7 +41,7 @@ if __name__ == "__main__":
         os.makedirs(temp_path)
 
     docker_image = get_image_with_version(IMAGES_PATH, "clickhouse/codebrowser")
-    s3_helper = S3Helper(S3_URL)
+    s3_helper = S3Helper()
 
     result_path = os.path.join(temp_path, "result_path")
     if not os.path.exists(result_path):
@@ -70,7 +70,7 @@ if __name__ == "__main__":
     )
 
     index_html = (
-        '<a href="{S3_URL}/{S3_TEST_REPORTS_BUCKET}/codebrowser/index.html">'
+        '<a href="{S3_DOWNLOAD}/{S3_TEST_REPORTS_BUCKET}/codebrowser/index.html">'
         "HTML report</a>"
     )
 

View File

@@ -8,7 +8,7 @@ import sys
 
 from github import Github
 
-from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH, S3_URL
+from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH
 from s3_helper import S3Helper
 from get_robot_token import get_best_robot_token
 from pr_info import PRInfo
@@ -169,7 +169,7 @@ if __name__ == "__main__":
     subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
 
-    s3_helper = S3Helper(S3_URL)
+    s3_helper = S3Helper()
 
     state, description, test_results, additional_logs = process_result(
         result_path, server_log_path
     )

View File

@@ -14,7 +14,7 @@ from github import Github
 
 from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse
 from commit_status_helper import post_commit_status
-from env_helper import GITHUB_WORKSPACE, RUNNER_TEMP, GITHUB_RUN_URL, S3_URL
+from env_helper import GITHUB_WORKSPACE, RUNNER_TEMP, GITHUB_RUN_URL
 from get_robot_token import get_best_robot_token, get_parameter_from_ssm
 from pr_info import PRInfo
 from s3_helper import S3Helper
@@ -460,7 +460,7 @@ def main():
         with open(changed_json, "w", encoding="utf-8") as images_file:
             json.dump(result_images, images_file)
 
-    s3_helper = S3Helper(S3_URL)
+    s3_helper = S3Helper()
 
     s3_path_prefix = (
         str(pr_info.number) + "/" + pr_info.sha + "/" + NAME.lower().replace(" ", "_")

View File

@@ -11,7 +11,7 @@ from github import Github
 
 from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse
 from commit_status_helper import post_commit_status
-from env_helper import RUNNER_TEMP, S3_URL
+from env_helper import RUNNER_TEMP
 from get_robot_token import get_best_robot_token, get_parameter_from_ssm
 from pr_info import PRInfo
 from s3_helper import S3Helper
@@ -203,7 +203,7 @@ def main():
             json.dump(changed_images, ci)
 
     pr_info = PRInfo()
-    s3_helper = S3Helper(S3_URL)
+    s3_helper = S3Helper()
 
     url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [], NAME)
 

View File

@@ -16,7 +16,7 @@ from build_check import get_release_or_pr
 from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse
 from commit_status_helper import post_commit_status
 from docker_images_check import DockerImage
-from env_helper import CI, GITHUB_RUN_URL, RUNNER_TEMP, S3_BUILDS_BUCKET, S3_URL
+from env_helper import CI, GITHUB_RUN_URL, RUNNER_TEMP, S3_BUILDS_BUCKET, S3_DOWNLOAD
 from get_robot_token import get_best_robot_token, get_parameter_from_ssm
 from git_helper import Git
 from pr_info import PRInfo
@@ -309,7 +309,7 @@ def main():
     pr_info = PRInfo()
     release_or_pr, _ = get_release_or_pr(pr_info, args.version)
     args.bucket_prefix = (
-        f"{S3_URL}/{S3_BUILDS_BUCKET}/{release_or_pr}/{pr_info.sha}"
+        f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/{release_or_pr}/{pr_info.sha}"
     )
 
     if args.push:
@@ -335,7 +335,7 @@ def main():
         status = "failure"
 
     pr_info = pr_info or PRInfo()
-    s3_helper = S3Helper(S3_URL)
+    s3_helper = S3Helper()
 
     url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [], NAME)
 

View File

@@ -6,7 +6,7 @@ import os
 import sys
 
 from github import Github
-from env_helper import TEMP_PATH, REPO_COPY, S3_URL
+from env_helper import TEMP_PATH, REPO_COPY
 from s3_helper import S3Helper
 from pr_info import PRInfo
 from get_robot_token import get_best_robot_token
@@ -120,7 +120,7 @@ if __name__ == "__main__":
     else:
         lines.append(("Non zero exit code", "FAIL"))
 
-    s3_helper = S3Helper(S3_URL)
+    s3_helper = S3Helper()
 
     ch_helper = ClickHouseHelper()
     report_url = upload_results(

View File

@@ -7,7 +7,7 @@ import sys
 
 from github import Github
 
-from env_helper import TEMP_PATH, REPO_COPY, CLOUDFLARE_TOKEN, S3_URL
+from env_helper import TEMP_PATH, REPO_COPY, CLOUDFLARE_TOKEN
 from s3_helper import S3Helper
 from pr_info import PRInfo
 from get_robot_token import get_best_robot_token
@@ -106,7 +106,7 @@ if __name__ == "__main__":
     else:
         lines.append(("Non zero exit code", "FAIL"))
 
-    s3_helper = S3Helper(S3_URL)
+    s3_helper = S3Helper()
 
     report_url = upload_results(
         s3_helper, pr_info.number, pr_info.sha, lines, additional_files, NAME

View File

@@ -23,6 +23,7 @@ REPORTS_PATH = os.getenv("REPORTS_PATH", p.abspath(p.join(module_dir, "./reports")))
 REPO_COPY = os.getenv("REPO_COPY", git_root)
 RUNNER_TEMP = os.getenv("RUNNER_TEMP", p.abspath(p.join(module_dir, "./tmp")))
 S3_URL = os.getenv("S3_URL", "https://s3.amazonaws.com")
+S3_DOWNLOAD = os.getenv("S3_DOWNLOAD", S3_URL)
 S3_BUILDS_BUCKET = os.getenv("S3_BUILDS_BUCKET", "clickhouse-builds")
 S3_TEST_REPORTS_BUCKET = os.getenv("S3_TEST_REPORTS_BUCKET", "clickhouse-test-reports")
 
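This is the core of the PR: S3_DOWNLOAD falls back to S3_URL, so deployments that never set it keep the old single-endpoint behavior, while CI can upload through one host and publish links through another. A self-contained sketch of the pattern; the example hostnames are invented:

import os

S3_URL = os.getenv("S3_URL", "https://s3.amazonaws.com")
S3_DOWNLOAD = os.getenv("S3_DOWNLOAD", S3_URL)

# Unset            -> both point at https://s3.amazonaws.com (old behavior).
# S3_URL=http://minio:9000, S3_DOWNLOAD=https://cache.example.com
#                  -> boto3 uploads go to the API endpoint, while the URLs
#                     handed back to users point at the public mirror.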

View File

@@ -9,7 +9,7 @@ import atexit
 
 from github import Github
 
-from env_helper import CACHES_PATH, TEMP_PATH, S3_URL
+from env_helper import CACHES_PATH, TEMP_PATH
 from pr_info import FORCE_TESTS_LABEL, PRInfo
 from s3_helper import S3Helper
 from get_robot_token import get_best_robot_token
@@ -108,7 +108,7 @@ if __name__ == "__main__":
 
     docker_image = get_image_with_version(temp_path, "clickhouse/fasttest")
 
-    s3_helper = S3Helper(S3_URL)
+    s3_helper = S3Helper()
 
     workspace = os.path.join(temp_path, "fasttest-workspace")
     if not os.path.exists(workspace):

View File

@@ -10,7 +10,7 @@ import atexit
 
 from github import Github
 
-from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH, S3_URL
+from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH
 from s3_helper import S3Helper
 from get_robot_token import get_best_robot_token
 from pr_info import FORCE_TESTS_LABEL, PRInfo
@@ -88,7 +88,8 @@ def get_run_command(
 
     envs = [
         f"-e MAX_RUN_TIME={int(0.9 * kill_timeout)}",
-        f'-e S3_URL="{S3_URL}/clickhouse-datasets"',
+        # a static link, don't use S3_URL or S3_DOWNLOAD
+        '-e S3_URL="https://s3.amazonaws.com/clickhouse-datasets"',
     ]
 
     if flaky_check:
@@ -314,7 +315,7 @@ if __name__ == "__main__":
     subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
 
-    s3_helper = S3Helper(S3_URL)
+    s3_helper = S3Helper()
 
     state, description, test_results, additional_logs = process_results(
         result_path, server_log_path
     )

View File

@@ -10,7 +10,7 @@ import sys
 
 from github import Github
 
-from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH, S3_URL
+from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH
 from s3_helper import S3Helper
 from get_robot_token import get_best_robot_token
 from pr_info import PRInfo
@@ -249,7 +249,7 @@ if __name__ == "__main__":
     ch_helper = ClickHouseHelper()
     mark_flaky_tests(ch_helper, check_name, test_results)
 
-    s3_helper = S3Helper(S3_URL)
+    s3_helper = S3Helper()
     report_url = upload_results(
         s3_helper,
         pr_info.number,

View File

@@ -9,7 +9,7 @@ import boto3
 from github import Github
 import requests
 
-from env_helper import REPO_COPY, TEMP_PATH, S3_BUILDS_BUCKET, S3_URL
+from env_helper import REPO_COPY, TEMP_PATH, S3_BUILDS_BUCKET, S3_DOWNLOAD
 from stopwatch import Stopwatch
 from upload_result_helper import upload_results
 from s3_helper import S3Helper
@@ -192,7 +192,7 @@ if __name__ == "__main__":
     # run (see .github/workflows/jepsen.yml) So we cannot add explicit
     # dependency on a build job and using busy loop on it's results. For the
     # same reason we are using latest docker image.
-    build_url = f"{S3_URL}/{S3_BUILDS_BUCKET}/{release_or_pr}/{pr_info.sha}/{build_name}/clickhouse"
+    build_url = f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/{release_or_pr}/{pr_info.sha}/{build_name}/clickhouse"
     head = requests.head(build_url)
     counter = 0
     while head.status_code != 200:
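The hunk cuts off inside the busy loop described in the comment. A plausible completion for context, reusing build_url from above; the 60-second sleep and 30-try cap are illustrative, not taken from the diff:

import time

import requests

head = requests.head(build_url)
counter = 0
while head.status_code != 200:
    # The binary may not be uploaded yet; poll until it appears.
    counter += 1
    if counter > 30:  # illustrative cap, roughly 30 minutes with 60 s sleeps
        raise RuntimeError(f"Cannot fetch {build_url}, retries exhausted")
    time.sleep(60)
    head = requests.head(build_url)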
@@ -248,7 +248,7 @@ if __name__ == "__main__":
         description = "No Jepsen output log"
         test_result = [("No Jepsen output log", "FAIL")]
 
-    s3_helper = S3Helper(S3_URL)
+    s3_helper = S3Helper()
     report_url = upload_results(
         s3_helper,
         pr_info.number,

View File

@@ -15,7 +15,7 @@ from github import Github
 from commit_status_helper import get_commit, post_commit_status
 from ci_config import CI_CONFIG
 from docker_pull_helper import get_image_with_version
-from env_helper import GITHUB_EVENT_PATH, GITHUB_RUN_URL, S3_BUILDS_BUCKET, S3_URL
+from env_helper import GITHUB_EVENT_PATH, GITHUB_RUN_URL, S3_BUILDS_BUCKET, S3_DOWNLOAD
 from get_robot_token import get_best_robot_token, get_parameter_from_ssm
 from pr_info import PRInfo
 from rerun_helper import RerunHelper
@@ -86,7 +86,7 @@ if __name__ == "__main__":
 
     docker_env = ""
 
-    docker_env += f" -e S3_URL={S3_URL}/{S3_BUILDS_BUCKET}"
+    docker_env += f" -e S3_URL={S3_DOWNLOAD}/{S3_BUILDS_BUCKET}"
     docker_env += f" -e BUILD_NAME={required_build}"
 
     if pr_info.number == 0:
@@ -197,7 +197,7 @@ if __name__ == "__main__":
     }
 
     s3_prefix = f"{pr_info.number}/{pr_info.sha}/{check_name_prefix}/"
-    s3_helper = S3Helper(S3_URL)
+    s3_helper = S3Helper()
     uploaded = {}  # type: Dict[str, str]
     for name, path in paths.items():
         try:

View File

@@ -9,7 +9,7 @@ from typing import Dict, List, Tuple
 
 from artifactory import ArtifactorySaaSPath  # type: ignore
 from build_download_helper import dowload_build_with_progress
-from env_helper import RUNNER_TEMP, S3_BUILDS_BUCKET, S3_URL
+from env_helper import RUNNER_TEMP, S3_BUILDS_BUCKET, S3_DOWNLOAD
 from git_helper import TAG_REGEXP, commit, removeprefix, removesuffix
 
 
@@ -98,7 +98,7 @@ class Packages:
 
 class S3:
     template = (
-        f"{S3_URL}"
+        f"{S3_DOWNLOAD}"
         # "clickhouse-builds/"
         f"{S3_BUILDS_BUCKET}/"
         # "33333/" or "21.11/" from --release, if pull request is omitted

View File

@@ -9,7 +9,14 @@ from multiprocessing.dummy import Pool
 
 import boto3  # type: ignore
 
-from env_helper import S3_TEST_REPORTS_BUCKET, S3_BUILDS_BUCKET, RUNNER_TEMP, CI, S3_URL
+from env_helper import (
+    S3_TEST_REPORTS_BUCKET,
+    S3_BUILDS_BUCKET,
+    RUNNER_TEMP,
+    CI,
+    S3_URL,
+    S3_DOWNLOAD,
+)
 from compress_files import compress_file_fast
 
 
@@ -33,9 +40,11 @@ def _flatten_list(lst):
 
 
 class S3Helper:
-    def __init__(self, host):
+    def __init__(self, host=S3_URL, download_host=S3_DOWNLOAD):
         self.session = boto3.session.Session(region_name="us-east-1")
         self.client = self.session.client("s3", endpoint_url=host)
+        self.host = host
+        self.download_host = download_host
 
     def _upload_file_to_s3(self, bucket_name, file_path, s3_path):
         logging.debug(
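Under the new signature the zero-argument call used throughout the diff is equivalent to passing the env_helper values explicitly, and other endpoints stay injectable. A short usage sketch; the MinIO endpoint is hypothetical:

from s3_helper import S3Helper

# Equivalent under the new defaults:
s3_helper = S3Helper()
# s3_helper = S3Helper(host=S3_URL, download_host=S3_DOWNLOAD)

# Hypothetical override, e.g. pointing both endpoints at a local MinIO:
local_helper = S3Helper(
    host="http://127.0.0.1:9000",
    download_host="http://127.0.0.1:9000",
)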
@@ -98,7 +107,7 @@ class S3Helper:
         logging.info("Upload %s to %s. Meta: %s", file_path, s3_path, metadata)
         # last two replacements are specifics of AWS urls:
         # https://jamesd3142.wordpress.com/2018/02/28/amazon-s3-and-the-plus-symbol/
-        url = f"{S3_URL}/{bucket_name}/{s3_path}"
+        url = f"{self.download_host}/{bucket_name}/{s3_path}"
         return url.replace("+", "%2B").replace(" ", "%20")
 
     def upload_test_report_to_s3(self, file_path, s3_path):
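The two replace calls exist because S3 treats a literal "+" in a URL path as a space unless it is percent-encoded (see the linked post). The standard library produces the same encoding, which makes the intent easy to check; this snippet is a standalone illustration, not part of the PR:

from urllib.parse import quote

s3_path = "1234/abc def/report+log.txt"
manual = s3_path.replace("+", "%2B").replace(" ", "%20")
# Both yield 1234/abc%20def/report%2Blog.txt ("/" is left unescaped).
assert quote(s3_path) == manual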
@@ -170,7 +179,7 @@
                     t = time.time()
                 except Exception as ex:
                     logging.critical("Failed to upload file, expcetion %s", ex)
-            return f"{S3_URL}/{bucket_name}/{s3_path}"
+            return f"{self.download_host}/{bucket_name}/{s3_path}"
 
         p = Pool(256)
 
p = Pool(256)
@@ -279,6 +288,13 @@
 
         return result
 
+    def exists(self, key, bucket=S3_BUILDS_BUCKET):
+        try:
+            self.client.head_object(Bucket=bucket, Key=key)
+            return True
+        except Exception:
+            return False
+
     @staticmethod
     def copy_file_to_local(bucket_name, file_path, s3_path):
         local_path = os.path.abspath(
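The new exists() wraps a HEAD request: head_object raises for a missing key (botocore surfaces the 404 as a ClientError), and the broad except maps any failure to False. Typical use, with a hypothetical object key:

from s3_helper import S3Helper

s3_helper = S3Helper()

key = "12345/deadbeef/binary_release/clickhouse"  # hypothetical object key
if s3_helper.exists(key):
    print("already uploaded, skipping")
else:
    print("not found, uploading")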

View File

@@ -7,7 +7,7 @@ import sys
 
 from github import Github
 
-from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH, S3_URL
+from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH
 from s3_helper import S3Helper
 from get_robot_token import get_best_robot_token
 from pr_info import PRInfo
@@ -126,7 +126,7 @@ if __name__ == "__main__":
     )
 
     ch_helper = ClickHouseHelper()
-    s3_helper = S3Helper(S3_URL)
+    s3_helper = S3Helper()
     report_url = upload_results(
         s3_helper,
         pr_info.number,

View File

@@ -8,7 +8,7 @@ import sys
 
 from github import Github
 
-from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH, S3_URL
+from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH
 from s3_helper import S3Helper
 from get_robot_token import get_best_robot_token
 from pr_info import PRInfo
@@ -31,7 +31,8 @@ def get_run_command(
 ):
     cmd = (
         "docker run --cap-add=SYS_PTRACE "
-        f"-e S3_URL='{S3_URL}/clickhouse-datasets' "
+        # a static link, don't use S3_URL or S3_DOWNLOAD
+        "-e S3_URL='https://s3.amazonaws.com/clickhouse-datasets' "
         f"--volume={build_path}:/package_folder "
        f"--volume={result_folder}:/test_output "
         f"--volume={repo_tests_path}:/usr/share/clickhouse-test "
@@ -148,7 +149,7 @@ if __name__ == "__main__":
     subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
 
-    s3_helper = S3Helper(S3_URL)
+    s3_helper = S3Helper()
 
     state, description, test_results, additional_logs = process_results(
         result_path, server_log_path, run_log_path
     )

View File

@@ -15,7 +15,7 @@ from clickhouse_helper import (
 )
 from commit_status_helper import post_commit_status, update_mergeable_check
 from docker_pull_helper import get_image_with_version
-from env_helper import GITHUB_WORKSPACE, RUNNER_TEMP, S3_URL
+from env_helper import GITHUB_WORKSPACE, RUNNER_TEMP
 from get_robot_token import get_best_robot_token
 from github_helper import GitHub
 from git_helper import git_runner
@@ -166,7 +166,7 @@ if __name__ == "__main__":
         os.makedirs(temp_path)
 
     docker_image = get_image_with_version(temp_path, "clickhouse/style-test")
-    s3_helper = S3Helper(S3_URL)
+    s3_helper = S3Helper()
 
     cmd = (
         f"docker run -u $(id -u ${{USER}}):$(id -g ${{USER}}) --cap-add=SYS_PTRACE "

View File

@@ -7,7 +7,7 @@ import subprocess
 
 from github import Github
 
-from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH, S3_URL
+from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH
 from s3_helper import S3Helper
 from get_robot_token import get_best_robot_token
 from pr_info import PRInfo
@@ -147,7 +147,7 @@ if __name__ == "__main__":
     subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
 
-    s3_helper = S3Helper(S3_URL)
+    s3_helper = S3Helper()
 
     state, description, test_results, additional_logs = process_result(test_output)
     ch_helper = ClickHouseHelper()
 