mirror of https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-19 16:20:50 +00:00

commit c05526beef
Merge remote-tracking branch 'origin/master' into auto/v22.7.2.15-stable
.github/workflows/tags_stable.yml (vendored) | 2 +-
@@ -29,7 +29,7 @@ jobs:
         fetch-depth: 0
     - name: Generate versions
       env:
-        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        GITHUB_TOKEN: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }}
       run: |
         ./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv
         GID=$(id -g "${UID}")
tests/ci/github_helper.py

@@ -5,11 +5,12 @@ from datetime import date, datetime, timedelta
 from pathlib import Path
 from os import path as p
 from time import sleep
-from typing import List, Optional
+from typing import List, Optional, Tuple

 import github
 from github.GithubException import RateLimitExceededException
 from github.Issue import Issue
+from github.NamedUser import NamedUser
 from github.PullRequest import PullRequest
 from github.Repository import Repository
@@ -120,21 +121,15 @@ class GitHub(github.Github):
         return

     def get_pull_cached(
-        self, repo: Repository, number: int, updated_at: Optional[datetime] = None
+        self, repo: Repository, number: int, obj_updated_at: Optional[datetime] = None
     ) -> PullRequest:
-        pr_cache_file = self.cache_path / f"{number}.pickle"
-        if updated_at is None:
-            updated_at = datetime.now() - timedelta(hours=-1)
-
-        def _get_pr(path: Path) -> PullRequest:
-            with open(path, "rb") as prfd:
-                return self.load(prfd)  # type: ignore
-
-        if pr_cache_file.is_file():
-            cached_pr = _get_pr(pr_cache_file)
-            if updated_at <= cached_pr.updated_at:
+        cache_file = self.cache_path / f"pr-{number}.pickle"
+
+        if cache_file.is_file():
+            is_updated, cached_pr = self._is_cache_updated(cache_file, obj_updated_at)
+            if is_updated:
                 logger.debug("Getting PR #%s from cache", number)
-                return cached_pr
+                return cached_pr  # type: ignore
         logger.debug("Getting PR #%s from API", number)
         for i in range(self.retries):
             try:
@@ -144,11 +139,56 @@ class GitHub(github.Github):
             if i == self.retries - 1:
                 raise
             self.sleep_on_rate_limit()
-        logger.debug("Caching PR #%s from API in %s", number, pr_cache_file)
-        with open(pr_cache_file, "wb") as prfd:
+        logger.debug("Caching PR #%s from API in %s", number, cache_file)
+        with open(cache_file, "wb") as prfd:
             self.dump(pr, prfd)  # type: ignore
         return pr

+    def get_user_cached(
+        self, login: str, obj_updated_at: Optional[datetime] = None
+    ) -> NamedUser:
+        cache_file = self.cache_path / f"user-{login}.pickle"
+
+        if cache_file.is_file():
+            is_updated, cached_user = self._is_cache_updated(cache_file, obj_updated_at)
+            if is_updated:
+                logger.debug("Getting user %s from cache", login)
+                return cached_user  # type: ignore
+        logger.debug("Getting PR #%s from API", login)
+        for i in range(self.retries):
+            try:
+                user = self.get_user(login)
+                break
+            except RateLimitExceededException:
+                if i == self.retries - 1:
+                    raise
+                self.sleep_on_rate_limit()
+        logger.debug("Caching user %s from API in %s", login, cache_file)
+        with open(cache_file, "wb") as prfd:
+            self.dump(user, prfd)  # type: ignore
+        return user
+
+    def _get_cached(self, path: Path):
+        with open(path, "rb") as ob_fd:
+            return self.load(ob_fd)  # type: ignore
+
+    def _is_cache_updated(
+        self, cache_file: Path, obj_updated_at: Optional[datetime]
+    ) -> Tuple[bool, object]:
+        cached_obj = self._get_cached(cache_file)
+        # We don't want the cache_updated being always old,
+        # for example in cases when the user is not updated for ages
+        cache_updated = max(
+            datetime.fromtimestamp(cache_file.stat().st_mtime), cached_obj.updated_at
+        )
+        if obj_updated_at is None:
+            # When we don't know about the object is updated or not,
+            # we update it once per hour
+            obj_updated_at = datetime.now() - timedelta(hours=1)
+        if obj_updated_at <= cache_updated:
+            return True, cached_obj
+        return False, cached_obj
+
     @property
     def cache_path(self):
         return self._cache_path
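Taken together, these hunks give the wrapper a small on-disk pickle cache keyed by object type (pr-<number>.pickle, user-<login>.pickle): an entry is reused only while it is at least as fresh as the object's updated_at, with a one-hour TTL fallback when no timestamp is supplied. A minimal usage sketch, not part of the diff — the token, repository, PR number, and login are placeholders, the cache directory is assumed to exist, and assigning a Path to cache_path is assumed to be accepted by its setter:

    from datetime import datetime
    from pathlib import Path

    from github_helper import GitHub  # the module patched above

    gh = GitHub("ghp_placeholder_token")  # placeholder credentials
    gh.cache_path = Path("gh_cache")      # pickles land here; directory must exist
    repo = gh.get_repo("ClickHouse/ClickHouse")

    # With obj_updated_at given, the cached pickle is reused only while it is
    # at least as fresh; with None, entries older than one hour are refetched.
    pr = gh.get_pull_cached(repo, 12345, obj_updated_at=datetime(2022, 8, 1))
    user = gh.get_user_cached("placeholder-login")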
tests/ci/release.py

@@ -335,7 +335,7 @@ class Release:
             yield
         except (Exception, KeyboardInterrupt):
             logging.warning("Rolling back checked out %s for %s", ref, orig_ref)
-            self.run(f"git reset --hard; git checkout {orig_ref}")
+            self.run(f"git reset --hard; git checkout -f {orig_ref}")
             raise
         else:
             if with_checkout_back and need_rollback:
utils/changelog/changelog.py

@@ -6,20 +6,14 @@ import logging
 import os.path as p
 import os
 import re
-from datetime import date, datetime, timedelta
-from queue import Empty, Queue
+from datetime import date, timedelta
 from subprocess import CalledProcessError, DEVNULL
-from threading import Thread
-from time import sleep
 from typing import Dict, List, Optional, TextIO

 from fuzzywuzzy.fuzz import ratio  # type: ignore
-from github import Github
+from github_helper import GitHub, PullRequest, PullRequests, Repository
 from github.GithubException import RateLimitExceededException, UnknownObjectException
 from github.NamedUser import NamedUser
-from github.Issue import Issue
-from github.PullRequest import PullRequest
-from github.Repository import Repository
 from git_helper import is_shallow, git_runner as runner

 # This array gives the preferred category order, and is also used to
@@ -39,7 +33,7 @@ categories_preferred_order = (
 FROM_REF = ""
 TO_REF = ""
 SHA_IN_CHANGELOG = []  # type: List[str]
-GitHub = Github()
+gh = GitHub()
 CACHE_PATH = p.join(p.dirname(p.realpath(__file__)), "gh_cache")

@@ -49,7 +43,7 @@ class Description:
     ):
         self.number = number
         self.html_url = html_url
-        self.user = user
+        self.user = gh.get_user_cached(user._rawData["login"])  # type: ignore
         self.entry = entry
         self.category = category

@@ -78,7 +72,7 @@ class Description:
                 user_name = self.user.login
                 break
             except RateLimitExceededException:
-                sleep_on_rate_limit()
+                gh.sleep_on_rate_limit()
         return (
             f"* {entry} [#{self.number}]({self.html_url}) "
             f"([{user_name}]({self.user.html_url}))."
@@ -94,85 +88,34 @@ class Description:
         return self.number < other.number


-class Worker(Thread):
-    def __init__(self, request_queue: Queue, repo: Repository):
-        Thread.__init__(self)
-        self.queue = request_queue
-        self.repo = repo
-        self.response = []  # type: List[Description]
-
-    def run(self):
-        while not self.queue.empty():
-            try:
-                issue = self.queue.get()  # type: Issue
-            except Empty:
-                break  # possible race condition, just continue
-            api_pr = get_pull_cached(self.repo, issue.number, issue.updated_at)
-            in_changelog = False
-            merge_commit = api_pr.merge_commit_sha
-            try:
-                runner.run(f"git rev-parse '{merge_commit}'")
-            except CalledProcessError:
-                # It's possible that commit not in the repo, just continue
-                logging.info("PR %s does not belong to the repo", api_pr.number)
-                continue
-
-            in_changelog = merge_commit in SHA_IN_CHANGELOG
-            if in_changelog:
-                desc = generate_description(api_pr, self.repo)
-                if desc is not None:
-                    self.response.append(desc)
-
-            self.queue.task_done()
-
-
-def sleep_on_rate_limit(time: int = 20):
-    logging.warning("Faced rate limit, sleeping %s", time)
-    sleep(time)
-
-
-def get_pull_cached(
-    repo: Repository, number: int, updated_at: Optional[datetime] = None
-) -> PullRequest:
-    pr_cache_file = p.join(CACHE_PATH, f"{number}.pickle")
-    if updated_at is None:
-        updated_at = datetime.now() - timedelta(hours=-1)
-
-    if p.isfile(pr_cache_file):
-        cache_updated = datetime.fromtimestamp(p.getmtime(pr_cache_file))
-        if cache_updated > updated_at:
-            with open(pr_cache_file, "rb") as prfd:
-                return GitHub.load(prfd)  # type: ignore
-    while True:
-        try:
-            pr = repo.get_pull(number)
-            break
-        except RateLimitExceededException:
-            sleep_on_rate_limit()
-    with open(pr_cache_file, "wb") as prfd:
-        GitHub.dump(pr, prfd)  # type: ignore
-    return pr
-
-
-def get_descriptions(
-    repo: Repository, issues: List[Issue], jobs: int
-) -> Dict[str, List[Description]]:
-    workers = []  # type: List[Worker]
-    queue = Queue()  # type: Queue[Issue]
-    for issue in issues:
-        queue.put(issue)
-    for _ in range(jobs):
-        worker = Worker(queue, repo)
-        worker.start()
-        workers.append(worker)
-
+def get_descriptions(prs: PullRequests) -> Dict[str, List[Description]]:
     descriptions = {}  # type: Dict[str, List[Description]]
-    for worker in workers:
-        worker.join()
-        for desc in worker.response:
-            if desc.category not in descriptions:
-                descriptions[desc.category] = []
-            descriptions[desc.category].append(desc)
+    repos = {}  # type: Dict[str, Repository]
+    for pr in prs:
+        # See https://github.com/PyGithub/PyGithub/issues/2202,
+        # obj._rawData doesn't spend additional API requests
+        # We'll save some requests
+        # pylint: disable=protected-access
+        repo_name = pr._rawData["base"]["repo"]["full_name"]  # type: ignore
+        # pylint: enable=protected-access
+        if repo_name not in repos:
+            repos[repo_name] = pr.base.repo
+        in_changelog = False
+        merge_commit = pr.merge_commit_sha
+        try:
+            runner.run(f"git rev-parse '{merge_commit}'")
+        except CalledProcessError:
+            # It's possible that commit not in the repo, just continue
+            logging.info("PR %s does not belong to the repo", pr.number)
+            continue
+
+        in_changelog = merge_commit in SHA_IN_CHANGELOG
+        if in_changelog:
+            desc = generate_description(pr, repos[repo_name])
+            if desc is not None:
+                if desc.category not in descriptions:
+                    descriptions[desc.category] = []
+                descriptions[desc.category].append(desc)

     for descs in descriptions.values():
         descs.sort()
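The `_rawData` access above leans on a PyGithub detail (the linked issue PyGithub#2202): reading the raw JSON payload of an already-fetched object bypasses the lazy attribute machinery, so grouping PRs by repository spends no extra API requests. A rough sketch of the difference — token and PR number are placeholders:

    from github import Github

    gh = Github("ghp_placeholder_token")              # placeholder credentials
    repo = gh.get_repo("ClickHouse/ClickHouse")
    pr = repo.get_pull(1)                             # placeholder PR number

    # pylint: disable=protected-access
    name = pr._rawData["base"]["repo"]["full_name"]   # read straight from the fetched JSON
    # pylint: enable=protected-access

    # The attribute path yields the same value, but may lazily issue further API calls.
    assert name == pr.base.repo.full_name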
@@ -193,6 +136,11 @@ def parse_args() -> argparse.Namespace:
         default=0,
         help="set the script verbosity, could be used multiple",
     )
+    parser.add_argument(
+        "--debug-helpers",
+        action="store_true",
+        help="add debug logging for git_helper and github_helper",
+    )
     parser.add_argument(
         "--output",
         type=argparse.FileType("w"),
@@ -246,7 +194,7 @@ def generate_description(item: PullRequest, repo: Repository) -> Optional[Descri
     branch_parts = item.head.ref.split("/")
     if len(branch_parts) == 3:
         try:
-            item = get_pull_cached(repo, int(branch_parts[-1]))
+            item = gh.get_pull_cached(repo, int(branch_parts[-1]))
         except Exception as e:
             logging.warning("unable to get backpoted PR, exception: %s", e)
     else:
@@ -337,9 +285,13 @@ def generate_description(item: PullRequest, repo: Repository) -> Optional[Descri

 def write_changelog(fd: TextIO, descriptions: Dict[str, List[Description]]):
     year = date.today().year
+    to_commit = runner(f"git rev-parse {TO_REF}^{{}}")[:11]
+    from_commit = runner(f"git rev-parse {FROM_REF}^{{}}")[:11]
     fd.write(
-        f"---\nsidebar_position: 1\nsidebar_label: {year}\n---\n\n# {year} Changelog\n\n"
-        f"### ClickHouse release {TO_REF} FIXME as compared to {FROM_REF}\n\n"
+        f"---\nsidebar_position: 1\nsidebar_label: {year}\n---\n\n"
+        f"# {year} Changelog\n\n"
+        f"### ClickHouse release {TO_REF} ({to_commit}) FIXME "
+        f"as compared to {FROM_REF} ({from_commit})\n\n"
     )

     seen_categories = []  # type: List[str]
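The `{TO_REF}^{{}}` in the f-string renders as git's `^{}` peel syntax, so an annotated release tag resolves to the commit it points at rather than the tag object itself; truncating to 11 characters yields a short hash for the release header. A standalone sketch of the same step — the tag name is a placeholder:

    from subprocess import check_output

    to_ref = "v22.7.1-placeholder"  # placeholder annotated tag
    # ^{} peels an annotated tag down to the commit it references
    to_commit = check_output(
        ["git", "rev-parse", f"{to_ref}^{{}}"], text=True
    ).strip()[:11]
    print(f"### ClickHouse release {to_ref} ({to_commit}) FIXME ...")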
@@ -391,12 +343,15 @@ def set_sha_in_changelog():


 def main():
-    log_levels = [logging.CRITICAL, logging.WARN, logging.INFO, logging.DEBUG]
+    log_levels = [logging.WARN, logging.INFO, logging.DEBUG]
     args = parse_args()
     logging.basicConfig(
         format="%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d]:\n%(message)s",
-        level=log_levels[min(args.verbose, 3)],
+        level=log_levels[min(args.verbose, 2)],
     )
+    if args.debug_helpers:
+        logging.getLogger("github_helper").setLevel(logging.DEBUG)
+        logging.getLogger("git_helper").setLevel(logging.DEBUG)
     # Create a cache directory
     if not p.isdir(CACHE_PATH):
         os.mkdir(CACHE_PATH, 0o700)
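With CRITICAL dropped, WARN becomes the default level, each `-v` steps one level down, and the index is clamped so extra flags saturate at DEBUG; `--debug-helpers` then raises only the two helper module loggers. A quick illustration of the clamping:

    import logging

    log_levels = [logging.WARN, logging.INFO, logging.DEBUG]
    for verbose in (0, 1, 2, 5):
        print(verbose, logging.getLevelName(log_levels[min(verbose, 2)]))
    # 0 WARNING / 1 INFO / 2 DEBUG / 5 DEBUG -- repeated -v flags cap at DEBUG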
@@ -413,35 +368,29 @@ def main():
     logging.info("Using %s..%s as changelog interval", FROM_REF, TO_REF)

+    # use merge-base commit as a starting point, if used ref in another branch
+    base_commit = runner.run(f"git merge-base '{FROM_REF}^{{}}' '{TO_REF}^{{}}'")
     # Get starting and ending dates for gathering PRs
     # Add one day after and before to mitigate TZ possible issues
     # `tag^{}` format gives commit ref when we have annotated tags
     # format %cs gives a committer date, works better for cherry-picked commits
-    from_date = runner.run(f"git log -1 --format=format:%cs '{FROM_REF}^{{}}'")
-    from_date = (date.fromisoformat(from_date) - timedelta(1)).isoformat()
+    from_date = runner.run(f"git log -1 --format=format:%cs '{base_commit}'")
     to_date = runner.run(f"git log -1 --format=format:%cs '{TO_REF}^{{}}'")
-    to_date = (date.fromisoformat(to_date) + timedelta(1)).isoformat()
+    merged = (
+        date.fromisoformat(from_date) - timedelta(1),
+        date.fromisoformat(to_date) + timedelta(1),
+    )

     # Get all PRs for the given time frame
-    global GitHub
-    GitHub = Github(
+    global gh
+    gh = GitHub(
         args.gh_user_or_token, args.gh_password, per_page=100, pool_size=args.jobs
     )
-    query = f"type:pr repo:{args.repo} is:merged merged:{from_date}..{to_date}"
-    repo = GitHub.get_repo(args.repo)
-    api_prs = GitHub.search_issues(query=query, sort="created")
-    logging.info("Found %s PRs for the query: '%s'", api_prs.totalCount, query)
+    gh.cache_path = CACHE_PATH
+    query = f"type:pr repo:{args.repo} is:merged"
+    prs = gh.get_pulls_from_search(query=query, merged=merged, sort="created")

-    issues = []  # type: List[Issue]
-    while True:
-        try:
-            for issue in api_prs:
-                issues.append(issue)
-            break
-        except RateLimitExceededException:
-            sleep_on_rate_limit()
-
-    descriptions = get_descriptions(repo, issues, args.jobs)
+    descriptions = get_descriptions(prs)

     write_changelog(args.output, descriptions)
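Two things change in how the PR window is computed: `git merge-base` supplies the starting commit, so a FROM_REF living on another branch still yields a sensible lower bound, and the one-day padding on each side absorbs timezone skew between committer dates and GitHub's merged timestamps. A rough standalone equivalent of the date arithmetic, assuming the refs exist in the local clone — tag names are placeholders:

    import subprocess
    from datetime import date, timedelta

    def committer_date(ref: str) -> date:
        # %cs prints the committer date as YYYY-MM-DD
        out = subprocess.check_output(
            ["git", "log", "-1", "--format=format:%cs", ref], text=True
        )
        return date.fromisoformat(out.strip())

    from_ref, to_ref = "v22.6-placeholder", "v22.7-placeholder"  # placeholder tags
    base = subprocess.check_output(
        ["git", "merge-base", f"{from_ref}^{{}}", f"{to_ref}^{{}}"], text=True
    ).strip()
    # Pad one day on each side to mitigate possible timezone issues
    merged = (
        committer_date(base) - timedelta(1),
        committer_date(f"{to_ref}^{{}}") + timedelta(1),
    )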
utils/changelog/github_helper.py (new symbolic link) | 1 +

@@ -0,0 +1 @@
+../../tests/ci/github_helper.py