Merge pull request #49475 from ClickHouse/check-description-in-lambda

Check the PRs body directly in lambda, without rerun
This commit is contained in:
alesapin 2023-05-04 11:17:23 +02:00 committed by GitHub
commit f7470e1453
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 138 additions and 146 deletions

View File

@ -2,7 +2,7 @@
A technical comment, you are free to remove or leave it as it is when PR is created
The following categories are used in the next scripts, update them accordingly
utils/changelog/changelog.py
tests/ci/run_check.py
tests/ci/cancel_and_rerun_workflow_lambda/app.py
-->
### Changelog category (leave one):
- New Feature

View File

@ -2,11 +2,11 @@
from base64 import b64decode
from collections import namedtuple
from typing import Any, Dict, List
from typing import Any, Dict, List, Optional, Tuple
from threading import Thread
from queue import Queue
import json
import os
import re
import time
import jwt
@ -27,6 +27,123 @@ MAX_RETRY = 5
DEBUG_INFO = {} # type: Dict[str, Any]
# Descriptions are used in .github/PULL_REQUEST_TEMPLATE.md, keep comments there
# updated accordingly
# The following lists are append only, try to avoid editing them
# They still could be cleaned out after the decent time though.
LABELS = {
"pr-backward-incompatible": ["Backward Incompatible Change"],
"pr-bugfix": [
"Bug Fix",
"Bug Fix (user-visible misbehavior in an official stable release)",
"Bug Fix (user-visible misbehaviour in official stable or prestable release)",
"Bug Fix (user-visible misbehavior in official stable or prestable release)",
],
"pr-build": [
"Build/Testing/Packaging Improvement",
"Build Improvement",
"Build/Testing Improvement",
"Build",
"Packaging Improvement",
],
"pr-documentation": [
"Documentation (changelog entry is not required)",
"Documentation",
],
"pr-feature": ["New Feature"],
"pr-improvement": ["Improvement"],
"pr-not-for-changelog": [
"Not for changelog (changelog entry is not required)",
"Not for changelog",
],
"pr-performance": ["Performance Improvement"],
}
CATEGORY_TO_LABEL = {c: lb for lb, categories in LABELS.items() for c in categories}
def check_pr_description(pr_body: str) -> Tuple[str, str]:
"""The function checks the body to being properly formatted according to
.github/PULL_REQUEST_TEMPLATE.md, if the first returned string is not empty,
then there is an error."""
lines = list(map(lambda x: x.strip(), pr_body.split("\n") if pr_body else []))
lines = [re.sub(r"\s+", " ", line) for line in lines]
# Check if body contains "Reverts ClickHouse/ClickHouse#36337"
if [
True
for line in lines
if re.match(r"\AReverts {GITHUB_REPOSITORY}#[\d]+\Z", line)
]:
return "", LABELS["pr-not-for-changelog"][0]
category = ""
entry = ""
description_error = ""
i = 0
while i < len(lines):
if re.match(r"(?i)^[#>*_ ]*change\s*log\s*category", lines[i]):
i += 1
if i >= len(lines):
break
# Can have one empty line between header and the category
# itself. Filter it out.
if not lines[i]:
i += 1
if i >= len(lines):
break
category = re.sub(r"^[-*\s]*", "", lines[i])
i += 1
# Should not have more than one category. Require empty line
# after the first found category.
if i >= len(lines):
break
if lines[i]:
second_category = re.sub(r"^[-*\s]*", "", lines[i])
description_error = (
"More than one changelog category specified: "
f"'{category}', '{second_category}'"
)
return description_error, category
elif re.match(
r"(?i)^[#>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i]
):
i += 1
# Can have one empty line between header and the entry itself.
# Filter it out.
if i < len(lines) and not lines[i]:
i += 1
# All following lines until empty one are the changelog entry.
entry_lines = []
while i < len(lines) and lines[i]:
entry_lines.append(lines[i])
i += 1
entry = " ".join(entry_lines)
# Don't accept changelog entries like '...'.
entry = re.sub(r"[#>*_.\- ]", "", entry)
# Don't accept changelog entries like 'Close #12345'.
entry = re.sub(r"^[\w\-\s]{0,10}#?\d{5,6}\.?$", "", entry)
else:
i += 1
if not category:
description_error = "Changelog category is empty"
# Filter out the PR categories that are not for changelog.
elif re.match(
r"(?i)doc|((non|in|not|un)[-\s]*significant)|(not[ ]*for[ ]*changelog)",
category,
):
pass # to not check the rest of the conditions
elif category not in CATEGORY_TO_LABEL:
description_error, category = f"Category '{category}' is not valid", ""
elif not entry:
description_error = f"Changelog entry required for category '{category}'"
return description_error, category
class Worker(Thread):
def __init__(
@ -268,11 +385,11 @@ def get_workflow_description(workflow_url: str, token: str) -> WorkflowDescripti
)
def _exec_post_with_retry(url, token):
def _exec_post_with_retry(url: str, token: str, json: Optional[Any] = None) -> Any:
headers = {"Authorization": f"token {token}"}
for i in range(MAX_RETRY):
try:
response = requests.post(url, headers=headers)
response = requests.post(url, headers=headers, json=json)
response.raise_for_status()
return response.json()
except Exception as ex:
@ -373,27 +490,20 @@ def main(event):
return
if action == "edited":
print("PR is edited, check if it needs to rerun")
workflow_descriptions = get_workflows_description_for_pull_request(
pull_request, token
)
workflow_descriptions = (
workflow_descriptions
or get_workflow_description_fallback(pull_request, token)
)
workflow_descriptions.sort(key=lambda x: x.run_id) # type: ignore
most_recent_workflow = workflow_descriptions[-1]
if (
most_recent_workflow.status == "completed"
and most_recent_workflow.name in NEED_RERUN_ON_EDITED
):
print("PR is edited, check if the body is correct")
error, category = check_pr_description(pull_request["body"])
if error:
print(
"The PR's body is changed and workflow is finished. "
"Rerun to check the description"
f"The PR's body is wrong, is going to comment it. The error is: {error}"
)
exec_workflow_url([most_recent_workflow.rerun_url], token)
print("Rerun finished, exiting")
return
post_json = {
"body": "This is an automatic comment. The PR descriptions does not "
f"match the [template]({pull_request['base']['repo']['html_url']}/"
"blob/master/.github/PULL_REQUEST_TEMPLATE.md?plain=1).\n\n"
f"Please, edit it accordingly.\n\nThe error is: {error}"
}
_exec_post_with_retry(pull_request["comments_url"], token, json=post_json)
return
if action == "synchronize":
print("PR is synchronized, going to stop old actions")

View File

@ -1,7 +1,6 @@
#!/usr/bin/env python3
import sys
import logging
import re
from typing import Tuple
from github import Github
@ -21,6 +20,8 @@ from docs_check import NAME as DOCS_NAME
from env_helper import GITHUB_REPOSITORY, GITHUB_SERVER_URL
from get_robot_token import get_best_robot_token
from pr_info import FORCE_TESTS_LABEL, PRInfo
from cancel_and_rerun_workflow_lambda.app import CATEGORY_TO_LABEL, check_pr_description
from workflow_approve_rerun_lambda.app import TRUSTED_CONTRIBUTORS
TRUSTED_ORG_IDS = {
@ -33,40 +34,6 @@ DO_NOT_TEST_LABEL = "do not test"
FEATURE_LABEL = "pr-feature"
SUBMODULE_CHANGED_LABEL = "submodule changed"
# They are used in .github/PULL_REQUEST_TEMPLATE.md, keep comments there
# updated accordingly
# The following lists are append only, try to avoid editing them
# They atill could be cleaned out after the decent time though.
LABELS = {
"pr-backward-incompatible": ["Backward Incompatible Change"],
"pr-bugfix": [
"Bug Fix",
"Bug Fix (user-visible misbehavior in an official stable release)",
"Bug Fix (user-visible misbehaviour in official stable or prestable release)",
"Bug Fix (user-visible misbehavior in official stable or prestable release)",
],
"pr-build": [
"Build/Testing/Packaging Improvement",
"Build Improvement",
"Build/Testing Improvement",
"Build",
"Packaging Improvement",
],
"pr-documentation": [
"Documentation (changelog entry is not required)",
"Documentation",
],
"pr-feature": ["New Feature"],
"pr-improvement": ["Improvement"],
"pr-not-for-changelog": [
"Not for changelog (changelog entry is not required)",
"Not for changelog",
],
"pr-performance": ["Performance Improvement"],
}
CATEGORY_TO_LABEL = {c: lb for lb, categories in LABELS.items() for c in categories}
def pr_is_by_trusted_user(pr_user_login, pr_user_orgs):
if pr_user_login.lower() in TRUSTED_CONTRIBUTORS:
@ -120,91 +87,6 @@ def should_run_ci_for_pr(pr_info: PRInfo) -> Tuple[bool, str, str]:
return True, "No special conditions apply", "pending"
def check_pr_description(pr_info: PRInfo) -> Tuple[str, str]:
lines = list(
map(lambda x: x.strip(), pr_info.body.split("\n") if pr_info.body else [])
)
lines = [re.sub(r"\s+", " ", line) for line in lines]
# Check if body contains "Reverts ClickHouse/ClickHouse#36337"
if [
True
for line in lines
if re.match(rf"\AReverts {GITHUB_REPOSITORY}#[\d]+\Z", line)
]:
return "", LABELS["pr-not-for-changelog"][0]
category = ""
entry = ""
description_error = ""
i = 0
while i < len(lines):
if re.match(r"(?i)^[#>*_ ]*change\s*log\s*category", lines[i]):
i += 1
if i >= len(lines):
break
# Can have one empty line between header and the category
# itself. Filter it out.
if not lines[i]:
i += 1
if i >= len(lines):
break
category = re.sub(r"^[-*\s]*", "", lines[i])
i += 1
# Should not have more than one category. Require empty line
# after the first found category.
if i >= len(lines):
break
if lines[i]:
second_category = re.sub(r"^[-*\s]*", "", lines[i])
result_status = (
"More than one changelog category specified: '"
+ category
+ "', '"
+ second_category
+ "'"
)
return result_status, category
elif re.match(
r"(?i)^[#>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i]
):
i += 1
# Can have one empty line between header and the entry itself.
# Filter it out.
if i < len(lines) and not lines[i]:
i += 1
# All following lines until empty one are the changelog entry.
entry_lines = []
while i < len(lines) and lines[i]:
entry_lines.append(lines[i])
i += 1
entry = " ".join(entry_lines)
# Don't accept changelog entries like '...'.
entry = re.sub(r"[#>*_.\- ]", "", entry)
# Don't accept changelog entries like 'Close #12345'.
entry = re.sub(r"^[\w\-\s]{0,10}#?\d{5,6}\.?$", "", entry)
else:
i += 1
if not category:
description_error = "Changelog category is empty"
# Filter out the PR categories that are not for changelog.
elif re.match(
r"(?i)doc|((non|in|not|un)[-\s]*significant)|(not[ ]*for[ ]*changelog)",
category,
):
pass # to not check the rest of the conditions
elif category not in CATEGORY_TO_LABEL:
description_error, category = f"Category '{category}' is not valid", ""
elif not entry:
description_error = f"Changelog entry required for category '{category}'"
return description_error, category
def main():
logging.basicConfig(level=logging.INFO)
@ -224,7 +106,7 @@ def main():
gh = Github(get_best_robot_token(), per_page=100)
commit = get_commit(gh, pr_info.sha)
description_error, category = check_pr_description(pr_info)
description_error, category = check_pr_description(pr_info.body)
pr_labels_to_add = []
pr_labels_to_remove = []
if (
@ -262,7 +144,7 @@ def main():
f"expect adding docs for {FEATURE_LABEL}",
DOCS_NAME,
)
else:
elif not description_error:
set_mergeable_check(commit, "skipped")
if description_error: