#!/usr/bin/python3
"""Format a Markdown changelog for the given pull requests.

Reads PR numbers, one per line, from FILE (or stdin), loads the matching
``pr<NUMBER>.json`` / ``user<ID>.json`` dumps from the current directory,
and prints the changelog entries grouped by category.
"""

import argparse
import collections
import itertools
import json
import os
import re
import sys

# This list gives the preferred category order, and is also used to
# normalize category names.
categories_preferred_order = [
    'Backward Incompatible Change',
    'New Feature',
    'Performance Improvement',
    'Improvement',
    'Bug Fix',
    'Build/Testing/Packaging Improvement',
    'Other',
]


# This function mirrors the PR description checks in ClickhousePullRequestTrigger.
def parse_one_pull_request(item):
    """Extract changelog 'category' and 'entry' from a PR dict's 'body'.

    Stores the normalized fields back into *item* and returns True, or
    returns False if the PR should not be mentioned in the changelog
    (documentation / non-significant changes).
    """
    description = item['body']
    # Don't drop empty lines: they delimit parts of the description.
    # Collapse internal runs of whitespace so header matching is reliable.
    lines = [re.sub(r'\s+', ' ', x.strip())
             for x in (description.split('\n') if description else [])]

    category = ''
    entry = ''
    i = 0
    while i < len(lines):
        if re.match(r'(?i)^[>*_ ]*change\s*log\s*category', lines[i]):
            i += 1
            if i >= len(lines):
                break
            # Can have one empty line between header and the category itself.
            # Filter it out.
            if not lines[i]:
                i += 1
                if i >= len(lines):
                    break
            category = re.sub(r'^[-*\s]*', '', lines[i])
            i += 1
        elif re.match(r'(?i)^[>*_ ]*(short\s*description|change\s*log\s*entry)',
                      lines[i]):
            i += 1
            # Can have one empty line between header and the entry itself.
            # Filter it out.
            if i < len(lines) and not lines[i]:
                i += 1
            # All following lines until an empty one are the changelog entry.
            entry_lines = []
            while i < len(lines) and lines[i]:
                entry_lines.append(lines[i])
                i += 1
            entry = ' '.join(entry_lines)
        else:
            i += 1

    if not category:
        # Shouldn't happen, because the description check in CI should catch
        # such PRs. Fall through, so that it shows up in output and the user
        # can fix it.
        category = "NO CL CATEGORY"

    # Filter out the PR categories that are not for the changelog.
    if re.match(r'(?i)doc|((non|in|not|un)[-\s]*significant)|(not[ ]*for[ ]*changelog)',
                category):
        return False

    if not entry:
        # Shouldn't happen, because the description check in CI should catch
        # such PRs.
        category = "NO CL ENTRY"
        entry = "NO CL ENTRY: '" + item['title'] + "'"

    entry = entry.strip()
    if not entry.endswith('.'):
        entry += '.'

    item['entry'] = entry
    item['category'] = category
    return True


def _load_json(path):
    """Read and parse a JSON file, closing the handle (the original leaked it)."""
    with open(path, encoding='utf-8') as f:
        return json.load(f)


def main():
    parser = argparse.ArgumentParser(description='Format changelog for given PRs.')
    parser.add_argument('file', metavar='FILE',
                        type=argparse.FileType('r', encoding='utf-8'),
                        nargs='?', default=sys.stdin,
                        help='File with PR numbers, one per line.')
    args = parser.parse_args()

    # Imported lazily so that importing this module (e.g. to test
    # parse_one_pull_request) does not require the third-party dependency.
    import fuzzywuzzy.fuzz

    category_to_pr = collections.defaultdict(list)
    users = {}

    for line in args.file:
        pr = _load_json(f'pr{line.strip()}.json')
        assert pr['number']
        if not parse_one_pull_request(pr):
            continue
        assert pr['category']

        # Normalize the category name by fuzzy match against the known list.
        for canonical in categories_preferred_order:
            if fuzzywuzzy.fuzz.ratio(pr['category'].lower(), canonical.lower()) >= 90:
                pr['category'] = canonical
                break

        category_to_pr[pr['category']].append(pr)
        user_id = pr['user']['id']
        users[user_id] = _load_json(f'user{user_id}.json')

    def print_category(category):
        print(f"#### {category}")
        print()
        for pr in category_to_pr[category]:
            user = users[pr["user"]["id"]]
            user_name = user["name"] if user["name"] else user["login"]
            # Substitute issue links.
            # 1) issue number w/o markdown link; the lookbehind (instead of the
            #    original consuming ([^[]) group) also catches a '#NNNN' at the
            #    very start of the entry.
            pr["entry"] = re.sub(
                r'(?<!\[)#([0-9]{4,})',
                r'[#\1](https://github.com/ClickHouse/ClickHouse/issues/\1)',
                pr["entry"])
            # 2) issue URL w/o markdown link
            pr["entry"] = re.sub(
                r'(?<!\()https://github\.com/ClickHouse/ClickHouse/issues/([0-9]{4,})',
                r'[#\1](https://github.com/ClickHouse/ClickHouse/issues/\1)',
                pr["entry"])
            print(f'* {pr["entry"]} [#{pr["number"]}]({pr["html_url"]}) '
                  f'([{user_name}]({user["html_url"]})).')
        print()

    # Print categories in the preferred order first.
    for category in categories_preferred_order:
        if category in category_to_pr:
            print_category(category)
            category_to_pr.pop(category)

    # Then print the rest of the categories.
    for category in category_to_pr:
        print_category(category)


if __name__ == '__main__':
    main()