2020-04-13 21:15:58 +00:00
#!/usr/bin/python3
import argparse
import collections
2020-07-03 08:57:38 +00:00
import fuzzywuzzy . fuzz
import itertools
import json
import os
2020-04-13 21:15:58 +00:00
import re
2020-07-03 08:57:38 +00:00
import sys
2020-04-13 21:15:58 +00:00
# Command-line interface: one optional positional argument naming the file
# that lists the PR numbers. When it is omitted, standard input is read.
parser = argparse.ArgumentParser(description='Format changelog for given PRs.')
parser.add_argument(
    'file',
    metavar='FILE',
    type=argparse.FileType('r', encoding='utf-8'),
    nargs='?',
    default=sys.stdin,
    help='File with PR numbers, one per line.',
)
args = parser.parse_args()
def parse_one_pull_request(item):
    """Extract the changelog category and entry from a PR description.

    This function mirrors the PR description checks in
    ClickhousePullRequestTrigger.

    The description (``item['body']``) is scanned line by line for a
    "changelog category" header and a "short description" / "changelog entry"
    header. On success the results are stored in ``item['category']`` and
    ``item['entry']``.

    Returns False if the PR should not be mentioned in the changelog
    (documentation / non-significant / not-for-changelog categories); in that
    case ``item`` is left unmodified. Returns True otherwise.
    """
    body = item['body']

    # Don't skip empty lines because they delimit parts of the description.
    # Every line is trimmed and its inner whitespace runs are collapsed.
    rows = [re.sub(r'\s+', ' ', raw.strip())
            for raw in (body.split('\n') if body else [])]

    category_header = re.compile(r'(?i)^[>*_ ]*change\s*log\s*category')
    entry_header = re.compile(r'(?i)^[>*_ ]*(short\s*description|change\s*log\s*entry)')

    category = ''
    entry = ''

    total = len(rows)
    pos = 0
    while pos < total:
        # Take the current line and move the cursor past it right away;
        # from here on `pos` points at the line following the header.
        row = rows[pos]
        pos += 1

        if category_header.match(row):
            # Can have one empty line between header and the category itself.
            if pos < total and not rows[pos]:
                pos += 1
            if pos >= total:
                break
            category = re.sub(r'^[-*\s]*', '', rows[pos])
            pos += 1
        elif entry_header.match(row):
            # Can have one empty line between header and the entry itself.
            if pos < total and not rows[pos]:
                pos += 1
            # All following lines until an empty one are the changelog entry.
            first = pos
            while pos < total and rows[pos]:
                pos += 1
            entry = ' '.join(rows[first:pos])

    if not category:
        # Shouldn't happen, because description check in CI should catch such PRs.
        # Fall through, so that it shows up in output and the user can fix it.
        category = "NO CL CATEGORY"

    # Filter out the PR categories that are not for changelog.
    if re.match(r'(?i)doc|((non|in|not|un)[-\s]*significant)|(not[ ]*for[ ]*changelog)', category):
        return False

    if not entry:
        # Shouldn't happen, because description check in CI should catch such PRs.
        category = "NO CL ENTRY"
        entry = "NO CL ENTRY: '" + item['title'] + "'"

    # Every entry is published as a sentence terminated by a full stop.
    entry = entry.strip()
    if not entry.endswith('.'):
        entry += '.'

    item['entry'] = entry
    item['category'] = category
    return True
2020-07-03 08:57:38 +00:00
# Canonical changelog categories, listed in the order in which they should
# appear in the output. The same list is also used to normalize the category
# names found in PR descriptions.
categories_preferred_order = [
    'Backward Incompatible Change',
    'New Feature',
    'Bug Fix',
    'Improvement',
    'Performance Improvement',
    'Build/Testing/Packaging Improvement',
    'Other',
]
2020-04-13 21:15:58 +00:00
# Group the parsed PRs by changelog category and collect their authors.
#
# For every PR number read from the input, `pr<NUMBER>.json` is loaded; for
# every PR that belongs in the changelog, `user<ID>.json` of its author is
# loaded as well. Both paths are relative, i.e. resolved against the current
# working directory.
#
# category_to_pr: category name -> list of PR dicts (in input order)
# users:          user id -> user dict
category_to_pr = collections.defaultdict(list)
users = {}
for line in args.file:
    pr_number = line.strip()
    if not pr_number:
        # Tolerate blank lines instead of trying to open 'pr.json'.
        continue

    # Use a context manager so the handle is closed deterministically, and
    # decode explicitly as UTF-8 (like the input list) rather than relying on
    # the locale's default encoding.
    with open(f'pr{pr_number}.json', encoding='utf-8') as pr_file:
        pr = json.load(pr_file)
    assert pr['number'], f'pr{pr_number}.json does not contain a PR number'

    # PRs that are not meant for the changelog are skipped entirely.
    if not parse_one_pull_request(pr):
        continue

    assert pr['category']

    # Normalize category name: snap it to the first preferred category that
    # is a close enough fuzzy match, otherwise keep it as written.
    parsed_category = pr['category'].lower()
    for c in categories_preferred_order:
        if fuzzywuzzy.fuzz.ratio(parsed_category, c.lower()) >= 90:
            pr['category'] = c
            break

    category_to_pr[pr['category']].append(pr)

    # Load each author only once, however many PRs they have in the list.
    user_id = pr['user']['id']
    if user_id not in users:
        with open(f'user{user_id}.json', encoding='utf-8') as user_file:
            users[user_id] = json.load(user_file)
def print_category(category):
    """Print one changelog section in Markdown to standard output.

    Emits a ``####`` heading with the category name followed by one bullet
    per PR stored in the module-level ``category_to_pr[category]``. Each
    bullet consists of the changelog entry, a link to the PR and a link to
    its author (looked up in the module-level ``users``; the login is used
    when the user has no display name).

    Bare issue references inside the entry are turned into Markdown links,
    and the linkified text is written back to ``pr["entry"]``.
    """
    issue_url = 'https://github.com/ClickHouse/ClickHouse/issues/'
    issue_link = r'[#\1](' + issue_url + r'\1)'

    # Negative lookbehind is used instead of consuming the preceding
    # character, so that a reference at the very start of the entry (or one
    # directly following another reference) is linkified too.
    # 1) issue number w/o markdown link, i.e. not already preceded by '['
    bare_issue_number = re.compile(r'(?<!\[)#([0-9]{4,})')
    # 2) issue URL w/o markdown link, i.e. not already preceded by '('
    bare_issue_url = re.compile(r'(?<!\()' + re.escape(issue_url) + r'([0-9]{4,})')

    print("#### " + category)
    print()

    for pr in category_to_pr[category]:
        user = users[pr["user"]["id"]]
        user_name = user["name"] or user["login"]

        # Substitute issue links. Numbers go first: the links they produce
        # are wrapped in '(' ... ')' and are therefore skipped by step 2.
        entry = bare_issue_number.sub(issue_link, pr["entry"])
        entry = bare_issue_url.sub(issue_link, entry)
        pr["entry"] = entry

        print(f'* {entry} [#{pr["number"]}]({pr["html_url"]}) ([{user_name}]({user["html_url"]})).')

    print()
# First the well-known categories, in their preferred order. Each one is
# removed from the mapping once printed, so that only the unrecognized
# categories are left afterwards.
for category in categories_preferred_order:
    if category not in category_to_pr:
        continue
    print_category(category)
    del category_to_pr[category]

# Then whatever categories remain, in the order they were first encountered.
for category in category_to_pr:
    print_category(category)