Apply black formatter to all *.py files in the repo

Mikhail f. Shiryaev 2022-03-22 17:39:58 +01:00
parent 5fea6d24fa
commit e6f5a3f98b
GPG Key ID: 4B02ED204C7D93F4
731 changed files with 93889 additions and 44939 deletions
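The exact invocation isn't recorded in the commit itself, but a tree-wide Black pass like this one is normally produced and then guarded with commands along these lines; the pinned install step and the --check verification are illustrative assumptions, not something this commit documents:

    pip install black              # the formatter named in the commit title; version not recorded here
    black .                        # rewrite every *.py file under the current directory in place
    black --check --diff .         # verification pass: exits non-zero and prints a diff if anything is still unformatted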


@@ -4,11 +4,12 @@
import sys import sys
import json import json
def parse_block(block=[], options=[]): def parse_block(block=[], options=[]):
#print('block is here', block) # print('block is here', block)
#show_query = False # show_query = False
#show_query = options.show_query # show_query = options.show_query
result = [] result = []
query = block[0].strip() query = block[0].strip()
if len(block) > 4: if len(block) > 4:
@@ -20,9 +21,9 @@ def parse_block(block=[], options=[]):
timing2 = block[2].strip().split()[1] timing2 = block[2].strip().split()[1]
timing3 = block[3].strip().split()[1] timing3 = block[3].strip().split()[1]
if options.show_queries: if options.show_queries:
result.append( query ) result.append(query)
if not options.show_first_timings: if not options.show_first_timings:
result += [ timing1 , timing2, timing3 ] result += [timing1, timing2, timing3]
else: else:
result.append(timing1) result.append(timing1)
return result return result
@@ -37,12 +38,12 @@ def read_stats_file(options, fname):
for line in f.readlines(): for line in f.readlines():
if 'SELECT' in line: if "SELECT" in line:
if len(block) > 1: if len(block) > 1:
result.append( parse_block(block, options) ) result.append(parse_block(block, options))
block = [ line ] block = [line]
elif 'Time:' in line: elif "Time:" in line:
block.append( line ) block.append(line)
return result return result
@@ -50,7 +51,7 @@ def read_stats_file(options, fname):
def compare_stats_files(options, arguments): def compare_stats_files(options, arguments):
result = [] result = []
file_output = [] file_output = []
pyplot_colors = ['y', 'b', 'g', 'r'] pyplot_colors = ["y", "b", "g", "r"]
for fname in arguments[1:]: for fname in arguments[1:]:
file_output.append((read_stats_file(options, fname))) file_output.append((read_stats_file(options, fname)))
if len(file_output[0]) > 0: if len(file_output[0]) > 0:
@@ -58,65 +59,92 @@ def compare_stats_files(options, arguments):
for idx, data_set in enumerate(file_output): for idx, data_set in enumerate(file_output):
int_result = [] int_result = []
for timing in data_set: for timing in data_set:
int_result.append(float(timing[0])) #y values int_result.append(float(timing[0])) # y values
result.append([[x for x in range(0, len(int_result)) ], int_result, result.append(
pyplot_colors[idx] + '^' ] ) [
# result.append([x for x in range(1, len(int_result)) ]) #x values [x for x in range(0, len(int_result))],
# result.append( pyplot_colors[idx] + '^' ) int_result,
pyplot_colors[idx] + "^",
]
)
# result.append([x for x in range(1, len(int_result)) ]) #x values
# result.append( pyplot_colors[idx] + '^' )
return result return result
def parse_args(): def parse_args():
from optparse import OptionParser from optparse import OptionParser
parser = OptionParser(usage='usage: %prog [options] [result_file_path]..')
parser.add_option("-q", "--show-queries", help="Show statements along with timings", action="store_true", dest="show_queries") parser = OptionParser(usage="usage: %prog [options] [result_file_path]..")
parser.add_option("-f", "--show-first-timings", help="Show only first tries timings", action="store_true", dest="show_first_timings") parser.add_option(
parser.add_option("-c", "--compare-mode", help="Prepare output for pyplot comparing result files.", action="store", dest="compare_mode") "-q",
"--show-queries",
help="Show statements along with timings",
action="store_true",
dest="show_queries",
)
parser.add_option(
"-f",
"--show-first-timings",
help="Show only first tries timings",
action="store_true",
dest="show_first_timings",
)
parser.add_option(
"-c",
"--compare-mode",
help="Prepare output for pyplot comparing result files.",
action="store",
dest="compare_mode",
)
(options, arguments) = parser.parse_args(sys.argv) (options, arguments) = parser.parse_args(sys.argv)
if len(arguments) < 2: if len(arguments) < 2:
parser.print_usage() parser.print_usage()
sys.exit(1) sys.exit(1)
return ( options, arguments ) return (options, arguments)
def gen_pyplot_code(options, arguments): def gen_pyplot_code(options, arguments):
result = '' result = ""
data_sets = compare_stats_files(options, arguments) data_sets = compare_stats_files(options, arguments)
for idx, data_set in enumerate(data_sets, start=0): for idx, data_set in enumerate(data_sets, start=0):
x_values, y_values, line_style = data_set x_values, y_values, line_style = data_set
result += '\nplt.plot(' result += "\nplt.plot("
result += '%s, %s, \'%s\'' % ( x_values, y_values, line_style ) result += "%s, %s, '%s'" % (x_values, y_values, line_style)
result += ', label=\'%s try\')' % idx result += ", label='%s try')" % idx
print('import matplotlib.pyplot as plt') print("import matplotlib.pyplot as plt")
print(result) print(result)
print( 'plt.xlabel(\'Try number\')' ) print("plt.xlabel('Try number')")
print( 'plt.ylabel(\'Timing\')' ) print("plt.ylabel('Timing')")
print( 'plt.title(\'Benchmark query timings\')' ) print("plt.title('Benchmark query timings')")
print('plt.legend()') print("plt.legend()")
print('plt.show()') print("plt.show()")
def gen_html_json(options, arguments): def gen_html_json(options, arguments):
tuples = read_stats_file(options, arguments[1]) tuples = read_stats_file(options, arguments[1])
print('{') print("{")
print('"system: GreenPlum(x2),') print('"system: GreenPlum(x2),')
print(('"version": "%s",' % '4.3.9.1')) print(('"version": "%s",' % "4.3.9.1"))
print('"data_size": 10000000,') print('"data_size": 10000000,')
print('"time": "",') print('"time": "",')
print('"comments": "",') print('"comments": "",')
print('"result":') print('"result":')
print('[') print("[")
for s in tuples: for s in tuples:
print(s) print(s)
print(']') print("]")
print('}') print("}")
def main(): def main():
( options, arguments ) = parse_args() (options, arguments) = parse_args()
if len(arguments) > 2: if len(arguments) > 2:
gen_pyplot_code(options, arguments) gen_pyplot_code(options, arguments)
else: else:
gen_html_json(options, arguments) gen_html_json(options, arguments)
if __name__ == '__main__':
if __name__ == "__main__":
main() main()


@@ -11,7 +11,7 @@ def removesuffix(text, suffix):
     https://www.python.org/dev/peps/pep-0616/
     """
     if suffix and text.endswith(suffix):
-        return text[:-len(suffix)]
+        return text[: -len(suffix)]
     else:
         return text[:]


@@ -3,55 +3,55 @@ import subprocess
import datetime import datetime
from flask import Flask, flash, request, redirect, url_for from flask import Flask, flash, request, redirect, url_for
def run_command(command, wait=False): def run_command(command, wait=False):
print("{} - execute shell command:{}".format(datetime.datetime.now(), command)) print("{} - execute shell command:{}".format(datetime.datetime.now(), command))
lines = [] lines = []
p = subprocess.Popen(command, p = subprocess.Popen(
stdout=subprocess.PIPE, command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True
stderr=subprocess.STDOUT, )
shell=True)
if wait: if wait:
for l in iter(p.stdout.readline, b''): for l in iter(p.stdout.readline, b""):
lines.append(l) lines.append(l)
p.poll() p.poll()
return (lines, p.returncode) return (lines, p.returncode)
else: else:
return(iter(p.stdout.readline, b''), 0) return (iter(p.stdout.readline, b""), 0)
UPLOAD_FOLDER = './' UPLOAD_FOLDER = "./"
ALLOWED_EXTENSIONS = {'txt', 'sh'} ALLOWED_EXTENSIONS = {"txt", "sh"}
app = Flask(__name__) app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER app.config["UPLOAD_FOLDER"] = UPLOAD_FOLDER
@app.route('/')
@app.route("/")
def hello_world(): def hello_world():
return 'Hello World' return "Hello World"
def allowed_file(filename): def allowed_file(filename):
return '.' in filename and \ return "." in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS
filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
@app.route('/upload', methods=['GET', 'POST']) @app.route("/upload", methods=["GET", "POST"])
def upload_file(): def upload_file():
if request.method == 'POST': if request.method == "POST":
# check if the post request has the file part # check if the post request has the file part
if 'file' not in request.files: if "file" not in request.files:
flash('No file part') flash("No file part")
return redirect(request.url) return redirect(request.url)
file = request.files['file'] file = request.files["file"]
# If the user does not select a file, the browser submits an # If the user does not select a file, the browser submits an
# empty file without a filename. # empty file without a filename.
if file.filename == '': if file.filename == "":
flash('No selected file') flash("No selected file")
return redirect(request.url) return redirect(request.url)
if file and allowed_file(file.filename): if file and allowed_file(file.filename):
filename = file.filename filename = file.filename
file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename)) file.save(os.path.join(app.config["UPLOAD_FOLDER"], filename))
return redirect(url_for('upload_file', name=filename)) return redirect(url_for("upload_file", name=filename))
return ''' return """
<!doctype html> <!doctype html>
<title>Upload new File</title> <title>Upload new File</title>
<h1>Upload new File</h1> <h1>Upload new File</h1>
@@ -59,12 +59,15 @@ def upload_file():
<input type=file name=file> <input type=file name=file>
<input type=submit value=Upload> <input type=submit value=Upload>
</form> </form>
''' """
@app.route('/run', methods=['GET', 'POST'])
@app.route("/run", methods=["GET", "POST"])
def parse_request(): def parse_request():
data = request.data # data is empty data = request.data # data is empty
run_command(data, wait=True) run_command(data, wait=True)
return 'Ok' return "Ok"
if __name__ == '__main__':
app.run(port=5011) if __name__ == "__main__":
app.run(port=5011)


@@ -19,58 +19,126 @@ import xml.etree.ElementTree as et
from threading import Thread from threading import Thread
from scipy import stats from scipy import stats
logging.basicConfig(format='%(asctime)s: %(levelname)s: %(module)s: %(message)s', level='WARNING') logging.basicConfig(
format="%(asctime)s: %(levelname)s: %(module)s: %(message)s", level="WARNING"
)
total_start_seconds = time.perf_counter() total_start_seconds = time.perf_counter()
stage_start_seconds = total_start_seconds stage_start_seconds = total_start_seconds
def reportStageEnd(stage): def reportStageEnd(stage):
global stage_start_seconds, total_start_seconds global stage_start_seconds, total_start_seconds
current = time.perf_counter() current = time.perf_counter()
print(f'stage\t{stage}\t{current - stage_start_seconds:.3f}\t{current - total_start_seconds:.3f}') print(
f"stage\t{stage}\t{current - stage_start_seconds:.3f}\t{current - total_start_seconds:.3f}"
)
stage_start_seconds = current stage_start_seconds = current
def tsv_escape(s): def tsv_escape(s):
return s.replace('\\', '\\\\').replace('\t', '\\t').replace('\n', '\\n').replace('\r','') return (
s.replace("\\", "\\\\")
.replace("\t", "\\t")
.replace("\n", "\\n")
.replace("\r", "")
)
parser = argparse.ArgumentParser(description='Run performance test.') parser = argparse.ArgumentParser(description="Run performance test.")
# Explicitly decode files as UTF-8 because sometimes we have Russian characters in queries, and LANG=C is set. # Explicitly decode files as UTF-8 because sometimes we have Russian characters in queries, and LANG=C is set.
parser.add_argument('file', metavar='FILE', type=argparse.FileType('r', encoding='utf-8'), nargs=1, help='test description file') parser.add_argument(
parser.add_argument('--host', nargs='*', default=['localhost'], help="Space-separated list of server hostname(s). Corresponds to '--port' options.") "file",
parser.add_argument('--port', nargs='*', default=[9000], help="Space-separated list of server port(s). Corresponds to '--host' options.") metavar="FILE",
parser.add_argument('--runs', type=int, default=1, help='Number of query runs per server.') type=argparse.FileType("r", encoding="utf-8"),
parser.add_argument('--max-queries', type=int, default=None, help='Test no more than this number of queries, chosen at random.') nargs=1,
parser.add_argument('--queries-to-run', nargs='*', type=int, default=None, help='Space-separated list of indexes of queries to test.') help="test description file",
parser.add_argument('--max-query-seconds', type=int, default=15, help='For how many seconds at most a query is allowed to run. The script finishes with error if this time is exceeded.') )
parser.add_argument('--prewarm-max-query-seconds', type=int, default=180, help='For how many seconds at most a prewarm (cold storage) query is allowed to run. The script finishes with error if this time is exceeded.') parser.add_argument(
parser.add_argument('--profile-seconds', type=int, default=0, help='For how many seconds to profile a query for which the performance has changed.') "--host",
parser.add_argument('--long', action='store_true', help='Do not skip the tests tagged as long.') nargs="*",
parser.add_argument('--print-queries', action='store_true', help='Print test queries and exit.') default=["localhost"],
parser.add_argument('--print-settings', action='store_true', help='Print test settings and exit.') help="Space-separated list of server hostname(s). Corresponds to '--port' options.",
parser.add_argument('--keep-created-tables', action='store_true', help="Don't drop the created tables after the test.") )
parser.add_argument('--use-existing-tables', action='store_true', help="Don't create or drop the tables, use the existing ones instead.") parser.add_argument(
"--port",
nargs="*",
default=[9000],
help="Space-separated list of server port(s). Corresponds to '--host' options.",
)
parser.add_argument(
"--runs", type=int, default=1, help="Number of query runs per server."
)
parser.add_argument(
"--max-queries",
type=int,
default=None,
help="Test no more than this number of queries, chosen at random.",
)
parser.add_argument(
"--queries-to-run",
nargs="*",
type=int,
default=None,
help="Space-separated list of indexes of queries to test.",
)
parser.add_argument(
"--max-query-seconds",
type=int,
default=15,
help="For how many seconds at most a query is allowed to run. The script finishes with error if this time is exceeded.",
)
parser.add_argument(
"--prewarm-max-query-seconds",
type=int,
default=180,
help="For how many seconds at most a prewarm (cold storage) query is allowed to run. The script finishes with error if this time is exceeded.",
)
parser.add_argument(
"--profile-seconds",
type=int,
default=0,
help="For how many seconds to profile a query for which the performance has changed.",
)
parser.add_argument(
"--long", action="store_true", help="Do not skip the tests tagged as long."
)
parser.add_argument(
"--print-queries", action="store_true", help="Print test queries and exit."
)
parser.add_argument(
"--print-settings", action="store_true", help="Print test settings and exit."
)
parser.add_argument(
"--keep-created-tables",
action="store_true",
help="Don't drop the created tables after the test.",
)
parser.add_argument(
"--use-existing-tables",
action="store_true",
help="Don't create or drop the tables, use the existing ones instead.",
)
args = parser.parse_args() args = parser.parse_args()
reportStageEnd('start') reportStageEnd("start")
test_name = os.path.splitext(os.path.basename(args.file[0].name))[0] test_name = os.path.splitext(os.path.basename(args.file[0].name))[0]
tree = et.parse(args.file[0]) tree = et.parse(args.file[0])
root = tree.getroot() root = tree.getroot()
reportStageEnd('parse') reportStageEnd("parse")
# Process query parameters # Process query parameters
subst_elems = root.findall('substitutions/substitution') subst_elems = root.findall("substitutions/substitution")
available_parameters = {} # { 'table': ['hits_10m', 'hits_100m'], ... } available_parameters = {} # { 'table': ['hits_10m', 'hits_100m'], ... }
for e in subst_elems: for e in subst_elems:
name = e.find('name').text name = e.find("name").text
values = [v.text for v in e.findall('values/value')] values = [v.text for v in e.findall("values/value")]
if not values: if not values:
raise Exception(f'No values given for substitution {{{name}}}') raise Exception(f"No values given for substitution {{{name}}}")
available_parameters[name] = values available_parameters[name] = values
@@ -78,7 +146,7 @@ for e in subst_elems:
# parameters. The set of parameters is determined based on the first list. # parameters. The set of parameters is determined based on the first list.
# Note: keep the order of queries -- sometimes we have DROP IF EXISTS # Note: keep the order of queries -- sometimes we have DROP IF EXISTS
# followed by CREATE in create queries section, so the order matters. # followed by CREATE in create queries section, so the order matters.
def substitute_parameters(query_templates, other_templates = []): def substitute_parameters(query_templates, other_templates=[]):
query_results = [] query_results = []
other_results = [[]] * (len(other_templates)) other_results = [[]] * (len(other_templates))
for i, q in enumerate(query_templates): for i, q in enumerate(query_templates):
@@ -103,17 +171,21 @@ def substitute_parameters(query_templates, other_templates = []):
# and reporting the queries marked as short. # and reporting the queries marked as short.
test_queries = [] test_queries = []
is_short = [] is_short = []
for e in root.findall('query'): for e in root.findall("query"):
new_queries, [new_is_short] = substitute_parameters([e.text], [[e.attrib.get('short', '0')]]) new_queries, [new_is_short] = substitute_parameters(
[e.text], [[e.attrib.get("short", "0")]]
)
test_queries += new_queries test_queries += new_queries
is_short += [eval(s) for s in new_is_short] is_short += [eval(s) for s in new_is_short]
assert(len(test_queries) == len(is_short)) assert len(test_queries) == len(is_short)
# If we're given a list of queries to run, check that it makes sense. # If we're given a list of queries to run, check that it makes sense.
for i in args.queries_to_run or []: for i in args.queries_to_run or []:
if i < 0 or i >= len(test_queries): if i < 0 or i >= len(test_queries):
print(f'There is no query no. {i} in this test, only [{0}-{len(test_queries) - 1}] are present') print(
f"There is no query no. {i} in this test, only [{0}-{len(test_queries) - 1}] are present"
)
exit(1) exit(1)
# If we're only asked to print the queries, do that and exit. # If we're only asked to print the queries, do that and exit.
@@ -125,60 +197,65 @@ if args.print_queries:
# Print short queries # Print short queries
for i, s in enumerate(is_short): for i, s in enumerate(is_short):
if s: if s:
print(f'short\t{i}') print(f"short\t{i}")
# If we're only asked to print the settings, do that and exit. These are settings # If we're only asked to print the settings, do that and exit. These are settings
# for clickhouse-benchmark, so we print them as command line arguments, e.g. # for clickhouse-benchmark, so we print them as command line arguments, e.g.
# '--max_memory_usage=10000000'. # '--max_memory_usage=10000000'.
if args.print_settings: if args.print_settings:
for s in root.findall('settings/*'): for s in root.findall("settings/*"):
print(f'--{s.tag}={s.text}') print(f"--{s.tag}={s.text}")
exit(0) exit(0)
# Skip long tests # Skip long tests
if not args.long: if not args.long:
for tag in root.findall('.//tag'): for tag in root.findall(".//tag"):
if tag.text == 'long': if tag.text == "long":
print('skipped\tTest is tagged as long.') print("skipped\tTest is tagged as long.")
sys.exit(0) sys.exit(0)
# Print report threshold for the test if it is set. # Print report threshold for the test if it is set.
ignored_relative_change = 0.05 ignored_relative_change = 0.05
if 'max_ignored_relative_change' in root.attrib: if "max_ignored_relative_change" in root.attrib:
ignored_relative_change = float(root.attrib["max_ignored_relative_change"]) ignored_relative_change = float(root.attrib["max_ignored_relative_change"])
print(f'report-threshold\t{ignored_relative_change}') print(f"report-threshold\t{ignored_relative_change}")
reportStageEnd('before-connect') reportStageEnd("before-connect")
# Open connections # Open connections
servers = [{'host': host or args.host[0], 'port': port or args.port[0]} for (host, port) in itertools.zip_longest(args.host, args.port)] servers = [
{"host": host or args.host[0], "port": port or args.port[0]}
for (host, port) in itertools.zip_longest(args.host, args.port)
]
# Force settings_is_important to fail queries on unknown settings. # Force settings_is_important to fail queries on unknown settings.
all_connections = [clickhouse_driver.Client(**server, settings_is_important=True) for server in servers] all_connections = [
clickhouse_driver.Client(**server, settings_is_important=True) for server in servers
]
for i, s in enumerate(servers): for i, s in enumerate(servers):
print(f'server\t{i}\t{s["host"]}\t{s["port"]}') print(f'server\t{i}\t{s["host"]}\t{s["port"]}')
reportStageEnd('connect') reportStageEnd("connect")
if not args.use_existing_tables: if not args.use_existing_tables:
# Run drop queries, ignoring errors. Do this before all other activity, # Run drop queries, ignoring errors. Do this before all other activity,
# because clickhouse_driver disconnects on error (this is not configurable), # because clickhouse_driver disconnects on error (this is not configurable),
# and the new connection loses the changes in settings. # and the new connection loses the changes in settings.
drop_query_templates = [q.text for q in root.findall('drop_query')] drop_query_templates = [q.text for q in root.findall("drop_query")]
drop_queries = substitute_parameters(drop_query_templates) drop_queries = substitute_parameters(drop_query_templates)
for conn_index, c in enumerate(all_connections): for conn_index, c in enumerate(all_connections):
for q in drop_queries: for q in drop_queries:
try: try:
c.execute(q) c.execute(q)
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}') print(f"drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}")
except: except:
pass pass
reportStageEnd('drop-1') reportStageEnd("drop-1")
# Apply settings. # Apply settings.
settings = root.findall('settings/*') settings = root.findall("settings/*")
for conn_index, c in enumerate(all_connections): for conn_index, c in enumerate(all_connections):
for s in settings: for s in settings:
# requires clickhouse-driver >= 1.1.5 to accept arbitrary new settings # requires clickhouse-driver >= 1.1.5 to accept arbitrary new settings
@@ -189,48 +266,52 @@ for conn_index, c in enumerate(all_connections):
# the test, which is wrong. # the test, which is wrong.
c.execute("select 1") c.execute("select 1")
reportStageEnd('settings') reportStageEnd("settings")
# Check tables that should exist. If they don't exist, just skip this test. # Check tables that should exist. If they don't exist, just skip this test.
tables = [e.text for e in root.findall('preconditions/table_exists')] tables = [e.text for e in root.findall("preconditions/table_exists")]
for t in tables: for t in tables:
for c in all_connections: for c in all_connections:
try: try:
res = c.execute("select 1 from {} limit 1".format(t)) res = c.execute("select 1 from {} limit 1".format(t))
except: except:
exception_message = traceback.format_exception_only(*sys.exc_info()[:2])[-1] exception_message = traceback.format_exception_only(*sys.exc_info()[:2])[-1]
skipped_message = ' '.join(exception_message.split('\n')[:2]) skipped_message = " ".join(exception_message.split("\n")[:2])
print(f'skipped\t{tsv_escape(skipped_message)}') print(f"skipped\t{tsv_escape(skipped_message)}")
sys.exit(0) sys.exit(0)
reportStageEnd('preconditions') reportStageEnd("preconditions")
if not args.use_existing_tables: if not args.use_existing_tables:
# Run create and fill queries. We will run them simultaneously for both # Run create and fill queries. We will run them simultaneously for both
# servers, to save time. The weird XML search + filter is because we want to # servers, to save time. The weird XML search + filter is because we want to
# keep the relative order of elements, and etree doesn't support the # keep the relative order of elements, and etree doesn't support the
# appropriate xpath query. # appropriate xpath query.
create_query_templates = [q.text for q in root.findall('./*') create_query_templates = [
if q.tag in ('create_query', 'fill_query')] q.text for q in root.findall("./*") if q.tag in ("create_query", "fill_query")
]
create_queries = substitute_parameters(create_query_templates) create_queries = substitute_parameters(create_query_templates)
# Disallow temporary tables, because the clickhouse_driver reconnects on # Disallow temporary tables, because the clickhouse_driver reconnects on
# errors, and temporary tables are destroyed. We want to be able to continue # errors, and temporary tables are destroyed. We want to be able to continue
# after some errors. # after some errors.
for q in create_queries: for q in create_queries:
if re.search('create temporary table', q, flags=re.IGNORECASE): if re.search("create temporary table", q, flags=re.IGNORECASE):
print(f"Temporary tables are not allowed in performance tests: '{q}'", print(
file = sys.stderr) f"Temporary tables are not allowed in performance tests: '{q}'",
file=sys.stderr,
)
sys.exit(1) sys.exit(1)
def do_create(connection, index, queries): def do_create(connection, index, queries):
for q in queries: for q in queries:
connection.execute(q) connection.execute(q)
print(f'create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}') print(f"create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}")
threads = [ threads = [
Thread(target = do_create, args = (connection, index, create_queries)) Thread(target=do_create, args=(connection, index, create_queries))
for index, connection in enumerate(all_connections)] for index, connection in enumerate(all_connections)
]
for t in threads: for t in threads:
t.start() t.start()
@@ -238,14 +319,16 @@ if not args.use_existing_tables:
for t in threads: for t in threads:
t.join() t.join()
reportStageEnd('create') reportStageEnd("create")
# By default, test all queries. # By default, test all queries.
queries_to_run = range(0, len(test_queries)) queries_to_run = range(0, len(test_queries))
if args.max_queries: if args.max_queries:
# If specified, test a limited number of queries chosen at random. # If specified, test a limited number of queries chosen at random.
queries_to_run = random.sample(range(0, len(test_queries)), min(len(test_queries), args.max_queries)) queries_to_run = random.sample(
range(0, len(test_queries)), min(len(test_queries), args.max_queries)
)
if args.queries_to_run: if args.queries_to_run:
# Run the specified queries. # Run the specified queries.
@@ -255,16 +338,16 @@ if args.queries_to_run:
profile_total_seconds = 0 profile_total_seconds = 0
for query_index in queries_to_run: for query_index in queries_to_run:
q = test_queries[query_index] q = test_queries[query_index]
query_prefix = f'{test_name}.query{query_index}' query_prefix = f"{test_name}.query{query_index}"
# We have some crazy long queries (about 100kB), so trim them to a sane # We have some crazy long queries (about 100kB), so trim them to a sane
# length. This means we can't use query text as an identifier and have to # length. This means we can't use query text as an identifier and have to
# use the test name + the test-wide query index. # use the test name + the test-wide query index.
query_display_name = q query_display_name = q
if len(query_display_name) > 1000: if len(query_display_name) > 1000:
query_display_name = f'{query_display_name[:1000]}...({query_index})' query_display_name = f"{query_display_name[:1000]}...({query_index})"
print(f'display-name\t{query_index}\t{tsv_escape(query_display_name)}') print(f"display-name\t{query_index}\t{tsv_escape(query_display_name)}")
# Prewarm: run once on both servers. Helps to bring the data into memory, # Prewarm: run once on both servers. Helps to bring the data into memory,
# precompile the queries, etc. # precompile the queries, etc.
@@ -272,10 +355,10 @@ for query_index in queries_to_run:
# new one. We want to run them on the new server only, so that the PR author # new one. We want to run them on the new server only, so that the PR author
# can ensure that the test works properly. Remember the errors we had on # can ensure that the test works properly. Remember the errors we had on
# each server. # each server.
query_error_on_connection = [None] * len(all_connections); query_error_on_connection = [None] * len(all_connections)
for conn_index, c in enumerate(all_connections): for conn_index, c in enumerate(all_connections):
try: try:
prewarm_id = f'{query_prefix}.prewarm0' prewarm_id = f"{query_prefix}.prewarm0"
try: try:
# During the warmup runs, we will also: # During the warmup runs, we will also:
@@ -283,25 +366,30 @@ for query_index in queries_to_run:
# * collect profiler traces, which might be helpful for analyzing # * collect profiler traces, which might be helpful for analyzing
# test coverage. We disable profiler for normal runs because # test coverage. We disable profiler for normal runs because
# it makes the results unstable. # it makes the results unstable.
res = c.execute(q, query_id = prewarm_id, res = c.execute(
settings = { q,
'max_execution_time': args.prewarm_max_query_seconds, query_id=prewarm_id,
'query_profiler_real_time_period_ns': 10000000, settings={
'memory_profiler_step': '4Mi', "max_execution_time": args.prewarm_max_query_seconds,
}) "query_profiler_real_time_period_ns": 10000000,
"memory_profiler_step": "4Mi",
},
)
except clickhouse_driver.errors.Error as e: except clickhouse_driver.errors.Error as e:
# Add query id to the exception to make debugging easier. # Add query id to the exception to make debugging easier.
e.args = (prewarm_id, *e.args) e.args = (prewarm_id, *e.args)
e.message = prewarm_id + ': ' + e.message e.message = prewarm_id + ": " + e.message
raise raise
print(f'prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}') print(
f"prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}"
)
except KeyboardInterrupt: except KeyboardInterrupt:
raise raise
except: except:
# FIXME the driver reconnects on error and we lose settings, so this # FIXME the driver reconnects on error and we lose settings, so this
# might lead to further errors or unexpected behavior. # might lead to further errors or unexpected behavior.
query_error_on_connection[conn_index] = traceback.format_exc(); query_error_on_connection[conn_index] = traceback.format_exc()
continue continue
# Report all errors that ocurred during prewarm and decide what to do next. # Report all errors that ocurred during prewarm and decide what to do next.
@@ -311,14 +399,14 @@ for query_index in queries_to_run:
no_errors = [] no_errors = []
for i, e in enumerate(query_error_on_connection): for i, e in enumerate(query_error_on_connection):
if e: if e:
print(e, file = sys.stderr) print(e, file=sys.stderr)
else: else:
no_errors.append(i) no_errors.append(i)
if len(no_errors) == 0: if len(no_errors) == 0:
continue continue
elif len(no_errors) < len(all_connections): elif len(no_errors) < len(all_connections):
print(f'partial\t{query_index}\t{no_errors}') print(f"partial\t{query_index}\t{no_errors}")
this_query_connections = [all_connections[index] for index in no_errors] this_query_connections = [all_connections[index] for index in no_errors]
@@ -337,27 +425,34 @@ for query_index in queries_to_run:
all_server_times.append([]) all_server_times.append([])
while True: while True:
run_id = f'{query_prefix}.run{run}' run_id = f"{query_prefix}.run{run}"
for conn_index, c in enumerate(this_query_connections): for conn_index, c in enumerate(this_query_connections):
try: try:
res = c.execute(q, query_id = run_id, settings = {'max_execution_time': args.max_query_seconds}) res = c.execute(
q,
query_id=run_id,
settings={"max_execution_time": args.max_query_seconds},
)
except clickhouse_driver.errors.Error as e: except clickhouse_driver.errors.Error as e:
# Add query id to the exception to make debugging easier. # Add query id to the exception to make debugging easier.
e.args = (run_id, *e.args) e.args = (run_id, *e.args)
e.message = run_id + ': ' + e.message e.message = run_id + ": " + e.message
raise raise
elapsed = c.last_query.elapsed elapsed = c.last_query.elapsed
all_server_times[conn_index].append(elapsed) all_server_times[conn_index].append(elapsed)
server_seconds += elapsed server_seconds += elapsed
print(f'query\t{query_index}\t{run_id}\t{conn_index}\t{elapsed}') print(f"query\t{query_index}\t{run_id}\t{conn_index}\t{elapsed}")
if elapsed > args.max_query_seconds: if elapsed > args.max_query_seconds:
# Do not stop processing pathologically slow queries, # Do not stop processing pathologically slow queries,
# since this may hide errors in other queries. # since this may hide errors in other queries.
print(f'The query no. {query_index} is taking too long to run ({elapsed} s)', file=sys.stderr) print(
f"The query no. {query_index} is taking too long to run ({elapsed} s)",
file=sys.stderr,
)
# Be careful with the counter, after this line it's the next iteration # Be careful with the counter, after this line it's the next iteration
# already. # already.
@@ -386,7 +481,7 @@ for query_index in queries_to_run:
break break
client_seconds = time.perf_counter() - start_seconds client_seconds = time.perf_counter() - start_seconds
print(f'client-time\t{query_index}\t{client_seconds}\t{server_seconds}') print(f"client-time\t{query_index}\t{client_seconds}\t{server_seconds}")
# Run additional profiling queries to collect profile data, but only if test times appeared to be different. # Run additional profiling queries to collect profile data, but only if test times appeared to be different.
# We have to do it after normal runs because otherwise it will affect test statistics too much # We have to do it after normal runs because otherwise it will affect test statistics too much
@@ -397,13 +492,15 @@ for query_index in queries_to_run:
# Don't fail if for some reason there are not enough measurements. # Don't fail if for some reason there are not enough measurements.
continue continue
pvalue = stats.ttest_ind(all_server_times[0], all_server_times[1], equal_var = False).pvalue pvalue = stats.ttest_ind(
all_server_times[0], all_server_times[1], equal_var=False
).pvalue
median = [statistics.median(t) for t in all_server_times] median = [statistics.median(t) for t in all_server_times]
# Keep this consistent with the value used in report. Should eventually move # Keep this consistent with the value used in report. Should eventually move
# to (median[1] - median[0]) / min(median), which is compatible with "times" # to (median[1] - median[0]) / min(median), which is compatible with "times"
# difference we use in report (max(median) / min(median)). # difference we use in report (max(median) / min(median)).
relative_diff = (median[1] - median[0]) / median[0] relative_diff = (median[1] - median[0]) / median[0]
print(f'diff\t{query_index}\t{median[0]}\t{median[1]}\t{relative_diff}\t{pvalue}') print(f"diff\t{query_index}\t{median[0]}\t{median[1]}\t{relative_diff}\t{pvalue}")
if abs(relative_diff) < ignored_relative_change or pvalue > 0.05: if abs(relative_diff) < ignored_relative_change or pvalue > 0.05:
continue continue
@@ -412,25 +509,31 @@ for query_index in queries_to_run:
profile_start_seconds = time.perf_counter() profile_start_seconds = time.perf_counter()
run = 0 run = 0
while time.perf_counter() - profile_start_seconds < args.profile_seconds: while time.perf_counter() - profile_start_seconds < args.profile_seconds:
run_id = f'{query_prefix}.profile{run}' run_id = f"{query_prefix}.profile{run}"
for conn_index, c in enumerate(this_query_connections): for conn_index, c in enumerate(this_query_connections):
try: try:
res = c.execute(q, query_id = run_id, settings = {'query_profiler_real_time_period_ns': 10000000}) res = c.execute(
print(f'profile\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}') q,
query_id=run_id,
settings={"query_profiler_real_time_period_ns": 10000000},
)
print(
f"profile\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}"
)
except clickhouse_driver.errors.Error as e: except clickhouse_driver.errors.Error as e:
# Add query id to the exception to make debugging easier. # Add query id to the exception to make debugging easier.
e.args = (run_id, *e.args) e.args = (run_id, *e.args)
e.message = run_id + ': ' + e.message e.message = run_id + ": " + e.message
raise raise
run += 1 run += 1
profile_total_seconds += time.perf_counter() - profile_start_seconds profile_total_seconds += time.perf_counter() - profile_start_seconds
print(f'profile-total\t{profile_total_seconds}') print(f"profile-total\t{profile_total_seconds}")
reportStageEnd('run') reportStageEnd("run")
# Run drop queries # Run drop queries
if not args.keep_created_tables and not args.use_existing_tables: if not args.keep_created_tables and not args.use_existing_tables:
@@ -438,6 +541,6 @@ if not args.keep_created_tables and not args.use_existing_tables:
for conn_index, c in enumerate(all_connections): for conn_index, c in enumerate(all_connections):
for q in drop_queries: for q in drop_queries:
c.execute(q) c.execute(q)
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}') print(f"drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}")
reportStageEnd('drop-2') reportStageEnd("drop-2")


@@ -12,9 +12,13 @@ import pprint
import sys import sys
import traceback import traceback
parser = argparse.ArgumentParser(description='Create performance test report') parser = argparse.ArgumentParser(description="Create performance test report")
parser.add_argument('--report', default='main', choices=['main', 'all-queries'], parser.add_argument(
help='Which report to build') "--report",
default="main",
choices=["main", "all-queries"],
help="Which report to build",
)
args = parser.parse_args() args = parser.parse_args()
tables = [] tables = []
@@ -31,8 +35,8 @@ unstable_partial_queries = 0
# max seconds to run one query by itself, not counting preparation # max seconds to run one query by itself, not counting preparation
allowed_single_run_time = 2 allowed_single_run_time = 2
color_bad='#ffb0c0' color_bad = "#ffb0c0"
color_good='#b0d050' color_good = "#b0d050"
header_template = """ header_template = """
<!DOCTYPE html> <!DOCTYPE html>
@@ -151,24 +155,29 @@ tr:nth-child(odd) td {{filter: brightness(90%);}}
table_anchor = 0 table_anchor = 0
row_anchor = 0 row_anchor = 0
def currentTableAnchor(): def currentTableAnchor():
global table_anchor global table_anchor
return f'{table_anchor}' return f"{table_anchor}"
def newTableAnchor(): def newTableAnchor():
global table_anchor global table_anchor
table_anchor += 1 table_anchor += 1
return currentTableAnchor() return currentTableAnchor()
def currentRowAnchor(): def currentRowAnchor():
global row_anchor global row_anchor
global table_anchor global table_anchor
return f'{table_anchor}.{row_anchor}' return f"{table_anchor}.{row_anchor}"
def nextRowAnchor(): def nextRowAnchor():
global row_anchor global row_anchor
global table_anchor global table_anchor
return f'{table_anchor}.{row_anchor + 1}' return f"{table_anchor}.{row_anchor + 1}"
def advanceRowAnchor(): def advanceRowAnchor():
global row_anchor global row_anchor
@@ -178,43 +187,58 @@ def advanceRowAnchor():
def tr(x, anchor=None): def tr(x, anchor=None):
#return '<tr onclick="location.href=\'#{a}\'" id={a}>{x}</tr>'.format(a=a, x=str(x)) # return '<tr onclick="location.href=\'#{a}\'" id={a}>{x}</tr>'.format(a=a, x=str(x))
anchor = anchor if anchor else advanceRowAnchor() anchor = anchor if anchor else advanceRowAnchor()
return f'<tr id={anchor}>{x}</tr>' return f"<tr id={anchor}>{x}</tr>"
def td(value, cell_attributes = ''):
return '<td {cell_attributes}>{value}</td>'.format(
cell_attributes = cell_attributes,
value = value)
def th(value, cell_attributes = ''): def td(value, cell_attributes=""):
return '<th {cell_attributes}>{value}</th>'.format( return "<td {cell_attributes}>{value}</td>".format(
cell_attributes = cell_attributes, cell_attributes=cell_attributes, value=value
value = value) )
def tableRow(cell_values, cell_attributes = [], anchor=None):
def th(value, cell_attributes=""):
return "<th {cell_attributes}>{value}</th>".format(
cell_attributes=cell_attributes, value=value
)
def tableRow(cell_values, cell_attributes=[], anchor=None):
return tr( return tr(
''.join([td(v, a) "".join(
for v, a in itertools.zip_longest( [
cell_values, cell_attributes, td(v, a)
fillvalue = '') for v, a in itertools.zip_longest(
if a is not None and v is not None]), cell_values, cell_attributes, fillvalue=""
anchor) )
if a is not None and v is not None
]
),
anchor,
)
def tableHeader(cell_values, cell_attributes = []):
def tableHeader(cell_values, cell_attributes=[]):
return tr( return tr(
''.join([th(v, a) "".join(
for v, a in itertools.zip_longest( [
cell_values, cell_attributes, th(v, a)
fillvalue = '') for v, a in itertools.zip_longest(
if a is not None and v is not None])) cell_values, cell_attributes, fillvalue=""
)
if a is not None and v is not None
]
)
)
def tableStart(title): def tableStart(title):
cls = '-'.join(title.lower().split(' ')[:3]); cls = "-".join(title.lower().split(" ")[:3])
global table_anchor global table_anchor
table_anchor = cls table_anchor = cls
anchor = currentTableAnchor() anchor = currentTableAnchor()
help_anchor = '-'.join(title.lower().split(' ')); help_anchor = "-".join(title.lower().split(" "))
return f""" return f"""
<h2 id="{anchor}"> <h2 id="{anchor}">
<a class="cancela" href="#{anchor}">{title}</a> <a class="cancela" href="#{anchor}">{title}</a>
@@ -223,12 +247,14 @@ def tableStart(title):
<table class="{cls}"> <table class="{cls}">
""" """
def tableEnd(): def tableEnd():
return '</table>' return "</table>"
def tsvRows(n): def tsvRows(n):
try: try:
with open(n, encoding='utf-8') as fd: with open(n, encoding="utf-8") as fd:
result = [] result = []
for row in csv.reader(fd, delimiter="\t", quoting=csv.QUOTE_NONE): for row in csv.reader(fd, delimiter="\t", quoting=csv.QUOTE_NONE):
new_row = [] new_row = []
@@ -237,27 +263,32 @@ def tsvRows(n):
# The second one (encode('latin1').decode('utf-8')) fixes the changes with unicode vs utf-8 chars, so # The second one (encode('latin1').decode('utf-8')) fixes the changes with unicode vs utf-8 chars, so
# 'Чем зÐ<C2B7>нимаеÑ<C2B5>ЬÑ<C2AC>Ñ<EFBFBD>' is transformed back into 'Чем зАнимаешЬся'. # 'Чем зÐ<C2B7>нимаеÑ<C2B5>ЬÑ<C2AC>Ñ<EFBFBD>' is transformed back into 'Чем зАнимаешЬся'.
new_row.append(e.encode('utf-8').decode('unicode-escape').encode('latin1').decode('utf-8')) new_row.append(
e.encode("utf-8")
.decode("unicode-escape")
.encode("latin1")
.decode("utf-8")
)
result.append(new_row) result.append(new_row)
return result return result
except: except:
report_errors.append( report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1])
traceback.format_exception_only(
*sys.exc_info()[:2])[-1])
pass pass
return [] return []
def htmlRows(n): def htmlRows(n):
rawRows = tsvRows(n) rawRows = tsvRows(n)
result = '' result = ""
for row in rawRows: for row in rawRows:
result += tableRow(row) result += tableRow(row)
return result return result
def addSimpleTable(caption, columns, rows, pos=None): def addSimpleTable(caption, columns, rows, pos=None):
global tables global tables
text = '' text = ""
if not rows: if not rows:
return return
@@ -268,51 +299,63 @@ def addSimpleTable(caption, columns, rows, pos=None):
text += tableEnd() text += tableEnd()
tables.insert(pos if pos else len(tables), text) tables.insert(pos if pos else len(tables), text)
def add_tested_commits(): def add_tested_commits():
global report_errors global report_errors
try: try:
addSimpleTable('Tested Commits', ['Old', 'New'], addSimpleTable(
[['<pre>{}</pre>'.format(x) for x in "Tested Commits",
[open('left-commit.txt').read(), ["Old", "New"],
open('right-commit.txt').read()]]]) [
[
"<pre>{}</pre>".format(x)
for x in [
open("left-commit.txt").read(),
open("right-commit.txt").read(),
]
]
],
)
except: except:
# Don't fail if no commit info -- maybe it's a manual run. # Don't fail if no commit info -- maybe it's a manual run.
report_errors.append( report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1])
traceback.format_exception_only(
*sys.exc_info()[:2])[-1])
pass pass
def add_report_errors(): def add_report_errors():
global tables global tables
global report_errors global report_errors
# Add the errors reported by various steps of comparison script # Add the errors reported by various steps of comparison script
try: try:
report_errors += [l.strip() for l in open('report/errors.log')] report_errors += [l.strip() for l in open("report/errors.log")]
except: except:
report_errors.append( report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1])
traceback.format_exception_only(
*sys.exc_info()[:2])[-1])
pass pass
if not report_errors: if not report_errors:
return return
text = tableStart('Errors while Building the Report') text = tableStart("Errors while Building the Report")
text += tableHeader(['Error']) text += tableHeader(["Error"])
for x in report_errors: for x in report_errors:
text += tableRow([x]) text += tableRow([x])
text += tableEnd() text += tableEnd()
# Insert after Tested Commits # Insert after Tested Commits
tables.insert(1, text) tables.insert(1, text)
errors_explained.append([f'<a href="#{currentTableAnchor()}">There were some errors while building the report</a>']); errors_explained.append(
[
f'<a href="#{currentTableAnchor()}">There were some errors while building the report</a>'
]
)
def add_errors_explained(): def add_errors_explained():
if not errors_explained: if not errors_explained:
return return
text = '<a name="fail1"/>' text = '<a name="fail1"/>'
text += tableStart('Error Summary') text += tableStart("Error Summary")
text += tableHeader(['Description']) text += tableHeader(["Description"])
for row in errors_explained: for row in errors_explained:
text += tableRow(row) text += tableRow(row)
text += tableEnd() text += tableEnd()
@@ -321,59 +364,81 @@ def add_errors_explained():
tables.insert(1, text) tables.insert(1, text)
if args.report == 'main': if args.report == "main":
print((header_template.format())) print((header_template.format()))
add_tested_commits() add_tested_commits()
run_error_rows = tsvRows("run-errors.tsv")
run_error_rows = tsvRows('run-errors.tsv')
error_tests += len(run_error_rows) error_tests += len(run_error_rows)
addSimpleTable('Run Errors', ['Test', 'Error'], run_error_rows) addSimpleTable("Run Errors", ["Test", "Error"], run_error_rows)
if run_error_rows: if run_error_rows:
errors_explained.append([f'<a href="#{currentTableAnchor()}">There were some errors while running the tests</a>']); errors_explained.append(
[
f'<a href="#{currentTableAnchor()}">There were some errors while running the tests</a>'
]
)
slow_on_client_rows = tsvRows("report/slow-on-client.tsv")
slow_on_client_rows = tsvRows('report/slow-on-client.tsv')
error_tests += len(slow_on_client_rows) error_tests += len(slow_on_client_rows)
addSimpleTable('Slow on Client', addSimpleTable(
['Client time,&nbsp;s', 'Server time,&nbsp;s', 'Ratio', 'Test', 'Query'], "Slow on Client",
slow_on_client_rows) ["Client time,&nbsp;s", "Server time,&nbsp;s", "Ratio", "Test", "Query"],
slow_on_client_rows,
)
if slow_on_client_rows: if slow_on_client_rows:
errors_explained.append([f'<a href="#{currentTableAnchor()}">Some queries are taking noticeable time client-side (missing `FORMAT Null`?)</a>']); errors_explained.append(
[
f'<a href="#{currentTableAnchor()}">Some queries are taking noticeable time client-side (missing `FORMAT Null`?)</a>'
]
)
unmarked_short_rows = tsvRows('report/unexpected-query-duration.tsv') unmarked_short_rows = tsvRows("report/unexpected-query-duration.tsv")
error_tests += len(unmarked_short_rows) error_tests += len(unmarked_short_rows)
addSimpleTable('Unexpected Query Duration', addSimpleTable(
['Problem', 'Marked as "short"?', 'Run time, s', 'Test', '#', 'Query'], "Unexpected Query Duration",
unmarked_short_rows) ["Problem", 'Marked as "short"?', "Run time, s", "Test", "#", "Query"],
unmarked_short_rows,
)
if unmarked_short_rows: if unmarked_short_rows:
errors_explained.append([f'<a href="#{currentTableAnchor()}">Some queries have unexpected duration</a>']); errors_explained.append(
[
f'<a href="#{currentTableAnchor()}">Some queries have unexpected duration</a>'
]
)
def add_partial(): def add_partial():
rows = tsvRows('report/partial-queries-report.tsv') rows = tsvRows("report/partial-queries-report.tsv")
if not rows: if not rows:
return return
global unstable_partial_queries, slow_average_tests, tables global unstable_partial_queries, slow_average_tests, tables
text = tableStart('Partial Queries') text = tableStart("Partial Queries")
columns = ['Median time, s', 'Relative time variance', 'Test', '#', 'Query'] columns = ["Median time, s", "Relative time variance", "Test", "#", "Query"]
text += tableHeader(columns) text += tableHeader(columns)
attrs = ['' for c in columns] attrs = ["" for c in columns]
for row in rows: for row in rows:
anchor = f'{currentTableAnchor()}.{row[2]}.{row[3]}' anchor = f"{currentTableAnchor()}.{row[2]}.{row[3]}"
if float(row[1]) > 0.10: if float(row[1]) > 0.10:
attrs[1] = f'style="background: {color_bad}"' attrs[1] = f'style="background: {color_bad}"'
unstable_partial_queries += 1 unstable_partial_queries += 1
errors_explained.append([f'<a href="#{anchor}">The query no. {row[3]} of test \'{row[2]}\' has excessive variance of run time. Keep it below 10%</a>']) errors_explained.append(
[
f"<a href=\"#{anchor}\">The query no. {row[3]} of test '{row[2]}' has excessive variance of run time. Keep it below 10%</a>"
]
)
else: else:
attrs[1] = '' attrs[1] = ""
if float(row[0]) > allowed_single_run_time: if float(row[0]) > allowed_single_run_time:
attrs[0] = f'style="background: {color_bad}"' attrs[0] = f'style="background: {color_bad}"'
errors_explained.append([f'<a href="#{anchor}">The query no. {row[3]} of test \'{row[2]}\' is taking too long to run. Keep the run time below {allowed_single_run_time} seconds"</a>']) errors_explained.append(
[
f'<a href="#{anchor}">The query no. {row[3]} of test \'{row[2]}\' is taking too long to run. Keep the run time below {allowed_single_run_time} seconds"</a>'
]
)
slow_average_tests += 1 slow_average_tests += 1
else: else:
attrs[0] = '' attrs[0] = ""
text += tableRow(row, attrs, anchor) text += tableRow(row, attrs, anchor)
text += tableEnd() text += tableEnd()
tables.append(text) tables.append(text)
@@ -381,41 +446,45 @@ if args.report == 'main':
add_partial() add_partial()
def add_changes(): def add_changes():
rows = tsvRows('report/changed-perf.tsv') rows = tsvRows("report/changed-perf.tsv")
if not rows: if not rows:
return return
global faster_queries, slower_queries, tables global faster_queries, slower_queries, tables
text = tableStart('Changes in Performance') text = tableStart("Changes in Performance")
columns = [ columns = [
'Old,&nbsp;s', # 0 "Old,&nbsp;s", # 0
'New,&nbsp;s', # 1 "New,&nbsp;s", # 1
'Ratio of speedup&nbsp;(-) or slowdown&nbsp;(+)', # 2 "Ratio of speedup&nbsp;(-) or slowdown&nbsp;(+)", # 2
'Relative difference (new&nbsp;&minus;&nbsp;old) / old', # 3 "Relative difference (new&nbsp;&minus;&nbsp;old) / old", # 3
'p&nbsp;<&nbsp;0.01 threshold', # 4 "p&nbsp;<&nbsp;0.01 threshold", # 4
'', # Failed # 5 "", # Failed # 5
'Test', # 6 "Test", # 6
'#', # 7 "#", # 7
'Query', # 8 "Query", # 8
] ]
attrs = ['' for c in columns] attrs = ["" for c in columns]
attrs[5] = None attrs[5] = None
text += tableHeader(columns, attrs) text += tableHeader(columns, attrs)
for row in rows: for row in rows:
anchor = f'{currentTableAnchor()}.{row[6]}.{row[7]}' anchor = f"{currentTableAnchor()}.{row[6]}.{row[7]}"
if int(row[5]): if int(row[5]):
if float(row[3]) < 0.: if float(row[3]) < 0.0:
faster_queries += 1 faster_queries += 1
attrs[2] = attrs[3] = f'style="background: {color_good}"' attrs[2] = attrs[3] = f'style="background: {color_good}"'
else: else:
slower_queries += 1 slower_queries += 1
attrs[2] = attrs[3] = f'style="background: {color_bad}"' attrs[2] = attrs[3] = f'style="background: {color_bad}"'
errors_explained.append([f'<a href="#{anchor}">The query no. {row[7]} of test \'{row[6]}\' has slowed down</a>']) errors_explained.append(
[
f"<a href=\"#{anchor}\">The query no. {row[7]} of test '{row[6]}' has slowed down</a>"
]
)
else: else:
attrs[2] = attrs[3] = '' attrs[2] = attrs[3] = ""
text += tableRow(row, attrs, anchor) text += tableRow(row, attrs, anchor)
@@ -427,35 +496,35 @@ if args.report == 'main':
def add_unstable_queries(): def add_unstable_queries():
global unstable_queries, very_unstable_queries, tables global unstable_queries, very_unstable_queries, tables
unstable_rows = tsvRows('report/unstable-queries.tsv') unstable_rows = tsvRows("report/unstable-queries.tsv")
if not unstable_rows: if not unstable_rows:
return return
unstable_queries += len(unstable_rows) unstable_queries += len(unstable_rows)
columns = [ columns = [
'Old,&nbsp;s', #0 "Old,&nbsp;s", # 0
'New,&nbsp;s', #1 "New,&nbsp;s", # 1
'Relative difference (new&nbsp;-&nbsp;old)/old', #2 "Relative difference (new&nbsp;-&nbsp;old)/old", # 2
'p&nbsp;&lt;&nbsp;0.01 threshold', #3 "p&nbsp;&lt;&nbsp;0.01 threshold", # 3
'', # Failed #4 "", # Failed #4
'Test', #5 "Test", # 5
'#', #6 "#", # 6
'Query' #7 "Query", # 7
] ]
attrs = ['' for c in columns] attrs = ["" for c in columns]
attrs[4] = None attrs[4] = None
text = tableStart('Unstable Queries') text = tableStart("Unstable Queries")
text += tableHeader(columns, attrs) text += tableHeader(columns, attrs)
for r in unstable_rows: for r in unstable_rows:
anchor = f'{currentTableAnchor()}.{r[5]}.{r[6]}' anchor = f"{currentTableAnchor()}.{r[5]}.{r[6]}"
if int(r[4]): if int(r[4]):
very_unstable_queries += 1 very_unstable_queries += 1
attrs[3] = f'style="background: {color_bad}"' attrs[3] = f'style="background: {color_bad}"'
else: else:
attrs[3] = '' attrs[3] = ""
# Just don't add the slightly unstable queries we don't consider # Just don't add the slightly unstable queries we don't consider
# errors. It's not clear what the user should do with them. # errors. It's not clear what the user should do with them.
continue continue
@@ -470,53 +539,70 @@ if args.report == 'main':
add_unstable_queries() add_unstable_queries()
skipped_tests_rows = tsvRows('analyze/skipped-tests.tsv') skipped_tests_rows = tsvRows("analyze/skipped-tests.tsv")
addSimpleTable('Skipped Tests', ['Test', 'Reason'], skipped_tests_rows) addSimpleTable("Skipped Tests", ["Test", "Reason"], skipped_tests_rows)
addSimpleTable('Test Performance Changes', addSimpleTable(
['Test', 'Ratio of speedup&nbsp;(-) or slowdown&nbsp;(+)', 'Queries', 'Total not OK', 'Changed perf', 'Unstable'], "Test Performance Changes",
tsvRows('report/test-perf-changes.tsv')) [
"Test",
"Ratio of speedup&nbsp;(-) or slowdown&nbsp;(+)",
"Queries",
"Total not OK",
"Changed perf",
"Unstable",
],
tsvRows("report/test-perf-changes.tsv"),
)
def add_test_times(): def add_test_times():
global slow_average_tests, tables global slow_average_tests, tables
rows = tsvRows('report/test-times.tsv') rows = tsvRows("report/test-times.tsv")
if not rows: if not rows:
return return
columns = [ columns = [
'Test', #0 "Test", # 0
'Wall clock time, entire test,&nbsp;s', #1 "Wall clock time, entire test,&nbsp;s", # 1
'Total client time for measured query runs,&nbsp;s', #2 "Total client time for measured query runs,&nbsp;s", # 2
'Queries', #3 "Queries", # 3
'Longest query, total for measured runs,&nbsp;s', #4 "Longest query, total for measured runs,&nbsp;s", # 4
'Wall clock time per query,&nbsp;s', #5 "Wall clock time per query,&nbsp;s", # 5
'Shortest query, total for measured runs,&nbsp;s', #6 "Shortest query, total for measured runs,&nbsp;s", # 6
'', # Runs #7 "", # Runs #7
] ]
attrs = ['' for c in columns] attrs = ["" for c in columns]
attrs[7] = None attrs[7] = None
text = tableStart('Test Times') text = tableStart("Test Times")
text += tableHeader(columns, attrs) text += tableHeader(columns, attrs)
allowed_average_run_time = 3.75 # 60 seconds per test at (7 + 1) * 2 runs allowed_average_run_time = 3.75 # 60 seconds per test at (7 + 1) * 2 runs
for r in rows: for r in rows:
anchor = f'{currentTableAnchor()}.{r[0]}' anchor = f"{currentTableAnchor()}.{r[0]}"
total_runs = (int(r[7]) + 1) * 2 # one prewarm run, two servers total_runs = (int(r[7]) + 1) * 2 # one prewarm run, two servers
if r[0] != 'Total' and float(r[5]) > allowed_average_run_time * total_runs: if r[0] != "Total" and float(r[5]) > allowed_average_run_time * total_runs:
# FIXME should be 15s max -- investigate parallel_insert # FIXME should be 15s max -- investigate parallel_insert
slow_average_tests += 1 slow_average_tests += 1
attrs[5] = f'style="background: {color_bad}"' attrs[5] = f'style="background: {color_bad}"'
errors_explained.append([f'<a href="#{anchor}">The test \'{r[0]}\' is too slow to run as a whole. Investigate whether the create and fill queries can be sped up']) errors_explained.append(
[
f"<a href=\"#{anchor}\">The test '{r[0]}' is too slow to run as a whole. Investigate whether the create and fill queries can be sped up"
]
)
else: else:
attrs[5] = '' attrs[5] = ""
if r[0] != 'Total' and float(r[4]) > allowed_single_run_time * total_runs: if r[0] != "Total" and float(r[4]) > allowed_single_run_time * total_runs:
slow_average_tests += 1 slow_average_tests += 1
attrs[4] = f'style="background: {color_bad}"' attrs[4] = f'style="background: {color_bad}"'
errors_explained.append([f'<a href="./all-queries.html#all-query-times.{r[0]}.0">Some query of the test \'{r[0]}\' is too slow to run. See the all queries report']) errors_explained.append(
[
f"<a href=\"./all-queries.html#all-query-times.{r[0]}.0\">Some query of the test '{r[0]}' is too slow to run. See the all queries report"
]
)
else: else:
attrs[4] = '' attrs[4] = ""
text += tableRow(r, attrs, anchor) text += tableRow(r, attrs, anchor)
@ -525,10 +611,17 @@ if args.report == 'main':
add_test_times() add_test_times()
addSimpleTable('Metric Changes', addSimpleTable(
['Metric', 'Old median value', 'New median value', "Metric Changes",
'Relative difference', 'Times difference'], [
tsvRows('metrics/changes.tsv')) "Metric",
"Old median value",
"New median value",
"Relative difference",
"Times difference",
],
tsvRows("metrics/changes.tsv"),
)
add_report_errors() add_report_errors()
add_errors_explained() add_errors_explained()
@ -536,7 +629,8 @@ if args.report == 'main':
for t in tables: for t in tables:
print(t) print(t)
print(f""" print(
f"""
</div> </div>
<p class="links"> <p class="links">
<a href="all-queries.html">All queries</a> <a href="all-queries.html">All queries</a>
@ -546,104 +640,111 @@ if args.report == 'main':
</p> </p>
</body> </body>
</html> </html>
""") """
)
status = 'success' status = "success"
message = 'See the report' message = "See the report"
message_array = [] message_array = []
if slow_average_tests: if slow_average_tests:
status = 'failure' status = "failure"
message_array.append(str(slow_average_tests) + ' too long') message_array.append(str(slow_average_tests) + " too long")
if faster_queries: if faster_queries:
message_array.append(str(faster_queries) + ' faster') message_array.append(str(faster_queries) + " faster")
if slower_queries: if slower_queries:
if slower_queries > 3: if slower_queries > 3:
status = 'failure' status = "failure"
message_array.append(str(slower_queries) + ' slower') message_array.append(str(slower_queries) + " slower")
if unstable_partial_queries: if unstable_partial_queries:
very_unstable_queries += unstable_partial_queries very_unstable_queries += unstable_partial_queries
status = 'failure' status = "failure"
# Don't show mildly unstable queries, only the very unstable ones we # Don't show mildly unstable queries, only the very unstable ones we
# treat as errors. # treat as errors.
if very_unstable_queries: if very_unstable_queries:
if very_unstable_queries > 5: if very_unstable_queries > 5:
error_tests += very_unstable_queries error_tests += very_unstable_queries
status = 'failure' status = "failure"
message_array.append(str(very_unstable_queries) + ' unstable') message_array.append(str(very_unstable_queries) + " unstable")
error_tests += slow_average_tests error_tests += slow_average_tests
if error_tests: if error_tests:
status = 'failure' status = "failure"
message_array.insert(0, str(error_tests) + ' errors') message_array.insert(0, str(error_tests) + " errors")
if message_array: if message_array:
message = ', '.join(message_array) message = ", ".join(message_array)
if report_errors: if report_errors:
status = 'failure' status = "failure"
message = 'Errors while building the report.' message = "Errors while building the report."
print((""" print(
(
"""
<!--status: {status}--> <!--status: {status}-->
<!--message: {message}--> <!--message: {message}-->
""".format(status=status, message=message))) """.format(
status=status, message=message
)
)
)
elif args.report == 'all-queries': elif args.report == "all-queries":
print((header_template.format())) print((header_template.format()))
add_tested_commits() add_tested_commits()
def add_all_queries(): def add_all_queries():
rows = tsvRows('report/all-queries.tsv') rows = tsvRows("report/all-queries.tsv")
if not rows: if not rows:
return return
columns = [ columns = [
'', # Changed #0 "", # Changed #0
'', # Unstable #1 "", # Unstable #1
'Old,&nbsp;s', #2 "Old,&nbsp;s", # 2
'New,&nbsp;s', #3 "New,&nbsp;s", # 3
'Ratio of speedup&nbsp;(-) or slowdown&nbsp;(+)', #4 "Ratio of speedup&nbsp;(-) or slowdown&nbsp;(+)", # 4
'Relative difference (new&nbsp;&minus;&nbsp;old) / old', #5 "Relative difference (new&nbsp;&minus;&nbsp;old) / old", # 5
'p&nbsp;&lt;&nbsp;0.01 threshold', #6 "p&nbsp;&lt;&nbsp;0.01 threshold", # 6
'Test', #7 "Test", # 7
'#', #8 "#", # 8
'Query', #9 "Query", # 9
] ]
attrs = ['' for c in columns] attrs = ["" for c in columns]
attrs[0] = None attrs[0] = None
attrs[1] = None attrs[1] = None
text = tableStart('All Query Times') text = tableStart("All Query Times")
text += tableHeader(columns, attrs) text += tableHeader(columns, attrs)
for r in rows: for r in rows:
anchor = f'{currentTableAnchor()}.{r[7]}.{r[8]}' anchor = f"{currentTableAnchor()}.{r[7]}.{r[8]}"
if int(r[1]): if int(r[1]):
attrs[6] = f'style="background: {color_bad}"' attrs[6] = f'style="background: {color_bad}"'
else: else:
attrs[6] = '' attrs[6] = ""
if int(r[0]): if int(r[0]):
if float(r[5]) > 0.: if float(r[5]) > 0.0:
attrs[4] = attrs[5] = f'style="background: {color_bad}"' attrs[4] = attrs[5] = f'style="background: {color_bad}"'
else: else:
attrs[4] = attrs[5] = f'style="background: {color_good}"' attrs[4] = attrs[5] = f'style="background: {color_good}"'
else: else:
attrs[4] = attrs[5] = '' attrs[4] = attrs[5] = ""
if (float(r[2]) + float(r[3])) / 2 > allowed_single_run_time: if (float(r[2]) + float(r[3])) / 2 > allowed_single_run_time:
attrs[2] = f'style="background: {color_bad}"' attrs[2] = f'style="background: {color_bad}"'
attrs[3] = f'style="background: {color_bad}"' attrs[3] = f'style="background: {color_bad}"'
else: else:
attrs[2] = '' attrs[2] = ""
attrs[3] = '' attrs[3] = ""
text += tableRow(r, attrs, anchor) text += tableRow(r, attrs, anchor)
@ -655,7 +756,8 @@ elif args.report == 'all-queries':
for t in tables: for t in tables:
print(t) print(t)
print(f""" print(
f"""
</div> </div>
<p class="links"> <p class="links">
<a href="report.html">Main report</a> <a href="report.html">Main report</a>
@ -665,4 +767,5 @@ elif args.report == 'all-queries':
</p> </p>
</body> </body>
</html> </html>
""") """
)
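The splits above (for example the addSimpleTable('Metric Changes', ...) call) follow black's two core rules: string quotes are normalised to double quotes, and a call that would exceed the default 88-character line length is split over several lines, in the worst case one argument per line with a trailing comma added. A minimal sketch of the same rule, assuming only that the black package is installed:

import black

# The call roughly as it looked before this change (addSimpleTable and
# tsvRows are undefined placeholders; black only needs parseable source).
src = (
    "addSimpleTable('Metric Changes',\n"
    "    ['Metric', 'Old median value', 'New median value',\n"
    "    'Relative difference', 'Times difference'],\n"
    "    tsvRows('metrics/changes.tsv'))\n"
)

# black.Mode() defaults to a line length of 88; the output should match
# the right-hand side of the diff: double quotes, one element per line,
# and a trailing comma after the last one.
print(black.format_str(src, mode=black.Mode()))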

View File

@ -7,18 +7,19 @@ import csv
RESULT_LOG_NAME = "run.log" RESULT_LOG_NAME = "run.log"
def process_result(result_folder): def process_result(result_folder):
status = "success" status = "success"
description = 'Server started and responded' description = "Server started and responded"
summary = [("Smoke test", "OK")] summary = [("Smoke test", "OK")]
with open(os.path.join(result_folder, RESULT_LOG_NAME), 'r') as run_log: with open(os.path.join(result_folder, RESULT_LOG_NAME), "r") as run_log:
lines = run_log.read().split('\n') lines = run_log.read().split("\n")
if not lines or lines[0].strip() != 'OK': if not lines or lines[0].strip() != "OK":
status = "failure" status = "failure"
logging.info("Lines is not ok: %s", str('\n'.join(lines))) logging.info("Lines is not ok: %s", str("\n".join(lines)))
summary = [("Smoke test", "FAIL")] summary = [("Smoke test", "FAIL")]
description = 'Server failed to respond, see result in logs' description = "Server failed to respond, see result in logs"
result_logs = [] result_logs = []
server_log_path = os.path.join(result_folder, "clickhouse-server.log") server_log_path = os.path.join(result_folder, "clickhouse-server.log")
@ -38,20 +39,22 @@ def process_result(result_folder):
def write_results(results_file, status_file, results, status): def write_results(results_file, status_file, results, status):
with open(results_file, 'w') as f: with open(results_file, "w") as f:
out = csv.writer(f, delimiter='\t') out = csv.writer(f, delimiter="\t")
out.writerows(results) out.writerows(results)
with open(status_file, 'w') as f: with open(status_file, "w") as f:
out = csv.writer(f, delimiter='\t') out = csv.writer(f, delimiter="\t")
out.writerow(status) out.writerow(status)
if __name__ == "__main__": if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of split build smoke test") parser = argparse.ArgumentParser(
parser.add_argument("--in-results-dir", default='/test_output/') description="ClickHouse script for parsing results of split build smoke test"
parser.add_argument("--out-results-file", default='/test_output/test_results.tsv') )
parser.add_argument("--out-status-file", default='/test_output/check_status.tsv') parser.add_argument("--in-results-dir", default="/test_output/")
parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
args = parser.parse_args() args = parser.parse_args()
state, description, test_results, logs = process_result(args.in_results_dir) state, description, test_results, logs = process_result(args.in_results_dir)
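A formatting-only change like the ones above is easy to regress, so a check is usually kept alongside it. A hedged sketch of such a check; --check and --diff are standard black options, and running black via python -m only assumes it is installed in the same environment:

import subprocess
import sys

# Exit code 0 means nothing would change, 1 means some files would be
# reformatted (their diffs go to stdout), anything higher signals an error.
result = subprocess.run(
    [sys.executable, "-m", "black", "--check", "--diff", "."],
    capture_output=True,
    text=True,
)
print(result.stdout or "all files already black-formatted")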

View File

@ -10,11 +10,18 @@ def process_result(result_folder):
status = "success" status = "success"
summary = [] summary = []
paths = [] paths = []
tests = ["TLPWhere", "TLPGroupBy", "TLPHaving", "TLPWhereGroupBy", "TLPDistinct", "TLPAggregate"] tests = [
"TLPWhere",
"TLPGroupBy",
"TLPHaving",
"TLPWhereGroupBy",
"TLPDistinct",
"TLPAggregate",
]
for test in tests: for test in tests:
err_path = '{}/{}.err'.format(result_folder, test) err_path = "{}/{}.err".format(result_folder, test)
out_path = '{}/{}.out'.format(result_folder, test) out_path = "{}/{}.out".format(result_folder, test)
if not os.path.exists(err_path): if not os.path.exists(err_path):
logging.info("No output err on path %s", err_path) logging.info("No output err on path %s", err_path)
summary.append((test, "SKIPPED")) summary.append((test, "SKIPPED"))
@ -23,24 +30,24 @@ def process_result(result_folder):
else: else:
paths.append(err_path) paths.append(err_path)
paths.append(out_path) paths.append(out_path)
with open(err_path, 'r') as f: with open(err_path, "r") as f:
if 'AssertionError' in f.read(): if "AssertionError" in f.read():
summary.append((test, "FAIL")) summary.append((test, "FAIL"))
status = 'failure' status = "failure"
else: else:
summary.append((test, "OK")) summary.append((test, "OK"))
logs_path = '{}/logs.tar.gz'.format(result_folder) logs_path = "{}/logs.tar.gz".format(result_folder)
if not os.path.exists(logs_path): if not os.path.exists(logs_path):
logging.info("No logs tar on path %s", logs_path) logging.info("No logs tar on path %s", logs_path)
else: else:
paths.append(logs_path) paths.append(logs_path)
stdout_path = '{}/stdout.log'.format(result_folder) stdout_path = "{}/stdout.log".format(result_folder)
if not os.path.exists(stdout_path): if not os.path.exists(stdout_path):
logging.info("No stdout log on path %s", stdout_path) logging.info("No stdout log on path %s", stdout_path)
else: else:
paths.append(stdout_path) paths.append(stdout_path)
stderr_path = '{}/stderr.log'.format(result_folder) stderr_path = "{}/stderr.log".format(result_folder)
if not os.path.exists(stderr_path): if not os.path.exists(stderr_path):
logging.info("No stderr log on path %s", stderr_path) logging.info("No stderr log on path %s", stderr_path)
else: else:
@ -52,20 +59,22 @@ def process_result(result_folder):
def write_results(results_file, status_file, results, status): def write_results(results_file, status_file, results, status):
with open(results_file, 'w') as f: with open(results_file, "w") as f:
out = csv.writer(f, delimiter='\t') out = csv.writer(f, delimiter="\t")
out.writerows(results) out.writerows(results)
with open(status_file, 'w') as f: with open(status_file, "w") as f:
out = csv.writer(f, delimiter='\t') out = csv.writer(f, delimiter="\t")
out.writerow(status) out.writerow(status)
if __name__ == "__main__": if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of sqlancer test") parser = argparse.ArgumentParser(
parser.add_argument("--in-results-dir", default='/test_output/') description="ClickHouse script for parsing results of sqlancer test"
parser.add_argument("--out-results-file", default='/test_output/test_results.tsv') )
parser.add_argument("--out-status-file", default='/test_output/check_status.tsv') parser.add_argument("--in-results-dir", default="/test_output/")
parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
args = parser.parse_args() args = parser.parse_args()
state, description, test_results, logs = process_result(args.in_results_dir) state, description, test_results, logs = process_result(args.in_results_dir)

View File

@ -22,9 +22,9 @@ def process_result(result_folder):
total_other = 0 total_other = 0
test_results = [] test_results = []
for test in results["tests"]: for test in results["tests"]:
test_name = test['test']['test_name'] test_name = test["test"]["test_name"]
test_result = test['result']['result_type'].upper() test_result = test["result"]["result_type"].upper()
test_time = str(test['result']['message_rtime']) test_time = str(test["result"]["message_rtime"])
total_tests += 1 total_tests += 1
if test_result == "OK": if test_result == "OK":
total_ok += 1 total_ok += 1
@ -39,24 +39,29 @@ def process_result(result_folder):
else: else:
status = "success" status = "success"
description = "failed: {}, passed: {}, other: {}".format(total_fail, total_ok, total_other) description = "failed: {}, passed: {}, other: {}".format(
total_fail, total_ok, total_other
)
return status, description, test_results, [json_path, test_binary_log] return status, description, test_results, [json_path, test_binary_log]
def write_results(results_file, status_file, results, status): def write_results(results_file, status_file, results, status):
with open(results_file, 'w') as f: with open(results_file, "w") as f:
out = csv.writer(f, delimiter='\t') out = csv.writer(f, delimiter="\t")
out.writerows(results) out.writerows(results)
with open(status_file, 'w') as f: with open(status_file, "w") as f:
out = csv.writer(f, delimiter='\t') out = csv.writer(f, delimiter="\t")
out.writerow(status) out.writerow(status)
if __name__ == "__main__": if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of Testflows tests") parser = argparse.ArgumentParser(
parser.add_argument("--in-results-dir", default='./') description="ClickHouse script for parsing results of Testflows tests"
parser.add_argument("--out-results-file", default='./test_results.tsv') )
parser.add_argument("--out-status-file", default='./check_status.tsv') parser.add_argument("--in-results-dir", default="./")
parser.add_argument("--out-results-file", default="./test_results.tsv")
parser.add_argument("--out-status-file", default="./check_status.tsv")
args = parser.parse_args() args = parser.parse_args()
state, description, test_results, logs = process_result(args.in_results_dir) state, description, test_results, logs = process_result(args.in_results_dir)
@ -64,4 +69,3 @@ if __name__ == "__main__":
status = (state, description) status = (state, description)
write_results(args.out_results_file, args.out_status_file, test_results, status) write_results(args.out_results_file, args.out_status_file, test_results, status)
logging.info("Result written") logging.info("Result written")
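One thing black never does is edit string literals themselves: in the description = "failed: ..." change above only the call is re-wrapped, while the format string stays on one line however long it is. A small sketch of that behaviour, again assuming only that black is installed:

import black

# The statement from this file before the change, slightly over the
# 88-character limit (total_fail and friends are undefined placeholders).
src = (
    "description = 'failed: {}, passed: {}, other: {}'"
    ".format(total_fail, total_ok, total_other)\n"
)

# Only the .format() arguments move onto a new indented line; the literal
# is left untouched because black does not split or join strings by default.
print(black.format_str(src, mode=black.Mode()))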

View File

@ -5,24 +5,26 @@ import logging
import argparse import argparse
import csv import csv
OK_SIGN = 'OK ]' OK_SIGN = "OK ]"
FAILED_SIGN = 'FAILED ]' FAILED_SIGN = "FAILED ]"
SEGFAULT = 'Segmentation fault' SEGFAULT = "Segmentation fault"
SIGNAL = 'received signal SIG' SIGNAL = "received signal SIG"
PASSED = 'PASSED' PASSED = "PASSED"
def get_test_name(line): def get_test_name(line):
elements = reversed(line.split(' ')) elements = reversed(line.split(" "))
for element in elements: for element in elements:
if '(' not in element and ')' not in element: if "(" not in element and ")" not in element:
return element return element
raise Exception("No test name in line '{}'".format(line)) raise Exception("No test name in line '{}'".format(line))
def process_result(result_folder): def process_result(result_folder):
summary = [] summary = []
total_counter = 0 total_counter = 0
failed_counter = 0 failed_counter = 0
result_log_path = '{}/test_result.txt'.format(result_folder) result_log_path = "{}/test_result.txt".format(result_folder)
if not os.path.exists(result_log_path): if not os.path.exists(result_log_path):
logging.info("No output log on path %s", result_log_path) logging.info("No output log on path %s", result_log_path)
return "exception", "No output log", [] return "exception", "No output log", []
@ -30,7 +32,7 @@ def process_result(result_folder):
status = "success" status = "success"
description = "" description = ""
passed = False passed = False
with open(result_log_path, 'r') as test_result: with open(result_log_path, "r") as test_result:
for line in test_result: for line in test_result:
if OK_SIGN in line: if OK_SIGN in line:
logging.info("Found ok line: '%s'", line) logging.info("Found ok line: '%s'", line)
@ -38,7 +40,7 @@ def process_result(result_folder):
logging.info("Test name: '%s'", test_name) logging.info("Test name: '%s'", test_name)
summary.append((test_name, "OK")) summary.append((test_name, "OK"))
total_counter += 1 total_counter += 1
elif FAILED_SIGN in line and 'listed below' not in line and 'ms)' in line: elif FAILED_SIGN in line and "listed below" not in line and "ms)" in line:
logging.info("Found fail line: '%s'", line) logging.info("Found fail line: '%s'", line)
test_name = get_test_name(line.strip()) test_name = get_test_name(line.strip())
logging.info("Test name: '%s'", test_name) logging.info("Test name: '%s'", test_name)
@ -67,25 +69,30 @@ def process_result(result_folder):
status = "failure" status = "failure"
if not description: if not description:
description += "fail: {}, passed: {}".format(failed_counter, total_counter - failed_counter) description += "fail: {}, passed: {}".format(
failed_counter, total_counter - failed_counter
)
return status, description, summary return status, description, summary
def write_results(results_file, status_file, results, status): def write_results(results_file, status_file, results, status):
with open(results_file, 'w') as f: with open(results_file, "w") as f:
out = csv.writer(f, delimiter='\t') out = csv.writer(f, delimiter="\t")
out.writerows(results) out.writerows(results)
with open(status_file, 'w') as f: with open(status_file, "w") as f:
out = csv.writer(f, delimiter='\t') out = csv.writer(f, delimiter="\t")
out.writerow(status) out.writerow(status)
if __name__ == "__main__": if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of unit tests") parser = argparse.ArgumentParser(
parser.add_argument("--in-results-dir", default='/test_output/') description="ClickHouse script for parsing results of unit tests"
parser.add_argument("--out-results-file", default='/test_output/test_results.tsv') )
parser.add_argument("--out-status-file", default='/test_output/check_status.tsv') parser.add_argument("--in-results-dir", default="/test_output/")
parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
args = parser.parse_args() args = parser.parse_args()
state, description, test_results = process_result(args.in_results_dir) state, description, test_results = process_result(args.in_results_dir)
@ -93,4 +100,3 @@ if __name__ == "__main__":
status = (state, description) status = (state, description)
write_results(args.out_results_file, args.out_status_file, test_results, status) write_results(args.out_results_file, args.out_status_file, test_results, status)
logging.info("Result written") logging.info("Result written")

View File

@ -16,6 +16,7 @@ NO_TASK_TIMEOUT_SIGNS = ["All tests have finished", "No tests were run"]
RETRIES_SIGN = "Some tests were restarted" RETRIES_SIGN = "Some tests were restarted"
def process_test_log(log_path): def process_test_log(log_path):
total = 0 total = 0
skipped = 0 skipped = 0
@ -26,7 +27,7 @@ def process_test_log(log_path):
retries = False retries = False
task_timeout = True task_timeout = True
test_results = [] test_results = []
with open(log_path, 'r') as test_file: with open(log_path, "r") as test_file:
for line in test_file: for line in test_file:
original_line = line original_line = line
line = line.strip() line = line.strip()
@ -36,12 +37,15 @@ def process_test_log(log_path):
hung = True hung = True
if RETRIES_SIGN in line: if RETRIES_SIGN in line:
retries = True retries = True
if any(sign in line for sign in (OK_SIGN, FAIL_SIGN, UNKNOWN_SIGN, SKIPPED_SIGN)): if any(
test_name = line.split(' ')[2].split(':')[0] sign in line
for sign in (OK_SIGN, FAIL_SIGN, UNKNOWN_SIGN, SKIPPED_SIGN)
):
test_name = line.split(" ")[2].split(":")[0]
test_time = '' test_time = ""
try: try:
time_token = line.split(']')[1].strip().split()[0] time_token = line.split("]")[1].strip().split()[0]
float(time_token) float(time_token)
test_time = time_token test_time = time_token
except: except:
@ -66,9 +70,22 @@ def process_test_log(log_path):
elif len(test_results) > 0 and test_results[-1][1] == "FAIL": elif len(test_results) > 0 and test_results[-1][1] == "FAIL":
test_results[-1][3].append(original_line) test_results[-1][3].append(original_line)
test_results = [(test[0], test[1], test[2], ''.join(test[3])) for test in test_results] test_results = [
(test[0], test[1], test[2], "".join(test[3])) for test in test_results
]
return (
total,
skipped,
unknown,
failed,
success,
hung,
task_timeout,
retries,
test_results,
)
return total, skipped, unknown, failed, success, hung, task_timeout, retries, test_results
def process_result(result_path): def process_result(result_path):
test_results = [] test_results = []
@ -76,16 +93,26 @@ def process_result(result_path):
description = "" description = ""
files = os.listdir(result_path) files = os.listdir(result_path)
if files: if files:
logging.info("Find files in result folder %s", ','.join(files)) logging.info("Find files in result folder %s", ",".join(files))
result_path = os.path.join(result_path, 'test_result.txt') result_path = os.path.join(result_path, "test_result.txt")
else: else:
result_path = None result_path = None
description = "No output log" description = "No output log"
state = "error" state = "error"
if result_path and os.path.exists(result_path): if result_path and os.path.exists(result_path):
total, skipped, unknown, failed, success, hung, task_timeout, retries, test_results = process_test_log(result_path) (
is_flacky_check = 1 < int(os.environ.get('NUM_TRIES', 1)) total,
skipped,
unknown,
failed,
success,
hung,
task_timeout,
retries,
test_results,
) = process_test_log(result_path)
is_flacky_check = 1 < int(os.environ.get("NUM_TRIES", 1))
logging.info("Is flacky check: %s", is_flacky_check) logging.info("Is flacky check: %s", is_flacky_check)
# If no tests were run (success == 0) it indicates an error (e.g. server did not start or crashed immediately) # If no tests were run (success == 0) it indicates an error (e.g. server did not start or crashed immediately)
# But it's Ok for "flaky checks" - they can contain just one test for check which is marked as skipped. # But it's Ok for "flaky checks" - they can contain just one test for check which is marked as skipped.
@ -120,20 +147,22 @@ def process_result(result_path):
def write_results(results_file, status_file, results, status): def write_results(results_file, status_file, results, status):
with open(results_file, 'w') as f: with open(results_file, "w") as f:
out = csv.writer(f, delimiter='\t') out = csv.writer(f, delimiter="\t")
out.writerows(results) out.writerows(results)
with open(status_file, 'w') as f: with open(status_file, "w") as f:
out = csv.writer(f, delimiter='\t') out = csv.writer(f, delimiter="\t")
out.writerow(status) out.writerow(status)
if __name__ == "__main__": if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of functional tests") parser = argparse.ArgumentParser(
parser.add_argument("--in-results-dir", default='/test_output/') description="ClickHouse script for parsing results of functional tests"
parser.add_argument("--out-results-file", default='/test_output/test_results.tsv') )
parser.add_argument("--out-status-file", default='/test_output/check_status.tsv') parser.add_argument("--in-results-dir", default="/test_output/")
parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
args = parser.parse_args() args = parser.parse_args()
state, description, test_results = process_result(args.in_results_dir) state, description, test_results = process_result(args.in_results_dir)
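The nine-name unpacking of process_test_log above shows the less common case where the assignment targets are the long part: black wraps the left-hand side in parentheses and puts one name per line. A sketch reproducing that line (black installed is the only assumption; the names are the ones from this file):

import black

src = (
    "total, skipped, unknown, failed, success, hung, task_timeout, "
    "retries, test_results = process_test_log(result_path)\n"
)

# The statement is far over 88 characters and cannot be made to fit by
# splitting the call on the right, so black parenthesises the unpacking
# targets, one per line, as the diff above shows.
print(black.format_str(src, mode=black.Mode()))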

View File

@ -15,24 +15,24 @@ import website
def prepare_amp_html(lang, args, root, site_temp, main_site_dir): def prepare_amp_html(lang, args, root, site_temp, main_site_dir):
src_path = root src_path = root
src_index = os.path.join(src_path, 'index.html') src_index = os.path.join(src_path, "index.html")
rel_path = os.path.relpath(src_path, site_temp) rel_path = os.path.relpath(src_path, site_temp)
dst_path = os.path.join(main_site_dir, rel_path, 'amp') dst_path = os.path.join(main_site_dir, rel_path, "amp")
dst_index = os.path.join(dst_path, 'index.html') dst_index = os.path.join(dst_path, "index.html")
logging.debug(f'Generating AMP version for {rel_path} ({lang})') logging.debug(f"Generating AMP version for {rel_path} ({lang})")
os.makedirs(dst_path) os.makedirs(dst_path)
with open(src_index, 'r') as f: with open(src_index, "r") as f:
content = f.read() content = f.read()
css_in = ' '.join(website.get_css_in(args)) css_in = " ".join(website.get_css_in(args))
command = f"purifycss --min {css_in} '{src_index}'" command = f"purifycss --min {css_in} '{src_index}'"
logging.debug(command) logging.debug(command)
inline_css = subprocess.check_output(command, shell=True).decode('utf-8') inline_css = subprocess.check_output(command, shell=True).decode("utf-8")
inline_css = inline_css.replace('!important', '').replace('/*!', '/*') inline_css = inline_css.replace("!important", "").replace("/*!", "/*")
inline_css = cssmin.cssmin(inline_css) inline_css = cssmin.cssmin(inline_css)
content = content.replace('CUSTOM_CSS_PLACEHOLDER', inline_css) content = content.replace("CUSTOM_CSS_PLACEHOLDER", inline_css)
with open(dst_index, 'w') as f: with open(dst_index, "w") as f:
f.write(content) f.write(content)
return dst_index return dst_index
@ -40,15 +40,12 @@ def prepare_amp_html(lang, args, root, site_temp, main_site_dir):
def build_amp(lang, args, cfg): def build_amp(lang, args, cfg):
# AMP docs: https://amp.dev/documentation/ # AMP docs: https://amp.dev/documentation/
logging.info(f'Building AMP version for {lang}') logging.info(f"Building AMP version for {lang}")
with util.temp_dir() as site_temp: with util.temp_dir() as site_temp:
extra = cfg.data['extra'] extra = cfg.data["extra"]
main_site_dir = cfg.data['site_dir'] main_site_dir = cfg.data["site_dir"]
extra['is_amp'] = True extra["is_amp"] = True
cfg.load_dict({ cfg.load_dict({"site_dir": site_temp, "extra": extra})
'site_dir': site_temp,
'extra': extra
})
try: try:
mkdocs.commands.build.build(cfg) mkdocs.commands.build.build(cfg)
@ -60,50 +57,49 @@ def build_amp(lang, args, cfg):
paths = [] paths = []
for root, _, filenames in os.walk(site_temp): for root, _, filenames in os.walk(site_temp):
if 'index.html' in filenames: if "index.html" in filenames:
paths.append(prepare_amp_html(lang, args, root, site_temp, main_site_dir)) paths.append(
logging.info(f'Finished building AMP version for {lang}') prepare_amp_html(lang, args, root, site_temp, main_site_dir)
)
logging.info(f"Finished building AMP version for {lang}")
def html_to_amp(content): def html_to_amp(content):
soup = bs4.BeautifulSoup( soup = bs4.BeautifulSoup(content, features="html.parser")
content,
features='html.parser'
)
for tag in soup.find_all(): for tag in soup.find_all():
if tag.attrs.get('id') == 'tostring': if tag.attrs.get("id") == "tostring":
tag.attrs['id'] = '_tostring' tag.attrs["id"] = "_tostring"
if tag.name == 'img': if tag.name == "img":
tag.name = 'amp-img' tag.name = "amp-img"
tag.attrs['layout'] = 'responsive' tag.attrs["layout"] = "responsive"
src = tag.attrs['src'] src = tag.attrs["src"]
if not (src.startswith('/') or src.startswith('http')): if not (src.startswith("/") or src.startswith("http")):
tag.attrs['src'] = f'../{src}' tag.attrs["src"] = f"../{src}"
if not tag.attrs.get('width'): if not tag.attrs.get("width"):
tag.attrs['width'] = '640' tag.attrs["width"] = "640"
if not tag.attrs.get('height'): if not tag.attrs.get("height"):
tag.attrs['height'] = '320' tag.attrs["height"] = "320"
if tag.name == 'iframe': if tag.name == "iframe":
tag.name = 'amp-iframe' tag.name = "amp-iframe"
tag.attrs['layout'] = 'responsive' tag.attrs["layout"] = "responsive"
del tag.attrs['alt'] del tag.attrs["alt"]
del tag.attrs['allowfullscreen'] del tag.attrs["allowfullscreen"]
if not tag.attrs.get('width'): if not tag.attrs.get("width"):
tag.attrs['width'] = '640' tag.attrs["width"] = "640"
if not tag.attrs.get('height'): if not tag.attrs.get("height"):
tag.attrs['height'] = '320' tag.attrs["height"] = "320"
elif tag.name == 'a': elif tag.name == "a":
href = tag.attrs.get('href') href = tag.attrs.get("href")
if href: if href:
if not (href.startswith('/') or href.startswith('http')): if not (href.startswith("/") or href.startswith("http")):
if '#' in href: if "#" in href:
href, anchor = href.split('#') href, anchor = href.split("#")
else: else:
anchor = None anchor = None
href = f'../{href}amp/' href = f"../{href}amp/"
if anchor: if anchor:
href = f'{href}#{anchor}' href = f"{href}#{anchor}"
tag.attrs['href'] = href tag.attrs["href"] = href
content = str(soup) content = str(soup)
return website.minify_html(content) return website.minify_html(content)
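Black works in both directions: the bs4.BeautifulSoup(...) call above was spread over four lines but fits comfortably within 88 characters, so it is folded back onto one; only a trailing ("magic") comma would have kept it exploded. A minimal sketch, assuming black's defaults:

import black

# The call as it was written before the change: multi-line, single
# quotes, and no trailing comma after the last argument.
src = (
    "soup = bs4.BeautifulSoup(\n"
    "    content,\n"
    "    features='html.parser'\n"
    ")\n"
)

# With no magic trailing comma and a joined length well under 88
# characters, black collapses the call onto one line with double quotes.
print(black.format_str(src, mode=black.Mode()))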

View File

@ -17,54 +17,52 @@ import util
def build_for_lang(lang, args): def build_for_lang(lang, args):
logging.info(f'Building {lang} blog') logging.info(f"Building {lang} blog")
try: try:
theme_cfg = { theme_cfg = {
'name': None, "name": None,
'custom_dir': os.path.join(os.path.dirname(__file__), '..', args.theme_dir), "custom_dir": os.path.join(os.path.dirname(__file__), "..", args.theme_dir),
'language': lang, "language": lang,
'direction': 'ltr', "direction": "ltr",
'static_templates': ['404.html'], "static_templates": ["404.html"],
'extra': { "extra": {
'now': int(time.mktime(datetime.datetime.now().timetuple())) # TODO better way to avoid caching "now": int(
} time.mktime(datetime.datetime.now().timetuple())
) # TODO better way to avoid caching
},
} }
# the following list of languages is sorted according to # the following list of languages is sorted according to
# https://en.wikipedia.org/wiki/List_of_languages_by_total_number_of_speakers # https://en.wikipedia.org/wiki/List_of_languages_by_total_number_of_speakers
languages = { languages = {"en": "English"}
'en': 'English'
}
site_names = { site_names = {"en": "ClickHouse Blog"}
'en': 'ClickHouse Blog'
}
assert len(site_names) == len(languages) assert len(site_names) == len(languages)
site_dir = os.path.join(args.blog_output_dir, lang) site_dir = os.path.join(args.blog_output_dir, lang)
plugins = ['macros'] plugins = ["macros"]
if args.htmlproofer: if args.htmlproofer:
plugins.append('htmlproofer') plugins.append("htmlproofer")
website_url = 'https://clickhouse.com' website_url = "https://clickhouse.com"
site_name = site_names.get(lang, site_names['en']) site_name = site_names.get(lang, site_names["en"])
blog_nav, post_meta = nav.build_blog_nav(lang, args) blog_nav, post_meta = nav.build_blog_nav(lang, args)
raw_config = dict( raw_config = dict(
site_name=site_name, site_name=site_name,
site_url=f'{website_url}/blog/{lang}/', site_url=f"{website_url}/blog/{lang}/",
docs_dir=os.path.join(args.blog_dir, lang), docs_dir=os.path.join(args.blog_dir, lang),
site_dir=site_dir, site_dir=site_dir,
strict=True, strict=True,
theme=theme_cfg, theme=theme_cfg,
nav=blog_nav, nav=blog_nav,
copyright='©2016–2022 ClickHouse, Inc.', copyright="©2016–2022 ClickHouse, Inc.",
use_directory_urls=True, use_directory_urls=True,
repo_name='ClickHouse/ClickHouse', repo_name="ClickHouse/ClickHouse",
repo_url='https://github.com/ClickHouse/ClickHouse/', repo_url="https://github.com/ClickHouse/ClickHouse/",
edit_uri=f'edit/master/website/blog/{lang}', edit_uri=f"edit/master/website/blog/{lang}",
markdown_extensions=mdx_clickhouse.MARKDOWN_EXTENSIONS, markdown_extensions=mdx_clickhouse.MARKDOWN_EXTENSIONS,
plugins=plugins, plugins=plugins,
extra=dict( extra=dict(
@ -75,12 +73,12 @@ def build_for_lang(lang, args):
website_url=website_url, website_url=website_url,
events=args.events, events=args.events,
languages=languages, languages=languages,
includes_dir=os.path.join(os.path.dirname(__file__), '..', '_includes'), includes_dir=os.path.join(os.path.dirname(__file__), "..", "_includes"),
is_amp=False, is_amp=False,
is_blog=True, is_blog=True,
post_meta=post_meta, post_meta=post_meta,
today=datetime.date.today().isoformat() today=datetime.date.today().isoformat(),
) ),
) )
cfg = config.load_config(**raw_config) cfg = config.load_config(**raw_config)
@ -89,21 +87,28 @@ def build_for_lang(lang, args):
redirects.build_blog_redirects(args) redirects.build_blog_redirects(args)
env = util.init_jinja2_env(args) env = util.init_jinja2_env(args)
with open(os.path.join(args.website_dir, 'templates', 'blog', 'rss.xml'), 'rb') as f: with open(
rss_template_string = f.read().decode('utf-8').strip() os.path.join(args.website_dir, "templates", "blog", "rss.xml"), "rb"
) as f:
rss_template_string = f.read().decode("utf-8").strip()
rss_template = env.from_string(rss_template_string) rss_template = env.from_string(rss_template_string)
with open(os.path.join(args.blog_output_dir, lang, 'rss.xml'), 'w') as f: with open(os.path.join(args.blog_output_dir, lang, "rss.xml"), "w") as f:
f.write(rss_template.render({'config': raw_config})) f.write(rss_template.render({"config": raw_config}))
logging.info(f'Finished building {lang} blog') logging.info(f"Finished building {lang} blog")
except exceptions.ConfigurationError as e: except exceptions.ConfigurationError as e:
raise SystemExit('\n' + str(e)) raise SystemExit("\n" + str(e))
def build_blog(args): def build_blog(args):
tasks = [] tasks = []
for lang in args.blog_lang.split(','): for lang in args.blog_lang.split(","):
if lang: if lang:
tasks.append((lang, args,)) tasks.append(
(
lang,
args,
)
)
util.run_function_in_parallel(build_for_lang, tasks, threads=False) util.run_function_in_parallel(build_for_lang, tasks, threads=False)
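The expansion of tasks.append((lang, args,)) above is the flip side of the previous example, black's magic trailing comma: a bracketed collection whose source already ends with a comma is always exploded, one element per line, even when it would fit on a single line. A sketch of both variants (black installed is the only assumption):

import black

with_comma = "tasks.append((lang, args,))\n"
without_comma = "tasks.append((lang, args))\n"

# The trailing comma inside the tuple forces black to put lang and args
# on separate lines; without it the call stays exactly as written.
print(black.format_str(with_comma, mode=black.Mode()))
print(black.format_str(without_comma, mode=black.Mode()))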

View File

@ -30,76 +30,76 @@ import website
from cmake_in_clickhouse_generator import generate_cmake_flags_files from cmake_in_clickhouse_generator import generate_cmake_flags_files
class ClickHouseMarkdown(markdown.extensions.Extension): class ClickHouseMarkdown(markdown.extensions.Extension):
class ClickHousePreprocessor(markdown.util.Processor): class ClickHousePreprocessor(markdown.util.Processor):
def run(self, lines): def run(self, lines):
for line in lines: for line in lines:
if '<!--hide-->' not in line: if "<!--hide-->" not in line:
yield line yield line
def extendMarkdown(self, md): def extendMarkdown(self, md):
md.preprocessors.register(self.ClickHousePreprocessor(), 'clickhouse_preprocessor', 31) md.preprocessors.register(
self.ClickHousePreprocessor(), "clickhouse_preprocessor", 31
)
markdown.extensions.ClickHouseMarkdown = ClickHouseMarkdown markdown.extensions.ClickHouseMarkdown = ClickHouseMarkdown
def build_for_lang(lang, args): def build_for_lang(lang, args):
logging.info(f'Building {lang} docs') logging.info(f"Building {lang} docs")
os.environ['SINGLE_PAGE'] = '0' os.environ["SINGLE_PAGE"] = "0"
try: try:
theme_cfg = { theme_cfg = {
'name': None, "name": None,
'custom_dir': os.path.join(os.path.dirname(__file__), '..', args.theme_dir), "custom_dir": os.path.join(os.path.dirname(__file__), "..", args.theme_dir),
'language': lang, "language": lang,
'direction': 'rtl' if lang == 'fa' else 'ltr', "direction": "rtl" if lang == "fa" else "ltr",
'static_templates': ['404.html'], "static_templates": ["404.html"],
'extra': { "extra": {
'now': int(time.mktime(datetime.datetime.now().timetuple())) # TODO better way to avoid caching "now": int(
} time.mktime(datetime.datetime.now().timetuple())
) # TODO better way to avoid caching
},
} }
# the following list of languages is sorted according to # the following list of languages is sorted according to
# https://en.wikipedia.org/wiki/List_of_languages_by_total_number_of_speakers # https://en.wikipedia.org/wiki/List_of_languages_by_total_number_of_speakers
languages = { languages = {"en": "English", "zh": "中文", "ru": "Русский", "ja": "日本語"}
'en': 'English',
'zh': '中文',
'ru': 'Русский',
'ja': '日本語'
}
site_names = { site_names = {
'en': 'ClickHouse %s Documentation', "en": "ClickHouse %s Documentation",
'zh': 'ClickHouse文档 %s', "zh": "ClickHouse文档 %s",
'ru': 'Документация ClickHouse %s', "ru": "Документация ClickHouse %s",
'ja': 'ClickHouseドキュメント %s' "ja": "ClickHouseドキュメント %s",
} }
assert len(site_names) == len(languages) assert len(site_names) == len(languages)
site_dir = os.path.join(args.docs_output_dir, lang) site_dir = os.path.join(args.docs_output_dir, lang)
plugins = ['macros'] plugins = ["macros"]
if args.htmlproofer: if args.htmlproofer:
plugins.append('htmlproofer') plugins.append("htmlproofer")
website_url = 'https://clickhouse.com' website_url = "https://clickhouse.com"
site_name = site_names.get(lang, site_names['en']) % '' site_name = site_names.get(lang, site_names["en"]) % ""
site_name = site_name.replace('  ', ' ') site_name = site_name.replace("  ", " ")
raw_config = dict( raw_config = dict(
site_name=site_name, site_name=site_name,
site_url=f'{website_url}/docs/{lang}/', site_url=f"{website_url}/docs/{lang}/",
docs_dir=os.path.join(args.docs_dir, lang), docs_dir=os.path.join(args.docs_dir, lang),
site_dir=site_dir, site_dir=site_dir,
strict=True, strict=True,
theme=theme_cfg, theme=theme_cfg,
copyright='©2016–2022 ClickHouse, Inc.', copyright="©2016–2022 ClickHouse, Inc.",
use_directory_urls=True, use_directory_urls=True,
repo_name='ClickHouse/ClickHouse', repo_name="ClickHouse/ClickHouse",
repo_url='https://github.com/ClickHouse/ClickHouse/', repo_url="https://github.com/ClickHouse/ClickHouse/",
edit_uri=f'edit/master/docs/{lang}', edit_uri=f"edit/master/docs/{lang}",
markdown_extensions=mdx_clickhouse.MARKDOWN_EXTENSIONS, markdown_extensions=mdx_clickhouse.MARKDOWN_EXTENSIONS,
plugins=plugins, plugins=plugins,
extra=dict( extra=dict(
@ -111,16 +111,16 @@ def build_for_lang(lang, args):
website_url=website_url, website_url=website_url,
events=args.events, events=args.events,
languages=languages, languages=languages,
includes_dir=os.path.join(os.path.dirname(__file__), '..', '_includes'), includes_dir=os.path.join(os.path.dirname(__file__), "..", "_includes"),
is_amp=False, is_amp=False,
is_blog=False is_blog=False,
) ),
) )
# Clean to be safe if last build finished abnormally # Clean to be safe if last build finished abnormally
single_page.remove_temporary_files(lang, args) single_page.remove_temporary_files(lang, args)
raw_config['nav'] = nav.build_docs_nav(lang, args) raw_config["nav"] = nav.build_docs_nav(lang, args)
cfg = config.load_config(**raw_config) cfg = config.load_config(**raw_config)
@ -131,21 +131,28 @@ def build_for_lang(lang, args):
amp.build_amp(lang, args, cfg) amp.build_amp(lang, args, cfg)
if not args.skip_single_page: if not args.skip_single_page:
single_page.build_single_page_version(lang, args, raw_config.get('nav'), cfg) single_page.build_single_page_version(
lang, args, raw_config.get("nav"), cfg
)
mdx_clickhouse.PatchedMacrosPlugin.disabled = False mdx_clickhouse.PatchedMacrosPlugin.disabled = False
logging.info(f'Finished building {lang} docs') logging.info(f"Finished building {lang} docs")
except exceptions.ConfigurationError as e: except exceptions.ConfigurationError as e:
raise SystemExit('\n' + str(e)) raise SystemExit("\n" + str(e))
def build_docs(args): def build_docs(args):
tasks = [] tasks = []
for lang in args.lang.split(','): for lang in args.lang.split(","):
if lang: if lang:
tasks.append((lang, args,)) tasks.append(
(
lang,
args,
)
)
util.run_function_in_parallel(build_for_lang, tasks, threads=False) util.run_function_in_parallel(build_for_lang, tasks, threads=False)
redirects.build_docs_redirects(args) redirects.build_docs_redirects(args)
@ -171,56 +178,64 @@ def build(args):
redirects.build_static_redirects(args) redirects.build_static_redirects(args)
if __name__ == '__main__': if __name__ == "__main__":
os.chdir(os.path.join(os.path.dirname(__file__), '..')) os.chdir(os.path.join(os.path.dirname(__file__), ".."))
# A root path to ClickHouse source code. # A root path to ClickHouse source code.
src_dir = '..' src_dir = ".."
website_dir = os.path.join(src_dir, 'website') website_dir = os.path.join(src_dir, "website")
arg_parser = argparse.ArgumentParser() arg_parser = argparse.ArgumentParser()
arg_parser.add_argument('--lang', default='en,ru,zh,ja') arg_parser.add_argument("--lang", default="en,ru,zh,ja")
arg_parser.add_argument('--blog-lang', default='en') arg_parser.add_argument("--blog-lang", default="en")
arg_parser.add_argument('--docs-dir', default='.') arg_parser.add_argument("--docs-dir", default=".")
arg_parser.add_argument('--theme-dir', default=website_dir) arg_parser.add_argument("--theme-dir", default=website_dir)
arg_parser.add_argument('--website-dir', default=website_dir) arg_parser.add_argument("--website-dir", default=website_dir)
arg_parser.add_argument('--src-dir', default=src_dir) arg_parser.add_argument("--src-dir", default=src_dir)
arg_parser.add_argument('--blog-dir', default=os.path.join(website_dir, 'blog')) arg_parser.add_argument("--blog-dir", default=os.path.join(website_dir, "blog"))
arg_parser.add_argument('--output-dir', default='build') arg_parser.add_argument("--output-dir", default="build")
arg_parser.add_argument('--nav-limit', type=int, default='0') arg_parser.add_argument("--nav-limit", type=int, default="0")
arg_parser.add_argument('--skip-multi-page', action='store_true') arg_parser.add_argument("--skip-multi-page", action="store_true")
arg_parser.add_argument('--skip-single-page', action='store_true') arg_parser.add_argument("--skip-single-page", action="store_true")
arg_parser.add_argument('--skip-amp', action='store_true') arg_parser.add_argument("--skip-amp", action="store_true")
arg_parser.add_argument('--skip-website', action='store_true') arg_parser.add_argument("--skip-website", action="store_true")
arg_parser.add_argument('--skip-blog', action='store_true') arg_parser.add_argument("--skip-blog", action="store_true")
arg_parser.add_argument('--skip-git-log', action='store_true') arg_parser.add_argument("--skip-git-log", action="store_true")
arg_parser.add_argument('--skip-docs', action='store_true') arg_parser.add_argument("--skip-docs", action="store_true")
arg_parser.add_argument('--test-only', action='store_true') arg_parser.add_argument("--test-only", action="store_true")
arg_parser.add_argument('--minify', action='store_true') arg_parser.add_argument("--minify", action="store_true")
arg_parser.add_argument('--htmlproofer', action='store_true') arg_parser.add_argument("--htmlproofer", action="store_true")
arg_parser.add_argument('--no-docs-macros', action='store_true') arg_parser.add_argument("--no-docs-macros", action="store_true")
arg_parser.add_argument('--save-raw-single-page', type=str) arg_parser.add_argument("--save-raw-single-page", type=str)
arg_parser.add_argument('--livereload', type=int, default='0') arg_parser.add_argument("--livereload", type=int, default="0")
arg_parser.add_argument('--verbose', action='store_true') arg_parser.add_argument("--verbose", action="store_true")
args = arg_parser.parse_args() args = arg_parser.parse_args()
args.minify = False # TODO remove args.minify = False # TODO remove
logging.basicConfig( logging.basicConfig(
level=logging.DEBUG if args.verbose else logging.INFO, level=logging.DEBUG if args.verbose else logging.INFO, stream=sys.stderr
stream=sys.stderr
) )
logging.getLogger('MARKDOWN').setLevel(logging.INFO) logging.getLogger("MARKDOWN").setLevel(logging.INFO)
args.docs_output_dir = os.path.join(os.path.abspath(args.output_dir), 'docs') args.docs_output_dir = os.path.join(os.path.abspath(args.output_dir), "docs")
args.blog_output_dir = os.path.join(os.path.abspath(args.output_dir), 'blog') args.blog_output_dir = os.path.join(os.path.abspath(args.output_dir), "blog")
from github import get_events from github import get_events
args.rev = subprocess.check_output('git rev-parse HEAD', shell=True).decode('utf-8').strip()
args.rev_short = subprocess.check_output('git rev-parse --short HEAD', shell=True).decode('utf-8').strip() args.rev = (
args.rev_url = f'https://github.com/ClickHouse/ClickHouse/commit/{args.rev}' subprocess.check_output("git rev-parse HEAD", shell=True)
.decode("utf-8")
.strip()
)
args.rev_short = (
subprocess.check_output("git rev-parse --short HEAD", shell=True)
.decode("utf-8")
.strip()
)
args.rev_url = f"https://github.com/ClickHouse/ClickHouse/commit/{args.rev}"
args.events = get_events(args) args.events = get_events(args)
if args.test_only: if args.test_only:
@ -233,18 +248,20 @@ if __name__ == '__main__':
mdx_clickhouse.PatchedMacrosPlugin.skip_git_log = True mdx_clickhouse.PatchedMacrosPlugin.skip_git_log = True
from build import build from build import build
build(args) build(args)
if args.livereload: if args.livereload:
new_args = [arg for arg in sys.argv if not arg.startswith('--livereload')] new_args = [arg for arg in sys.argv if not arg.startswith("--livereload")]
new_args = sys.executable + ' ' + ' '.join(new_args) new_args = sys.executable + " " + " ".join(new_args)
server = livereload.Server() server = livereload.Server()
server.watch(args.docs_dir + '**/*', livereload.shell(new_args, cwd='tools', shell=True)) server.watch(
server.watch(args.website_dir + '**/*', livereload.shell(new_args, cwd='tools', shell=True)) args.docs_dir + "**/*", livereload.shell(new_args, cwd="tools", shell=True)
server.serve(
root=args.output_dir,
host='0.0.0.0',
port=args.livereload
) )
server.watch(
args.website_dir + "**/*",
livereload.shell(new_args, cwd="tools", shell=True),
)
server.serve(root=args.output_dir, host="0.0.0.0", port=args.livereload)
sys.exit(0) sys.exit(0)
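The args.rev assignment above illustrates how black handles a chained call that overflows: it wraps the whole right-hand side in parentheses and breaks before each method instead of leaving one long line. A sketch reproducing that statement (black installed; subprocess is only referenced inside the formatted text, not imported):

import black

src = (
    "args.rev = subprocess.check_output("
    "'git rev-parse HEAD', shell=True).decode('utf-8').strip()\n"
)

# At 93 characters the statement exceeds the 88-character limit, so
# black parenthesises the right-hand side and splits the chain before
# .decode and .strip, matching the diff above.
print(black.format_str(src, mode=black.Mode()))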

View File

@ -6,11 +6,13 @@ from typing import TextIO, List, Tuple, Optional, Dict
Entity = Tuple[str, str, str] Entity = Tuple[str, str, str]
# https://regex101.com/r/R6iogw/12 # https://regex101.com/r/R6iogw/12
cmake_option_regex: str = r"^\s*option\s*\(([A-Z_0-9${}]+)\s*(?:\"((?:.|\n)*?)\")?\s*(.*)?\).*$" cmake_option_regex: str = (
r"^\s*option\s*\(([A-Z_0-9${}]+)\s*(?:\"((?:.|\n)*?)\")?\s*(.*)?\).*$"
)
ch_master_url: str = "https://github.com/clickhouse/clickhouse/blob/master/" ch_master_url: str = "https://github.com/clickhouse/clickhouse/blob/master/"
name_str: str = "<a name=\"{anchor}\"></a>[`{name}`](" + ch_master_url + "{path}#L{line})" name_str: str = '<a name="{anchor}"></a>[`{name}`](' + ch_master_url + "{path}#L{line})"
default_anchor_str: str = "[`{name}`](#{anchor})" default_anchor_str: str = "[`{name}`](#{anchor})"
comment_var_regex: str = r"\${(.+)}" comment_var_regex: str = r"\${(.+)}"
@ -27,11 +29,15 @@ entities: Dict[str, Tuple[str, str]] = {}
def make_anchor(t: str) -> str: def make_anchor(t: str) -> str:
return "".join(["-" if i == "_" else i.lower() for i in t if i.isalpha() or i == "_"]) return "".join(
["-" if i == "_" else i.lower() for i in t if i.isalpha() or i == "_"]
)
def process_comment(comment: str) -> str: def process_comment(comment: str) -> str:
return re.sub(comment_var_regex, comment_var_replace, comment, flags=re.MULTILINE) return re.sub(comment_var_regex, comment_var_replace, comment, flags=re.MULTILINE)
def build_entity(path: str, entity: Entity, line_comment: Tuple[int, str]) -> None: def build_entity(path: str, entity: Entity, line_comment: Tuple[int, str]) -> None:
(line, comment) = line_comment (line, comment) = line_comment
(name, description, default) = entity (name, description, default) = entity
@ -47,22 +53,22 @@ def build_entity(path: str, entity: Entity, line_comment: Tuple[int, str]) -> No
formatted_default: str = "`" + default + "`" formatted_default: str = "`" + default + "`"
formatted_name: str = name_str.format( formatted_name: str = name_str.format(
anchor=make_anchor(name), anchor=make_anchor(name), name=name, path=path, line=line
name=name, )
path=path,
line=line)
formatted_description: str = "".join(description.split("\n")) formatted_description: str = "".join(description.split("\n"))
formatted_comment: str = process_comment(comment) formatted_comment: str = process_comment(comment)
formatted_entity: str = "| {} | {} | {} | {} |".format( formatted_entity: str = "| {} | {} | {} | {} |".format(
formatted_name, formatted_default, formatted_description, formatted_comment) formatted_name, formatted_default, formatted_description, formatted_comment
)
entities[name] = path, formatted_entity entities[name] = path, formatted_entity
def process_file(root_path: str, file_path: str, file_name: str) -> None: def process_file(root_path: str, file_path: str, file_name: str) -> None:
with open(os.path.join(file_path, file_name), 'r') as cmake_file: with open(os.path.join(file_path, file_name), "r") as cmake_file:
contents: str = cmake_file.read() contents: str = cmake_file.read()
def get_line_and_comment(target: str) -> Tuple[int, str]: def get_line_and_comment(target: str) -> Tuple[int, str]:
@ -70,10 +76,10 @@ def process_file(root_path: str, file_path: str, file_name: str) -> None:
comment: str = "" comment: str = ""
for n, line in enumerate(contents_list): for n, line in enumerate(contents_list):
if 'option' not in line.lower() or target not in line: if "option" not in line.lower() or target not in line:
continue continue
for maybe_comment_line in contents_list[n - 1::-1]: for maybe_comment_line in contents_list[n - 1 :: -1]:
if not re.match("\s*#\s*", maybe_comment_line): if not re.match("\s*#\s*", maybe_comment_line):
break break
@ -82,16 +88,22 @@ def process_file(root_path: str, file_path: str, file_name: str) -> None:
# line numbering starts with 1 # line numbering starts with 1
return n + 1, comment return n + 1, comment
matches: Optional[List[Entity]] = re.findall(cmake_option_regex, contents, re.MULTILINE) matches: Optional[List[Entity]] = re.findall(
cmake_option_regex, contents, re.MULTILINE
)
file_rel_path_with_name: str = os.path.join(
file_rel_path_with_name: str = os.path.join(file_path[len(root_path):], file_name) file_path[len(root_path) :], file_name
if file_rel_path_with_name.startswith('/'): )
if file_rel_path_with_name.startswith("/"):
file_rel_path_with_name = file_rel_path_with_name[1:] file_rel_path_with_name = file_rel_path_with_name[1:]
if matches: if matches:
for entity in matches: for entity in matches:
build_entity(file_rel_path_with_name, entity, get_line_and_comment(entity[0])) build_entity(
file_rel_path_with_name, entity, get_line_and_comment(entity[0])
)
def process_folder(root_path: str, name: str) -> None: def process_folder(root_path: str, name: str) -> None:
for root, _, files in os.walk(os.path.join(root_path, name)): for root, _, files in os.walk(os.path.join(root_path, name)):
@ -99,12 +111,19 @@ def process_folder(root_path: str, name: str) -> None:
if f == "CMakeLists.txt" or ".cmake" in f: if f == "CMakeLists.txt" or ".cmake" in f:
process_file(root_path, root, f) process_file(root_path, root, f)
def generate_cmake_flags_files() -> None:
root_path: str = os.path.join(os.path.dirname(__file__), '..', '..')
output_file_name: str = os.path.join(root_path, "docs/en/development/cmake-in-clickhouse.md") def generate_cmake_flags_files() -> None:
header_file_name: str = os.path.join(root_path, "docs/_includes/cmake_in_clickhouse_header.md") root_path: str = os.path.join(os.path.dirname(__file__), "..", "..")
footer_file_name: str = os.path.join(root_path, "docs/_includes/cmake_in_clickhouse_footer.md")
output_file_name: str = os.path.join(
root_path, "docs/en/development/cmake-in-clickhouse.md"
)
header_file_name: str = os.path.join(
root_path, "docs/_includes/cmake_in_clickhouse_header.md"
)
footer_file_name: str = os.path.join(
root_path, "docs/_includes/cmake_in_clickhouse_footer.md"
)
process_file(root_path, root_path, "CMakeLists.txt") process_file(root_path, root_path, "CMakeLists.txt")
process_file(root_path, os.path.join(root_path, "programs"), "CMakeLists.txt") process_file(root_path, os.path.join(root_path, "programs"), "CMakeLists.txt")
@ -127,8 +146,10 @@ def generate_cmake_flags_files() -> None:
f.write(entities[k][1] + "\n") f.write(entities[k][1] + "\n")
ignored_keys.append(k) ignored_keys.append(k)
f.write("\n### External libraries\nNote that ClickHouse uses forks of these libraries, see https://github.com/ClickHouse-Extras.\n" + f.write(
table_header) "\n### External libraries\nNote that ClickHouse uses forks of these libraries, see https://github.com/ClickHouse-Extras.\n"
+ table_header
)
for k in sorted_keys: for k in sorted_keys:
if k.startswith("ENABLE_") and ".cmake" in entities[k][0]: if k.startswith("ENABLE_") and ".cmake" in entities[k][0]:
@ -143,15 +164,18 @@ def generate_cmake_flags_files() -> None:
with open(footer_file_name, "r") as footer: with open(footer_file_name, "r") as footer:
f.write(footer.read()) f.write(footer.read())
other_languages = ["docs/ja/development/cmake-in-clickhouse.md", other_languages = [
"docs/zh/development/cmake-in-clickhouse.md", "docs/ja/development/cmake-in-clickhouse.md",
"docs/ru/development/cmake-in-clickhouse.md"] "docs/zh/development/cmake-in-clickhouse.md",
"docs/ru/development/cmake-in-clickhouse.md",
]
for lang in other_languages: for lang in other_languages:
other_file_name = os.path.join(root_path, lang) other_file_name = os.path.join(root_path, lang)
if os.path.exists(other_file_name): if os.path.exists(other_file_name):
os.unlink(other_file_name) os.unlink(other_file_name)
os.symlink(output_file_name, other_file_name) os.symlink(output_file_name, other_file_name)
if __name__ == '__main__':
if __name__ == "__main__":
generate_cmake_flags_files() generate_cmake_flags_files()
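Two smaller rules show up in this file: black prefers double quotes only when that does not add backslash escapes (which is why name_str ends up single-quoted), and it spaces out slice colons when the bounds are expressions rather than plain names, as in contents_list[n - 1 :: -1]. A sketch of both, with simplified placeholder values:

import black

src = '''link = "<a name=\\"x\\"></a>"
tail = contents_list[n - 1::-1]
'''

# The first line is rewritten with single quotes, since keeping double
# quotes would keep the backslash escapes; the second gains spaces around
# "::" because "n - 1" is a complex expression (the PEP 8 slice rule that
# black applies).
print(black.format_str(src, mode=black.Mode()))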

View File

@ -8,7 +8,7 @@ import contextlib
from git import cmd from git import cmd
from tempfile import NamedTemporaryFile from tempfile import NamedTemporaryFile
SCRIPT_DESCRIPTION = ''' SCRIPT_DESCRIPTION = """
usage: ./easy_diff.py language/document path usage: ./easy_diff.py language/document path
Show the difference between a language document and an English document. Show the difference between a language document and an English document.
@ -53,16 +53,16 @@ SCRIPT_DESCRIPTION = '''
OPTIONS: OPTIONS:
-h, --help show this help message and exit -h, --help show this help message and exit
--no-pager use stdout as difference result output --no-pager use stdout as difference result output
''' """
SCRIPT_PATH = os.path.abspath(__file__) SCRIPT_PATH = os.path.abspath(__file__)
CLICKHOUSE_REPO_HOME = os.path.join(os.path.dirname(SCRIPT_PATH), '..', '..') CLICKHOUSE_REPO_HOME = os.path.join(os.path.dirname(SCRIPT_PATH), "..", "..")
SCRIPT_COMMAND_EXECUTOR = cmd.Git(CLICKHOUSE_REPO_HOME) SCRIPT_COMMAND_EXECUTOR = cmd.Git(CLICKHOUSE_REPO_HOME)
SCRIPT_COMMAND_PARSER = argparse.ArgumentParser(add_help=False) SCRIPT_COMMAND_PARSER = argparse.ArgumentParser(add_help=False)
SCRIPT_COMMAND_PARSER.add_argument('path', type=bytes, nargs='?', default=None) SCRIPT_COMMAND_PARSER.add_argument("path", type=bytes, nargs="?", default=None)
SCRIPT_COMMAND_PARSER.add_argument('--no-pager', action='store_true', default=False) SCRIPT_COMMAND_PARSER.add_argument("--no-pager", action="store_true", default=False)
SCRIPT_COMMAND_PARSER.add_argument('-h', '--help', action='store_true', default=False) SCRIPT_COMMAND_PARSER.add_argument("-h", "--help", action="store_true", default=False)
def execute(commands): def execute(commands):
@@ -70,19 +70,41 @@ def execute(commands):
def get_hash(file_name): def get_hash(file_name):
return execute(['git', 'log', '-n', '1', '--pretty=format:"%H"', file_name]) return execute(["git", "log", "-n", "1", '--pretty=format:"%H"', file_name])
def diff_file(reference_file, working_file, out): def diff_file(reference_file, working_file, out):
if not os.path.exists(reference_file): if not os.path.exists(reference_file):
raise RuntimeError('reference file [' + os.path.abspath(reference_file) + '] is not exists.') raise RuntimeError(
"reference file [" + os.path.abspath(reference_file) + "] is not exists."
)
if os.path.islink(working_file): if os.path.islink(working_file):
out.writelines(["Need translate document:" + os.path.abspath(reference_file)]) out.writelines(["Need translate document:" + os.path.abspath(reference_file)])
elif not os.path.exists(working_file): elif not os.path.exists(working_file):
out.writelines(['Need link document ' + os.path.abspath(reference_file) + ' to ' + os.path.abspath(working_file)]) out.writelines(
[
"Need link document "
+ os.path.abspath(reference_file)
+ " to "
+ os.path.abspath(working_file)
]
)
elif get_hash(working_file) != get_hash(reference_file): elif get_hash(working_file) != get_hash(reference_file):
out.writelines([(execute(['git', 'diff', get_hash(working_file).strip('"'), reference_file]).encode('utf-8'))]) out.writelines(
[
(
execute(
[
"git",
"diff",
get_hash(working_file).strip('"'),
reference_file,
]
).encode("utf-8")
)
]
)
return 0 return 0
@@ -94,20 +116,30 @@ def diff_directory(reference_directory, working_directory, out):
for list_item in os.listdir(reference_directory): for list_item in os.listdir(reference_directory):
working_item = os.path.join(working_directory, list_item) working_item = os.path.join(working_directory, list_item)
reference_item = os.path.join(reference_directory, list_item) reference_item = os.path.join(reference_directory, list_item)
if diff_file(reference_item, working_item, out) if os.path.isfile(reference_item) else diff_directory(reference_item, working_item, out) != 0: if (
diff_file(reference_item, working_item, out)
if os.path.isfile(reference_item)
else diff_directory(reference_item, working_item, out) != 0
):
return 1 return 1
return 0 return 0
def find_language_doc(custom_document, other_language='en', children=[]): def find_language_doc(custom_document, other_language="en", children=[]):
if len(custom_document) == 0: if len(custom_document) == 0:
raise RuntimeError('The ' + os.path.join(custom_document, *children) + " is not in docs directory.") raise RuntimeError(
"The "
+ os.path.join(custom_document, *children)
+ " is not in docs directory."
)
if os.path.samefile(os.path.join(CLICKHOUSE_REPO_HOME, 'docs'), custom_document): if os.path.samefile(os.path.join(CLICKHOUSE_REPO_HOME, "docs"), custom_document):
return os.path.join(CLICKHOUSE_REPO_HOME, 'docs', other_language, *children[1:]) return os.path.join(CLICKHOUSE_REPO_HOME, "docs", other_language, *children[1:])
children.insert(0, os.path.split(custom_document)[1]) children.insert(0, os.path.split(custom_document)[1])
return find_language_doc(os.path.split(custom_document)[0], other_language, children) return find_language_doc(
os.path.split(custom_document)[0], other_language, children
)
class ToPager: class ToPager:
@@ -119,7 +151,7 @@ class ToPager:
def close(self): def close(self):
self.temp_named_file.flush() self.temp_named_file.flush()
git_pager = execute(['git', 'var', 'GIT_PAGER']) git_pager = execute(["git", "var", "GIT_PAGER"])
subprocess.check_call([git_pager, self.temp_named_file.name]) subprocess.check_call([git_pager, self.temp_named_file.name])
self.temp_named_file.close() self.temp_named_file.close()
@@ -135,12 +167,20 @@ class ToStdOut:
self.system_stdout_stream = system_stdout_stream self.system_stdout_stream = system_stdout_stream
if __name__ == '__main__': if __name__ == "__main__":
arguments = SCRIPT_COMMAND_PARSER.parse_args() arguments = SCRIPT_COMMAND_PARSER.parse_args()
if arguments.help or not arguments.path: if arguments.help or not arguments.path:
sys.stdout.write(SCRIPT_DESCRIPTION) sys.stdout.write(SCRIPT_DESCRIPTION)
sys.exit(0) sys.exit(0)
working_language = os.path.join(CLICKHOUSE_REPO_HOME, 'docs', arguments.path) working_language = os.path.join(CLICKHOUSE_REPO_HOME, "docs", arguments.path)
with contextlib.closing(ToStdOut(sys.stdout) if arguments.no_pager else ToPager(NamedTemporaryFile('r+'))) as writer: with contextlib.closing(
exit(diff_directory(find_language_doc(working_language), working_language, writer)) ToStdOut(sys.stdout)
if arguments.no_pager
else ToPager(NamedTemporaryFile("r+"))
) as writer:
exit(
diff_directory(
find_language_doc(working_language), working_language, writer
)
)
@@ -16,27 +16,26 @@ import util
def get_events(args): def get_events(args):
events = [] events = []
skip = True skip = True
with open(os.path.join(args.docs_dir, '..', 'README.md')) as f: with open(os.path.join(args.docs_dir, "..", "README.md")) as f:
for line in f: for line in f:
if skip: if skip:
if 'Upcoming Events' in line: if "Upcoming Events" in line:
skip = False skip = False
else: else:
if not line: if not line:
continue continue
line = line.strip().split('](') line = line.strip().split("](")
if len(line) == 2: if len(line) == 2:
tail = line[1].split(') ') tail = line[1].split(") ")
events.append({ events.append(
'signup_link': tail[0], {
'event_name': line[0].replace('* [', ''), "signup_link": tail[0],
'event_date': tail[1].replace('on ', '').replace('.', '') "event_name": line[0].replace("* [", ""),
}) "event_date": tail[1].replace("on ", "").replace(".", ""),
}
)
return events return events
if __name__ == '__main__': if __name__ == "__main__":
logging.basicConfig( logging.basicConfig(level=logging.DEBUG, stream=sys.stderr)
level=logging.DEBUG,
stream=sys.stderr
)
@@ -16,74 +16,79 @@ import slugify as slugify_impl
def slugify(value, separator): def slugify(value, separator):
return slugify_impl.slugify(value, separator=separator, word_boundary=True, save_order=True) return slugify_impl.slugify(
value, separator=separator, word_boundary=True, save_order=True
)
MARKDOWN_EXTENSIONS = [ MARKDOWN_EXTENSIONS = [
'mdx_clickhouse', "mdx_clickhouse",
'admonition', "admonition",
'attr_list', "attr_list",
'def_list', "def_list",
'codehilite', "codehilite",
'nl2br', "nl2br",
'sane_lists', "sane_lists",
'pymdownx.details', "pymdownx.details",
'pymdownx.magiclink', "pymdownx.magiclink",
'pymdownx.superfences', "pymdownx.superfences",
'extra', "extra",
{ {"toc": {"permalink": True, "slugify": slugify}},
'toc': {
'permalink': True,
'slugify': slugify
}
}
] ]
class ClickHouseLinkMixin(object): class ClickHouseLinkMixin(object):
def handleMatch(self, m, data): def handleMatch(self, m, data):
single_page = (os.environ.get('SINGLE_PAGE') == '1') single_page = os.environ.get("SINGLE_PAGE") == "1"
try: try:
el, start, end = super(ClickHouseLinkMixin, self).handleMatch(m, data) el, start, end = super(ClickHouseLinkMixin, self).handleMatch(m, data)
except IndexError: except IndexError:
return return
if el is not None: if el is not None:
href = el.get('href') or '' href = el.get("href") or ""
is_external = href.startswith('http:') or href.startswith('https:') is_external = href.startswith("http:") or href.startswith("https:")
if is_external: if is_external:
if not href.startswith('https://clickhouse.com'): if not href.startswith("https://clickhouse.com"):
el.set('rel', 'external nofollow noreferrer') el.set("rel", "external nofollow noreferrer")
elif single_page: elif single_page:
if '#' in href: if "#" in href:
el.set('href', '#' + href.split('#', 1)[1]) el.set("href", "#" + href.split("#", 1)[1])
else: else:
el.set('href', '#' + href.replace('/index.md', '/').replace('.md', '/')) el.set(
"href", "#" + href.replace("/index.md", "/").replace(".md", "/")
)
return el, start, end return el, start, end
class ClickHouseAutolinkPattern(ClickHouseLinkMixin, markdown.inlinepatterns.AutolinkInlineProcessor): class ClickHouseAutolinkPattern(
ClickHouseLinkMixin, markdown.inlinepatterns.AutolinkInlineProcessor
):
pass pass
class ClickHouseLinkPattern(ClickHouseLinkMixin, markdown.inlinepatterns.LinkInlineProcessor): class ClickHouseLinkPattern(
ClickHouseLinkMixin, markdown.inlinepatterns.LinkInlineProcessor
):
pass pass
class ClickHousePreprocessor(markdown.util.Processor): class ClickHousePreprocessor(markdown.util.Processor):
def run(self, lines): def run(self, lines):
for line in lines: for line in lines:
if '<!--hide-->' not in line: if "<!--hide-->" not in line:
yield line yield line
class ClickHouseMarkdown(markdown.extensions.Extension): class ClickHouseMarkdown(markdown.extensions.Extension):
def extendMarkdown(self, md, md_globals): def extendMarkdown(self, md, md_globals):
md.preprocessors['clickhouse'] = ClickHousePreprocessor() md.preprocessors["clickhouse"] = ClickHousePreprocessor()
md.inlinePatterns['link'] = ClickHouseLinkPattern(markdown.inlinepatterns.LINK_RE, md) md.inlinePatterns["link"] = ClickHouseLinkPattern(
md.inlinePatterns['autolink'] = ClickHouseAutolinkPattern(markdown.inlinepatterns.AUTOLINK_RE, md) markdown.inlinepatterns.LINK_RE, md
)
md.inlinePatterns["autolink"] = ClickHouseAutolinkPattern(
markdown.inlinepatterns.AUTOLINK_RE, md
)
def makeExtension(**kwargs): def makeExtension(**kwargs):
@@ -92,10 +97,8 @@ def makeExtension(**kwargs):
def get_translations(dirname, lang): def get_translations(dirname, lang):
import babel.support import babel.support
return babel.support.Translations.load(
dirname=dirname, return babel.support.Translations.load(dirname=dirname, locales=[lang, "en"])
locales=[lang, 'en']
)
class PatchedMacrosPlugin(macros.plugin.MacrosPlugin): class PatchedMacrosPlugin(macros.plugin.MacrosPlugin):
@@ -104,22 +107,22 @@ class PatchedMacrosPlugin(macros.plugin.MacrosPlugin):
def on_config(self, config): def on_config(self, config):
super(PatchedMacrosPlugin, self).on_config(config) super(PatchedMacrosPlugin, self).on_config(config)
self.env.comment_start_string = '{##' self.env.comment_start_string = "{##"
self.env.comment_end_string = '##}' self.env.comment_end_string = "##}"
self.env.loader = jinja2.FileSystemLoader([ self.env.loader = jinja2.FileSystemLoader(
os.path.join(config.data['site_dir']), [
os.path.join(config.data['extra']['includes_dir']) os.path.join(config.data["site_dir"]),
]) os.path.join(config.data["extra"]["includes_dir"]),
]
)
def on_env(self, env, config, files): def on_env(self, env, config, files):
import util import util
env.add_extension('jinja2.ext.i18n')
dirname = os.path.join(config.data['theme'].dirs[0], 'locale') env.add_extension("jinja2.ext.i18n")
lang = config.data['theme']['language'] dirname = os.path.join(config.data["theme"].dirs[0], "locale")
env.install_gettext_translations( lang = config.data["theme"]["language"]
get_translations(dirname, lang), env.install_gettext_translations(get_translations(dirname, lang), newstyle=True)
newstyle=True
)
util.init_jinja2_filters(env) util.init_jinja2_filters(env)
return env return env
@@ -130,13 +133,17 @@ class PatchedMacrosPlugin(macros.plugin.MacrosPlugin):
return markdown return markdown
def on_page_markdown(self, markdown, page, config, files): def on_page_markdown(self, markdown, page, config, files):
markdown = super(PatchedMacrosPlugin, self).on_page_markdown(markdown, page, config, files) markdown = super(PatchedMacrosPlugin, self).on_page_markdown(
markdown, page, config, files
)
if os.path.islink(page.file.abs_src_path): if os.path.islink(page.file.abs_src_path):
lang = config.data['theme']['language'] lang = config.data["theme"]["language"]
page.canonical_url = page.canonical_url.replace(f'/{lang}/', '/en/', 1) page.canonical_url = page.canonical_url.replace(f"/{lang}/", "/en/", 1)
if config.data['extra'].get('version_prefix') or config.data['extra'].get('single_page'): if config.data["extra"].get("version_prefix") or config.data["extra"].get(
"single_page"
):
return markdown return markdown
if self.skip_git_log: if self.skip_git_log:
return markdown return markdown
@@ -10,57 +10,59 @@ import util
def find_first_header(content): def find_first_header(content):
for line in content.split('\n'): for line in content.split("\n"):
if line.startswith('#'): if line.startswith("#"):
no_hash = line.lstrip('#') no_hash = line.lstrip("#")
return no_hash.split('{', 1)[0].strip() return no_hash.split("{", 1)[0].strip()
def build_nav_entry(root, args): def build_nav_entry(root, args):
if root.endswith('images'): if root.endswith("images"):
return None, None, None return None, None, None
result_items = [] result_items = []
index_meta, index_content = util.read_md_file(os.path.join(root, 'index.md')) index_meta, index_content = util.read_md_file(os.path.join(root, "index.md"))
current_title = index_meta.get('toc_folder_title', index_meta.get('toc_title')) current_title = index_meta.get("toc_folder_title", index_meta.get("toc_title"))
current_title = current_title or index_meta.get('title', find_first_header(index_content)) current_title = current_title or index_meta.get(
"title", find_first_header(index_content)
)
for filename in os.listdir(root): for filename in os.listdir(root):
path = os.path.join(root, filename) path = os.path.join(root, filename)
if os.path.isdir(path): if os.path.isdir(path):
prio, title, payload = build_nav_entry(path, args) prio, title, payload = build_nav_entry(path, args)
if title and payload: if title and payload:
result_items.append((prio, title, payload)) result_items.append((prio, title, payload))
elif filename.endswith('.md'): elif filename.endswith(".md"):
path = os.path.join(root, filename) path = os.path.join(root, filename)
meta = '' meta = ""
content = '' content = ""
try: try:
meta, content = util.read_md_file(path) meta, content = util.read_md_file(path)
except: except:
print('Error in file: {}'.format(path)) print("Error in file: {}".format(path))
raise raise
path = path.split('/', 2)[-1] path = path.split("/", 2)[-1]
title = meta.get('toc_title', find_first_header(content)) title = meta.get("toc_title", find_first_header(content))
if title: if title:
title = title.strip().rstrip('.') title = title.strip().rstrip(".")
else: else:
title = meta.get('toc_folder_title', 'hidden') title = meta.get("toc_folder_title", "hidden")
prio = meta.get('toc_priority', 9999) prio = meta.get("toc_priority", 9999)
logging.debug(f'Nav entry: {prio}, {title}, {path}') logging.debug(f"Nav entry: {prio}, {title}, {path}")
if meta.get('toc_hidden') or not content.strip(): if meta.get("toc_hidden") or not content.strip():
title = 'hidden' title = "hidden"
if title == 'hidden': if title == "hidden":
title = 'hidden-' + hashlib.sha1(content.encode('utf-8')).hexdigest() title = "hidden-" + hashlib.sha1(content.encode("utf-8")).hexdigest()
if args.nav_limit and len(result_items) >= args.nav_limit: if args.nav_limit and len(result_items) >= args.nav_limit:
break break
result_items.append((prio, title, path)) result_items.append((prio, title, path))
result_items = sorted(result_items, key=lambda x: (x[0], x[1])) result_items = sorted(result_items, key=lambda x: (x[0], x[1]))
result = collections.OrderedDict([(item[1], item[2]) for item in result_items]) result = collections.OrderedDict([(item[1], item[2]) for item in result_items])
if index_meta.get('toc_hidden_folder'): if index_meta.get("toc_hidden_folder"):
current_title += '|hidden-folder' current_title += "|hidden-folder"
return index_meta.get('toc_priority', 10000), current_title, result return index_meta.get("toc_priority", 10000), current_title, result
def build_docs_nav(lang, args): def build_docs_nav(lang, args):
@@ -70,7 +72,7 @@ def build_docs_nav(lang, args):
index_key = None index_key = None
for key, value in list(nav.items()): for key, value in list(nav.items()):
if key and value: if key and value:
if value == 'index.md': if value == "index.md":
index_key = key index_key = key
continue continue
result.append({key: value}) result.append({key: value})
@@ -78,7 +80,7 @@ def build_docs_nav(lang, args):
break break
if index_key: if index_key:
key = list(result[0].keys())[0] key = list(result[0].keys())[0]
result[0][key][index_key] = 'index.md' result[0][key][index_key] = "index.md"
result[0][key].move_to_end(index_key, last=False) result[0][key].move_to_end(index_key, last=False)
return result return result
@@ -86,7 +88,7 @@ def build_docs_nav(lang, args):
def build_blog_nav(lang, args): def build_blog_nav(lang, args):
blog_dir = os.path.join(args.blog_dir, lang) blog_dir = os.path.join(args.blog_dir, lang)
years = sorted(os.listdir(blog_dir), reverse=True) years = sorted(os.listdir(blog_dir), reverse=True)
result_nav = [{'hidden': 'index.md'}] result_nav = [{"hidden": "index.md"}]
post_meta = collections.OrderedDict() post_meta = collections.OrderedDict()
for year in years: for year in years:
year_dir = os.path.join(blog_dir, year) year_dir = os.path.join(blog_dir, year)
@@ -97,38 +99,53 @@ def build_blog_nav(lang, args):
post_meta_items = [] post_meta_items = []
for post in os.listdir(year_dir): for post in os.listdir(year_dir):
post_path = os.path.join(year_dir, post) post_path = os.path.join(year_dir, post)
if not post.endswith('.md'): if not post.endswith(".md"):
raise RuntimeError(f'Unexpected non-md file in posts folder: {post_path}') raise RuntimeError(
f"Unexpected non-md file in posts folder: {post_path}"
)
meta, _ = util.read_md_file(post_path) meta, _ = util.read_md_file(post_path)
post_date = meta['date'] post_date = meta["date"]
post_title = meta['title'] post_title = meta["title"]
if datetime.date.fromisoformat(post_date) > datetime.date.today(): if datetime.date.fromisoformat(post_date) > datetime.date.today():
continue continue
posts.append( posts.append(
(post_date, post_title, os.path.join(year, post),) (
post_date,
post_title,
os.path.join(year, post),
)
) )
if post_title in post_meta: if post_title in post_meta:
raise RuntimeError(f'Duplicate post title: {post_title}') raise RuntimeError(f"Duplicate post title: {post_title}")
if not post_date.startswith(f'{year}-'): if not post_date.startswith(f"{year}-"):
raise RuntimeError(f'Post date {post_date} doesn\'t match the folder year {year}: {post_title}') raise RuntimeError(
post_url_part = post.replace('.md', '') f"Post date {post_date} doesn't match the folder year {year}: {post_title}"
post_meta_items.append((post_date, { )
'date': post_date, post_url_part = post.replace(".md", "")
'title': post_title, post_meta_items.append(
'image': meta.get('image'), (
'url': f'/blog/{lang}/{year}/{post_url_part}/' post_date,
},)) {
"date": post_date,
"title": post_title,
"image": meta.get("image"),
"url": f"/blog/{lang}/{year}/{post_url_part}/",
},
)
)
for _, title, path in sorted(posts, reverse=True): for _, title, path in sorted(posts, reverse=True):
result_nav[-1][year][title] = path result_nav[-1][year][title] = path
for _, post_meta_item in sorted(post_meta_items, for _, post_meta_item in sorted(
reverse=True, post_meta_items, reverse=True, key=lambda item: item[0]
key=lambda item: item[0]): ):
post_meta[post_meta_item['title']] = post_meta_item post_meta[post_meta_item["title"]] = post_meta_item
return result_nav, post_meta return result_nav, post_meta
def _custom_get_navigation(files, config): def _custom_get_navigation(files, config):
nav_config = config['nav'] or mkdocs.structure.nav.nest_paths(f.src_path for f in files.documentation_pages()) nav_config = config["nav"] or mkdocs.structure.nav.nest_paths(
f.src_path for f in files.documentation_pages()
)
items = mkdocs.structure.nav._data_to_navigation(nav_config, files, config) items = mkdocs.structure.nav._data_to_navigation(nav_config, files, config)
if not isinstance(items, list): if not isinstance(items, list):
items = [items] items = [items]
@@ -138,19 +155,25 @@ def _custom_get_navigation(files, config):
mkdocs.structure.nav._add_previous_and_next_links(pages) mkdocs.structure.nav._add_previous_and_next_links(pages)
mkdocs.structure.nav._add_parent_links(items) mkdocs.structure.nav._add_parent_links(items)
missing_from_config = [file for file in files.documentation_pages() if file.page is None] missing_from_config = [
file for file in files.documentation_pages() if file.page is None
]
if missing_from_config: if missing_from_config:
files._files = [file for file in files._files if file not in missing_from_config] files._files = [
file for file in files._files if file not in missing_from_config
]
links = mkdocs.structure.nav._get_by_type(items, mkdocs.structure.nav.Link) links = mkdocs.structure.nav._get_by_type(items, mkdocs.structure.nav.Link)
for link in links: for link in links:
scheme, netloc, path, params, query, fragment = mkdocs.structure.nav.urlparse(link.url) scheme, netloc, path, params, query, fragment = mkdocs.structure.nav.urlparse(
link.url
)
if scheme or netloc: if scheme or netloc:
mkdocs.structure.nav.log.debug( mkdocs.structure.nav.log.debug(
"An external link to '{}' is included in " "An external link to '{}' is included in "
"the 'nav' configuration.".format(link.url) "the 'nav' configuration.".format(link.url)
) )
elif link.url.startswith('/'): elif link.url.startswith("/"):
mkdocs.structure.nav.log.debug( mkdocs.structure.nav.log.debug(
"An absolute path to '{}' is included in the 'nav' configuration, " "An absolute path to '{}' is included in the 'nav' configuration, "
"which presumably points to an external resource.".format(link.url) "which presumably points to an external resource.".format(link.url)
@@ -7,8 +7,9 @@ def write_redirect_html(out_path, to_url):
os.makedirs(out_dir) os.makedirs(out_dir)
except OSError: except OSError:
pass pass
with open(out_path, 'w') as f: with open(out_path, "w") as f:
f.write(f'''<!--[if IE 6]> Redirect: {to_url} <![endif]--> f.write(
f"""<!--[if IE 6]> Redirect: {to_url} <![endif]-->
<!DOCTYPE HTML> <!DOCTYPE HTML>
<html lang="en-US"> <html lang="en-US">
<head> <head>
@@ -22,18 +23,20 @@ def write_redirect_html(out_path, to_url):
<body> <body>
If you are not redirected automatically, follow this <a href="{to_url}">link</a>. If you are not redirected automatically, follow this <a href="{to_url}">link</a>.
</body> </body>
</html>''') </html>"""
)
def build_redirect_html(args, base_prefix, lang, output_dir, from_path, to_path): def build_redirect_html(args, base_prefix, lang, output_dir, from_path, to_path):
out_path = os.path.join( out_path = os.path.join(
output_dir, lang, output_dir,
from_path.replace('/index.md', '/index.html').replace('.md', '/index.html') lang,
from_path.replace("/index.md", "/index.html").replace(".md", "/index.html"),
) )
target_path = to_path.replace('/index.md', '/').replace('.md', '/') target_path = to_path.replace("/index.md", "/").replace(".md", "/")
if target_path[0:7] != 'http://' and target_path[0:8] != 'https://': if target_path[0:7] != "http://" and target_path[0:8] != "https://":
to_url = f'/{base_prefix}/{lang}/{target_path}' to_url = f"/{base_prefix}/{lang}/{target_path}"
else: else:
to_url = target_path to_url = target_path
@@ -42,33 +45,48 @@ def build_redirect_html(args, base_prefix, lang, output_dir, from_path, to_path)
def build_docs_redirects(args): def build_docs_redirects(args):
with open(os.path.join(args.docs_dir, 'redirects.txt'), 'r') as f: with open(os.path.join(args.docs_dir, "redirects.txt"), "r") as f:
for line in f: for line in f:
for lang in args.lang.split(','): for lang in args.lang.split(","):
from_path, to_path = line.split(' ', 1) from_path, to_path = line.split(" ", 1)
build_redirect_html(args, 'docs', lang, args.docs_output_dir, from_path, to_path) build_redirect_html(
args, "docs", lang, args.docs_output_dir, from_path, to_path
)
def build_blog_redirects(args): def build_blog_redirects(args):
for lang in args.blog_lang.split(','): for lang in args.blog_lang.split(","):
redirects_path = os.path.join(args.blog_dir, lang, 'redirects.txt') redirects_path = os.path.join(args.blog_dir, lang, "redirects.txt")
if os.path.exists(redirects_path): if os.path.exists(redirects_path):
with open(redirects_path, 'r') as f: with open(redirects_path, "r") as f:
for line in f: for line in f:
from_path, to_path = line.split(' ', 1) from_path, to_path = line.split(" ", 1)
build_redirect_html(args, 'blog', lang, args.blog_output_dir, from_path, to_path) build_redirect_html(
args, "blog", lang, args.blog_output_dir, from_path, to_path
)
def build_static_redirects(args): def build_static_redirects(args):
for static_redirect in [ for static_redirect in [
('benchmark.html', '/benchmark/dbms/'), ("benchmark.html", "/benchmark/dbms/"),
('benchmark_hardware.html', '/benchmark/hardware/'), ("benchmark_hardware.html", "/benchmark/hardware/"),
('tutorial.html', '/docs/en/getting_started/tutorial/',), (
('reference_en.html', '/docs/en/single/', ), "tutorial.html",
('reference_ru.html', '/docs/ru/single/',), "/docs/en/getting_started/tutorial/",
('docs/index.html', '/docs/en/',), ),
(
"reference_en.html",
"/docs/en/single/",
),
(
"reference_ru.html",
"/docs/ru/single/",
),
(
"docs/index.html",
"/docs/en/",
),
]: ]:
write_redirect_html( write_redirect_html(
os.path.join(args.output_dir, static_redirect[0]), os.path.join(args.output_dir, static_redirect[0]), static_redirect[1]
static_redirect[1]
) )
@@ -12,7 +12,8 @@ import test
import util import util
import website import website
TEMPORARY_FILE_NAME = 'single.md' TEMPORARY_FILE_NAME = "single.md"
def recursive_values(item): def recursive_values(item):
if isinstance(item, dict): if isinstance(item, dict):
@@ -25,11 +26,14 @@ def recursive_values(item):
yield item yield item
anchor_not_allowed_chars = re.compile(r'[^\w\-]') anchor_not_allowed_chars = re.compile(r"[^\w\-]")
def generate_anchor_from_path(path):
return re.sub(anchor_not_allowed_chars, '-', path)
absolute_link = re.compile(r'^https?://')
def generate_anchor_from_path(path):
return re.sub(anchor_not_allowed_chars, "-", path)
absolute_link = re.compile(r"^https?://")
def replace_link(match, path): def replace_link(match, path):
@@ -40,46 +44,55 @@ def replace_link(match, path):
if re.search(absolute_link, link): if re.search(absolute_link, link):
return match.group(0) return match.group(0)
if link.endswith('/'): if link.endswith("/"):
link = link[0:-1] + '.md' link = link[0:-1] + ".md"
return '{}(#{})'.format(title, generate_anchor_from_path(os.path.normpath(os.path.join(os.path.dirname(path), link)))) return "{}(#{})".format(
title,
generate_anchor_from_path(
os.path.normpath(os.path.join(os.path.dirname(path), link))
),
)
# Concatenates Markdown files to a single file. # Concatenates Markdown files to a single file.
def concatenate(lang, docs_path, single_page_file, nav): def concatenate(lang, docs_path, single_page_file, nav):
lang_path = os.path.join(docs_path, lang) lang_path = os.path.join(docs_path, lang)
proj_config = f'{docs_path}/toc_{lang}.yml' proj_config = f"{docs_path}/toc_{lang}.yml"
if os.path.exists(proj_config): if os.path.exists(proj_config):
with open(proj_config) as cfg_file: with open(proj_config) as cfg_file:
nav = yaml.full_load(cfg_file.read())['nav'] nav = yaml.full_load(cfg_file.read())["nav"]
files_to_concatenate = list(recursive_values(nav)) files_to_concatenate = list(recursive_values(nav))
files_count = len(files_to_concatenate) files_count = len(files_to_concatenate)
logging.info(f'{files_count} files will be concatenated into single md-file for {lang}.') logging.info(
logging.debug('Concatenating: ' + ', '.join(files_to_concatenate)) f"{files_count} files will be concatenated into single md-file for {lang}."
assert files_count > 0, f'Empty single-page for {lang}' )
logging.debug("Concatenating: " + ", ".join(files_to_concatenate))
assert files_count > 0, f"Empty single-page for {lang}"
link_regexp = re.compile(r'(\[[^\]]+\])\(([^)#]+)(?:#[^\)]+)?\)') link_regexp = re.compile(r"(\[[^\]]+\])\(([^)#]+)(?:#[^\)]+)?\)")
for path in files_to_concatenate: for path in files_to_concatenate:
try: try:
with open(os.path.join(lang_path, path)) as f: with open(os.path.join(lang_path, path)) as f:
# Insert a horizontal ruler. Then insert an anchor that we will link to. Its name will be a path to the .md file. # Insert a horizontal ruler. Then insert an anchor that we will link to. Its name will be a path to the .md file.
single_page_file.write('\n______\n<a name="%s"></a>\n' % generate_anchor_from_path(path)) single_page_file.write(
'\n______\n<a name="%s"></a>\n' % generate_anchor_from_path(path)
)
in_metadata = False in_metadata = False
for line in f: for line in f:
# Skip YAML metadata. # Skip YAML metadata.
if line == '---\n': if line == "---\n":
in_metadata = not in_metadata in_metadata = not in_metadata
continue continue
if not in_metadata: if not in_metadata:
# Increase the level of headers. # Increase the level of headers.
if line.startswith('#'): if line.startswith("#"):
line = '#' + line line = "#" + line
# Replace links within the docs. # Replace links within the docs.
@@ -87,14 +100,19 @@ def concatenate(lang, docs_path, single_page_file, nav):
line = re.sub( line = re.sub(
link_regexp, link_regexp,
lambda match: replace_link(match, path), lambda match: replace_link(match, path),
line) line,
)
# If failed to replace the relative link, print to log # If failed to replace the relative link, print to log
# But with some exceptions: # But with some exceptions:
# - "../src/" -- for cmake-in-clickhouse.md (link to sources) # - "../src/" -- for cmake-in-clickhouse.md (link to sources)
# - "../usr/share" -- changelog entry that has "../usr/share/zoneinfo" # - "../usr/share" -- changelog entry that has "../usr/share/zoneinfo"
if '../' in line and (not '../usr/share' in line) and (not '../src/' in line): if (
logging.info('Failed to resolve relative link:') "../" in line
and (not "../usr/share" in line)
and (not "../src/" in line)
):
logging.info("Failed to resolve relative link:")
logging.info(path) logging.info(path)
logging.info(line) logging.info(line)
@@ -105,9 +123,11 @@ def concatenate(lang, docs_path, single_page_file, nav):
single_page_file.flush() single_page_file.flush()
def get_temporary_file_name(lang, args): def get_temporary_file_name(lang, args):
return os.path.join(args.docs_dir, lang, TEMPORARY_FILE_NAME) return os.path.join(args.docs_dir, lang, TEMPORARY_FILE_NAME)
def remove_temporary_files(lang, args): def remove_temporary_files(lang, args):
single_md_path = get_temporary_file_name(lang, args) single_md_path = get_temporary_file_name(lang, args)
if os.path.exists(single_md_path): if os.path.exists(single_md_path):
@@ -115,14 +135,14 @@ def remove_temporary_files(lang, args):
def build_single_page_version(lang, args, nav, cfg): def build_single_page_version(lang, args, nav, cfg):
logging.info(f'Building single page version for {lang}') logging.info(f"Building single page version for {lang}")
os.environ['SINGLE_PAGE'] = '1' os.environ["SINGLE_PAGE"] = "1"
extra = cfg.data['extra'] extra = cfg.data["extra"]
extra['single_page'] = True extra["single_page"] = True
extra['is_amp'] = False extra["is_amp"] = False
single_md_path = get_temporary_file_name(lang, args) single_md_path = get_temporary_file_name(lang, args)
with open(single_md_path, 'w') as single_md: with open(single_md_path, "w") as single_md:
concatenate(lang, args.docs_dir, single_md, nav) concatenate(lang, args.docs_dir, single_md, nav)
with util.temp_dir() as site_temp: with util.temp_dir() as site_temp:
@@ -132,72 +152,83 @@ def build_single_page_version(lang, args, nav, cfg):
shutil.copytree(docs_src_lang, docs_temp_lang) shutil.copytree(docs_src_lang, docs_temp_lang)
for root, _, filenames in os.walk(docs_temp_lang): for root, _, filenames in os.walk(docs_temp_lang):
for filename in filenames: for filename in filenames:
if filename != 'single.md' and filename.endswith('.md'): if filename != "single.md" and filename.endswith(".md"):
os.unlink(os.path.join(root, filename)) os.unlink(os.path.join(root, filename))
cfg.load_dict({ cfg.load_dict(
'docs_dir': docs_temp_lang, {
'site_dir': site_temp, "docs_dir": docs_temp_lang,
'extra': extra, "site_dir": site_temp,
'nav': [ "extra": extra,
{cfg.data.get('site_name'): 'single.md'} "nav": [{cfg.data.get("site_name"): "single.md"}],
] }
}) )
if not args.test_only: if not args.test_only:
mkdocs.commands.build.build(cfg) mkdocs.commands.build.build(cfg)
single_page_output_path = os.path.join(args.docs_dir, args.docs_output_dir, lang, 'single') single_page_output_path = os.path.join(
args.docs_dir, args.docs_output_dir, lang, "single"
)
if os.path.exists(single_page_output_path): if os.path.exists(single_page_output_path):
shutil.rmtree(single_page_output_path) shutil.rmtree(single_page_output_path)
shutil.copytree( shutil.copytree(
os.path.join(site_temp, 'single'), os.path.join(site_temp, "single"), single_page_output_path
single_page_output_path
) )
single_page_index_html = os.path.join(single_page_output_path, 'index.html') single_page_index_html = os.path.join(
single_page_content_js = os.path.join(single_page_output_path, 'content.js') single_page_output_path, "index.html"
)
single_page_content_js = os.path.join(
single_page_output_path, "content.js"
)
with open(single_page_index_html, 'r') as f: with open(single_page_index_html, "r") as f:
sp_prefix, sp_js, sp_suffix = f.read().split('<!-- BREAK -->') sp_prefix, sp_js, sp_suffix = f.read().split("<!-- BREAK -->")
with open(single_page_index_html, 'w') as f: with open(single_page_index_html, "w") as f:
f.write(sp_prefix) f.write(sp_prefix)
f.write(sp_suffix) f.write(sp_suffix)
with open(single_page_content_js, 'w') as f: with open(single_page_content_js, "w") as f:
if args.minify: if args.minify:
import jsmin import jsmin
sp_js = jsmin.jsmin(sp_js) sp_js = jsmin.jsmin(sp_js)
f.write(sp_js) f.write(sp_js)
logging.info(f'Re-building single page for {lang} pdf/test') logging.info(f"Re-building single page for {lang} pdf/test")
with util.temp_dir() as test_dir: with util.temp_dir() as test_dir:
extra['single_page'] = False extra["single_page"] = False
cfg.load_dict({ cfg.load_dict(
'docs_dir': docs_temp_lang, {
'site_dir': test_dir, "docs_dir": docs_temp_lang,
'extra': extra, "site_dir": test_dir,
'nav': [ "extra": extra,
{cfg.data.get('site_name'): 'single.md'} "nav": [{cfg.data.get("site_name"): "single.md"}],
] }
}) )
mkdocs.commands.build.build(cfg) mkdocs.commands.build.build(cfg)
css_in = ' '.join(website.get_css_in(args)) css_in = " ".join(website.get_css_in(args))
js_in = ' '.join(website.get_js_in(args)) js_in = " ".join(website.get_js_in(args))
subprocess.check_call(f'cat {css_in} > {test_dir}/css/base.css', shell=True) subprocess.check_call(
subprocess.check_call(f'cat {js_in} > {test_dir}/js/base.js', shell=True) f"cat {css_in} > {test_dir}/css/base.css", shell=True
)
subprocess.check_call(
f"cat {js_in} > {test_dir}/js/base.js", shell=True
)
if args.save_raw_single_page: if args.save_raw_single_page:
shutil.copytree(test_dir, args.save_raw_single_page) shutil.copytree(test_dir, args.save_raw_single_page)
logging.info(f'Running tests for {lang}') logging.info(f"Running tests for {lang}")
test.test_single_page( test.test_single_page(
os.path.join(test_dir, 'single', 'index.html'), lang) os.path.join(test_dir, "single", "index.html"), lang
)
logging.info(f'Finished building single page version for {lang}') logging.info(f"Finished building single page version for {lang}")
remove_temporary_files(lang, args) remove_temporary_files(lang, args)
@@ -8,14 +8,11 @@ import subprocess
def test_single_page(input_path, lang): def test_single_page(input_path, lang):
if not (lang == 'en'): if not (lang == "en"):
return return
with open(input_path) as f: with open(input_path) as f:
soup = bs4.BeautifulSoup( soup = bs4.BeautifulSoup(f, features="html.parser")
f,
features='html.parser'
)
anchor_points = set() anchor_points = set()
@@ -23,30 +20,27 @@ def test_single_page(input_path, lang):
links_to_nowhere = 0 links_to_nowhere = 0
for tag in soup.find_all(): for tag in soup.find_all():
for anchor_point in [tag.attrs.get('name'), tag.attrs.get('id')]: for anchor_point in [tag.attrs.get("name"), tag.attrs.get("id")]:
if anchor_point: if anchor_point:
anchor_points.add(anchor_point) anchor_points.add(anchor_point)
for tag in soup.find_all(): for tag in soup.find_all():
href = tag.attrs.get('href') href = tag.attrs.get("href")
if href and href.startswith('#') and href != '#': if href and href.startswith("#") and href != "#":
if href[1:] not in anchor_points: if href[1:] not in anchor_points:
links_to_nowhere += 1 links_to_nowhere += 1
logging.info("Tag %s", tag) logging.info("Tag %s", tag)
logging.info('Link to nowhere: %s' % href) logging.info("Link to nowhere: %s" % href)
if links_to_nowhere: if links_to_nowhere:
logging.error(f'Found {links_to_nowhere} links to nowhere in {lang}') logging.error(f"Found {links_to_nowhere} links to nowhere in {lang}")
sys.exit(1) sys.exit(1)
if len(anchor_points) <= 10: if len(anchor_points) <= 10:
logging.error('Html parsing is probably broken') logging.error("Html parsing is probably broken")
sys.exit(1) sys.exit(1)
if __name__ == '__main__': if __name__ == "__main__":
logging.basicConfig( logging.basicConfig(level=logging.DEBUG, stream=sys.stderr)
level=logging.DEBUG,
stream=sys.stderr
)
test_single_page(sys.argv[1], sys.argv[2]) test_single_page(sys.argv[1], sys.argv[2])
@@ -15,7 +15,7 @@ import yaml
@contextlib.contextmanager @contextlib.contextmanager
def temp_dir(): def temp_dir():
path = tempfile.mkdtemp(dir=os.environ.get('TEMP')) path = tempfile.mkdtemp(dir=os.environ.get("TEMP"))
try: try:
yield path yield path
finally: finally:
@@ -34,7 +34,7 @@ def cd(new_cwd):
def get_free_port(): def get_free_port():
with contextlib.closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: with contextlib.closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
s.bind(('', 0)) s.bind(("", 0))
s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
return s.getsockname()[1] return s.getsockname()[1]
@@ -61,12 +61,12 @@ def read_md_file(path):
meta_text = [] meta_text = []
content = [] content = []
if os.path.exists(path): if os.path.exists(path):
with open(path, 'r') as f: with open(path, "r") as f:
for line in f: for line in f:
if line.startswith('---'): if line.startswith("---"):
if in_meta: if in_meta:
in_meta = False in_meta = False
meta = yaml.full_load(''.join(meta_text)) meta = yaml.full_load("".join(meta_text))
else: else:
in_meta = True in_meta = True
else: else:
@@ -74,7 +74,7 @@ def read_md_file(path):
meta_text.append(line) meta_text.append(line)
else: else:
content.append(line) content.append(line)
return meta, ''.join(content) return meta, "".join(content)
def write_md_file(path, meta, content): def write_md_file(path, meta, content):
@@ -82,13 +82,13 @@ def write_md_file(path, meta, content):
if not os.path.exists(dirname): if not os.path.exists(dirname):
os.makedirs(dirname) os.makedirs(dirname)
with open(path, 'w') as f: with open(path, "w") as f:
if meta: if meta:
print('---', file=f) print("---", file=f)
yaml.dump(meta, f) yaml.dump(meta, f)
print('---', file=f) print("---", file=f)
if not content.startswith('\n'): if not content.startswith("\n"):
print('', file=f) print("", file=f)
f.write(content) f.write(content)
@@ -100,7 +100,7 @@ def represent_ordereddict(dumper, data):
value.append((node_key, node_value)) value.append((node_key, node_value))
return yaml.nodes.MappingNode(u'tag:yaml.org,2002:map', value) return yaml.nodes.MappingNode("tag:yaml.org,2002:map", value)
yaml.add_representer(collections.OrderedDict, represent_ordereddict) yaml.add_representer(collections.OrderedDict, represent_ordereddict)
@@ -109,30 +109,31 @@ yaml.add_representer(collections.OrderedDict, represent_ordereddict)
def init_jinja2_filters(env): def init_jinja2_filters(env):
import amp import amp
import website import website
chunk_size = 10240 chunk_size = 10240
env.filters['chunks'] = lambda line: [line[i:i + chunk_size] for i in range(0, len(line), chunk_size)] env.filters["chunks"] = lambda line: [
env.filters['html_to_amp'] = amp.html_to_amp line[i : i + chunk_size] for i in range(0, len(line), chunk_size)
env.filters['adjust_markdown_html'] = website.adjust_markdown_html ]
env.filters['to_rfc882'] = lambda d: datetime.datetime.strptime(d, '%Y-%m-%d').strftime('%a, %d %b %Y %H:%M:%S GMT') env.filters["html_to_amp"] = amp.html_to_amp
env.filters["adjust_markdown_html"] = website.adjust_markdown_html
env.filters["to_rfc882"] = lambda d: datetime.datetime.strptime(
d, "%Y-%m-%d"
).strftime("%a, %d %b %Y %H:%M:%S GMT")
def init_jinja2_env(args): def init_jinja2_env(args):
import mdx_clickhouse import mdx_clickhouse
env = jinja2.Environment( env = jinja2.Environment(
loader=jinja2.FileSystemLoader([ loader=jinja2.FileSystemLoader(
args.website_dir, [args.website_dir, os.path.join(args.docs_dir, "_includes")]
os.path.join(args.docs_dir, '_includes') ),
]), extensions=["jinja2.ext.i18n", "jinja2_highlight.HighlightExtension"],
extensions=[
'jinja2.ext.i18n',
'jinja2_highlight.HighlightExtension'
]
) )
env.extend(jinja2_highlight_cssclass='syntax p-3 my-3') env.extend(jinja2_highlight_cssclass="syntax p-3 my-3")
translations_dir = os.path.join(args.website_dir, 'locale') translations_dir = os.path.join(args.website_dir, "locale")
env.install_gettext_translations( env.install_gettext_translations(
mdx_clickhouse.get_translations(translations_dir, 'en'), mdx_clickhouse.get_translations(translations_dir, "en"), newstyle=True
newstyle=True
) )
init_jinja2_filters(env) init_jinja2_filters(env)
return env return env
@@ -17,108 +17,112 @@ import util
def handle_iframe(iframe, soup): def handle_iframe(iframe, soup):
allowed_domains = ['https://www.youtube.com/', 'https://datalens.yandex/'] allowed_domains = ["https://www.youtube.com/", "https://datalens.yandex/"]
illegal_domain = True illegal_domain = True
iframe_src = iframe.attrs['src'] iframe_src = iframe.attrs["src"]
for domain in allowed_domains: for domain in allowed_domains:
if iframe_src.startswith(domain): if iframe_src.startswith(domain):
illegal_domain = False illegal_domain = False
break break
if illegal_domain: if illegal_domain:
raise RuntimeError(f'iframe from illegal domain: {iframe_src}') raise RuntimeError(f"iframe from illegal domain: {iframe_src}")
wrapper = soup.new_tag('div') wrapper = soup.new_tag("div")
wrapper.attrs['class'] = ['embed-responsive', 'embed-responsive-16by9'] wrapper.attrs["class"] = ["embed-responsive", "embed-responsive-16by9"]
iframe.insert_before(wrapper) iframe.insert_before(wrapper)
iframe.extract() iframe.extract()
wrapper.insert(0, iframe) wrapper.insert(0, iframe)
if 'width' in iframe.attrs: if "width" in iframe.attrs:
del iframe.attrs['width'] del iframe.attrs["width"]
if 'height' in iframe.attrs: if "height" in iframe.attrs:
del iframe.attrs['height'] del iframe.attrs["height"]
iframe.attrs['allow'] = 'accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture' iframe.attrs[
iframe.attrs['class'] = 'embed-responsive-item' "allow"
iframe.attrs['frameborder'] = '0' ] = "accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture"
iframe.attrs['allowfullscreen'] = '1' iframe.attrs["class"] = "embed-responsive-item"
iframe.attrs["frameborder"] = "0"
iframe.attrs["allowfullscreen"] = "1"
def adjust_markdown_html(content): def adjust_markdown_html(content):
soup = bs4.BeautifulSoup( soup = bs4.BeautifulSoup(content, features="html.parser")
content,
features='html.parser'
)
for a in soup.find_all('a'): for a in soup.find_all("a"):
a_class = a.attrs.get('class') a_class = a.attrs.get("class")
a_href = a.attrs.get('href') a_href = a.attrs.get("href")
if a_class and 'headerlink' in a_class: if a_class and "headerlink" in a_class:
a.string = '\xa0' a.string = "\xa0"
if a_href and a_href.startswith('http'): if a_href and a_href.startswith("http"):
a.attrs['target'] = '_blank' a.attrs["target"] = "_blank"
for code in soup.find_all('code'): for code in soup.find_all("code"):
code_class = code.attrs.get('class') code_class = code.attrs.get("class")
if code_class: if code_class:
code.attrs['class'] = code_class + ['syntax'] code.attrs["class"] = code_class + ["syntax"]
else: else:
code.attrs['class'] = 'syntax' code.attrs["class"] = "syntax"
for iframe in soup.find_all('iframe'): for iframe in soup.find_all("iframe"):
handle_iframe(iframe, soup) handle_iframe(iframe, soup)
for img in soup.find_all('img'): for img in soup.find_all("img"):
if img.attrs.get('alt') == 'iframe': if img.attrs.get("alt") == "iframe":
img.name = 'iframe' img.name = "iframe"
img.string = '' img.string = ""
handle_iframe(img, soup) handle_iframe(img, soup)
continue continue
img_class = img.attrs.get('class') img_class = img.attrs.get("class")
if img_class: if img_class:
img.attrs['class'] = img_class + ['img-fluid'] img.attrs["class"] = img_class + ["img-fluid"]
else: else:
img.attrs['class'] = 'img-fluid' img.attrs["class"] = "img-fluid"
for details in soup.find_all('details'): for details in soup.find_all("details"):
for summary in details.find_all('summary'): for summary in details.find_all("summary"):
if summary.parent != details: if summary.parent != details:
summary.extract() summary.extract()
details.insert(0, summary) details.insert(0, summary)
for dd in soup.find_all('dd'): for dd in soup.find_all("dd"):
dd_class = dd.attrs.get('class') dd_class = dd.attrs.get("class")
if dd_class: if dd_class:
dd.attrs['class'] = dd_class + ['pl-3'] dd.attrs["class"] = dd_class + ["pl-3"]
else: else:
dd.attrs['class'] = 'pl-3' dd.attrs["class"] = "pl-3"
for div in soup.find_all('div'): for div in soup.find_all("div"):
div_class = div.attrs.get('class') div_class = div.attrs.get("class")
is_admonition = div_class and 'admonition' in div.attrs.get('class') is_admonition = div_class and "admonition" in div.attrs.get("class")
if is_admonition: if is_admonition:
for a in div.find_all('a'): for a in div.find_all("a"):
a_class = a.attrs.get('class') a_class = a.attrs.get("class")
if a_class: if a_class:
a.attrs['class'] = a_class + ['alert-link'] a.attrs["class"] = a_class + ["alert-link"]
else: else:
a.attrs['class'] = 'alert-link' a.attrs["class"] = "alert-link"
for p in div.find_all('p'): for p in div.find_all("p"):
p_class = p.attrs.get('class') p_class = p.attrs.get("class")
if is_admonition and p_class and ('admonition-title' in p_class): if is_admonition and p_class and ("admonition-title" in p_class):
p.attrs['class'] = p_class + ['alert-heading', 'display-4', 'text-reset', 'mb-2'] p.attrs["class"] = p_class + [
"alert-heading",
"display-4",
"text-reset",
"mb-2",
]
if is_admonition: if is_admonition:
div.attrs['role'] = 'alert' div.attrs["role"] = "alert"
if ('info' in div_class) or ('note' in div_class): if ("info" in div_class) or ("note" in div_class):
mode = 'alert-primary' mode = "alert-primary"
elif ('attention' in div_class) or ('warning' in div_class): elif ("attention" in div_class) or ("warning" in div_class):
mode = 'alert-warning' mode = "alert-warning"
elif 'important' in div_class: elif "important" in div_class:
mode = 'alert-danger' mode = "alert-danger"
elif 'tip' in div_class: elif "tip" in div_class:
mode = 'alert-info' mode = "alert-info"
else: else:
mode = 'alert-secondary' mode = "alert-secondary"
div.attrs['class'] = div_class + ['alert', 'pb-0', 'mb-4', mode] div.attrs["class"] = div_class + ["alert", "pb-0", "mb-4", mode]
return str(soup) return str(soup)
@@ -128,61 +132,63 @@ def minify_html(content):
def build_website(args): def build_website(args):
logging.info('Building website') logging.info("Building website")
env = util.init_jinja2_env(args) env = util.init_jinja2_env(args)
shutil.copytree( shutil.copytree(
args.website_dir, args.website_dir,
args.output_dir, args.output_dir,
ignore=shutil.ignore_patterns( ignore=shutil.ignore_patterns(
'*.md', "*.md",
'*.sh', "*.sh",
'*.css', "*.css",
'*.json', "*.json",
'js/*.js', "js/*.js",
'build', "build",
'docs', "docs",
'public', "public",
'node_modules', "node_modules",
'src', "src",
'templates', "templates",
'locale', "locale",
'.gitkeep' ".gitkeep",
) ),
) )
shutil.copytree( shutil.copytree(
os.path.join(args.website_dir, 'images'), os.path.join(args.website_dir, "images"),
os.path.join(args.output_dir, 'docs', 'images') os.path.join(args.output_dir, "docs", "images"),
) )
# This file can be requested to check for available ClickHouse releases. # This file can be requested to check for available ClickHouse releases.
shutil.copy2( shutil.copy2(
os.path.join(args.src_dir, 'utils', 'list-versions', 'version_date.tsv'), os.path.join(args.src_dir, "utils", "list-versions", "version_date.tsv"),
os.path.join(args.output_dir, 'data', 'version_date.tsv')) os.path.join(args.output_dir, "data", "version_date.tsv"),
)
# This file can be requested to install ClickHouse. # This file can be requested to install ClickHouse.
shutil.copy2( shutil.copy2(
os.path.join(args.src_dir, 'docs', '_includes', 'install', 'universal.sh'), os.path.join(args.src_dir, "docs", "_includes", "install", "universal.sh"),
os.path.join(args.output_dir, 'data', 'install.sh')) os.path.join(args.output_dir, "data", "install.sh"),
)
for root, _, filenames in os.walk(args.output_dir): for root, _, filenames in os.walk(args.output_dir):
for filename in filenames: for filename in filenames:
if filename == 'main.html': if filename == "main.html":
continue continue
path = os.path.join(root, filename) path = os.path.join(root, filename)
if not filename.endswith('.html'): if not filename.endswith(".html"):
continue continue
logging.info('Processing %s', path) logging.info("Processing %s", path)
with open(path, 'rb') as f: with open(path, "rb") as f:
content = f.read().decode('utf-8') content = f.read().decode("utf-8")
template = env.from_string(content) template = env.from_string(content)
content = template.render(args.__dict__) content = template.render(args.__dict__)
with open(path, 'wb') as f: with open(path, "wb") as f:
f.write(content.encode('utf-8')) f.write(content.encode("utf-8"))
def get_css_in(args): def get_css_in(args):
@@ -193,7 +199,7 @@ def get_css_in(args):
f"'{args.website_dir}/css/blog.css'", f"'{args.website_dir}/css/blog.css'",
f"'{args.website_dir}/css/docs.css'", f"'{args.website_dir}/css/docs.css'",
f"'{args.website_dir}/css/highlight.css'", f"'{args.website_dir}/css/highlight.css'",
f"'{args.website_dir}/css/main.css'" f"'{args.website_dir}/css/main.css'",
] ]
@@ -207,42 +213,41 @@ def get_js_in(args):
f"'{args.website_dir}/js/index.js'", f"'{args.website_dir}/js/index.js'",
f"'{args.website_dir}/js/docsearch.js'", f"'{args.website_dir}/js/docsearch.js'",
f"'{args.website_dir}/js/docs.js'", f"'{args.website_dir}/js/docs.js'",
f"'{args.website_dir}/js/main.js'" f"'{args.website_dir}/js/main.js'",
] ]
def minify_file(path, css_digest, js_digest): def minify_file(path, css_digest, js_digest):
if not ( if not (path.endswith(".html") or path.endswith(".css")):
path.endswith('.html') or
path.endswith('.css')
):
return return
logging.info('Minifying %s', path) logging.info("Minifying %s", path)
with open(path, 'rb') as f: with open(path, "rb") as f:
content = f.read().decode('utf-8') content = f.read().decode("utf-8")
if path.endswith('.html'): if path.endswith(".html"):
content = minify_html(content) content = minify_html(content)
content = content.replace('base.css?css_digest', f'base.css?{css_digest}') content = content.replace("base.css?css_digest", f"base.css?{css_digest}")
content = content.replace('base.js?js_digest', f'base.js?{js_digest}') content = content.replace("base.js?js_digest", f"base.js?{js_digest}")
# TODO: restore cssmin # TODO: restore cssmin
# elif path.endswith('.css'): # elif path.endswith('.css'):
# content = cssmin.cssmin(content) # content = cssmin.cssmin(content)
# TODO: restore jsmin # TODO: restore jsmin
# elif path.endswith('.js'): # elif path.endswith('.js'):
# content = jsmin.jsmin(content) # content = jsmin.jsmin(content)
with open(path, 'wb') as f: with open(path, "wb") as f:
f.write(content.encode('utf-8')) f.write(content.encode("utf-8"))
def minify_website(args): def minify_website(args):
css_in = ' '.join(get_css_in(args)) css_in = " ".join(get_css_in(args))
css_out = f'{args.output_dir}/docs/css/base.css' css_out = f"{args.output_dir}/docs/css/base.css"
os.makedirs(f'{args.output_dir}/docs/css') os.makedirs(f"{args.output_dir}/docs/css")
if args.minify and False: # TODO: return closure if args.minify and False: # TODO: return closure
command = f"purifycss -w '*algolia*' --min {css_in} '{args.output_dir}/*.html' " \ command = (
f"purifycss -w '*algolia*' --min {css_in} '{args.output_dir}/*.html' "
f"'{args.output_dir}/docs/en/**/*.html' '{args.website_dir}/js/**/*.js' > {css_out}" f"'{args.output_dir}/docs/en/**/*.html' '{args.website_dir}/js/**/*.js' > {css_out}"
)
logging.info(css_in) logging.info(css_in)
logging.info(command) logging.info(command)
output = subprocess.check_output(command, shell=True) output = subprocess.check_output(command, shell=True)
@@ -251,51 +256,60 @@ def minify_website(args):
else: else:
command = f"cat {css_in}" command = f"cat {css_in}"
output = subprocess.check_output(command, shell=True) output = subprocess.check_output(command, shell=True)
with open(css_out, 'wb+') as f: with open(css_out, "wb+") as f:
f.write(output) f.write(output)
with open(css_out, 'rb') as f: with open(css_out, "rb") as f:
css_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8] css_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8]
js_in = ' '.join(get_js_in(args)) js_in = " ".join(get_js_in(args))
js_out = f'{args.output_dir}/docs/js/base.js' js_out = f"{args.output_dir}/docs/js/base.js"
os.makedirs(f'{args.output_dir}/docs/js') os.makedirs(f"{args.output_dir}/docs/js")
if args.minify and False: # TODO: return closure if args.minify and False: # TODO: return closure
js_in = [js[1:-1] for js in js_in] js_in = [js[1:-1] for js in js_in]
closure_args = [ closure_args = [
'--js', *js_in, '--js_output_file', js_out, "--js",
'--compilation_level', 'SIMPLE', *js_in,
'--dependency_mode', 'NONE', "--js_output_file",
'--third_party', '--use_types_for_optimization', js_out,
'--isolation_mode', 'IIFE' "--compilation_level",
"SIMPLE",
"--dependency_mode",
"NONE",
"--third_party",
"--use_types_for_optimization",
"--isolation_mode",
"IIFE",
] ]
logging.info(closure_args) logging.info(closure_args)
if closure.run(*closure_args): if closure.run(*closure_args):
raise RuntimeError('failed to run closure compiler') raise RuntimeError("failed to run closure compiler")
with open(js_out, 'r') as f: with open(js_out, "r") as f:
js_content = jsmin.jsmin(f.read()) js_content = jsmin.jsmin(f.read())
with open(js_out, 'w') as f: with open(js_out, "w") as f:
f.write(js_content) f.write(js_content)
else: else:
command = f"cat {js_in}" command = f"cat {js_in}"
output = subprocess.check_output(command, shell=True) output = subprocess.check_output(command, shell=True)
with open(js_out, 'wb+') as f: with open(js_out, "wb+") as f:
f.write(output) f.write(output)
with open(js_out, 'rb') as f: with open(js_out, "rb") as f:
js_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8] js_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8]
logging.info(js_digest) logging.info(js_digest)
if args.minify: if args.minify:
logging.info('Minifying website') logging.info("Minifying website")
with concurrent.futures.ThreadPoolExecutor() as executor: with concurrent.futures.ThreadPoolExecutor() as executor:
futures = [] futures = []
for root, _, filenames in os.walk(args.output_dir): for root, _, filenames in os.walk(args.output_dir):
for filename in filenames: for filename in filenames:
path = os.path.join(root, filename) path = os.path.join(root, filename)
futures.append(executor.submit(minify_file, path, css_digest, js_digest)) futures.append(
executor.submit(minify_file, path, css_digest, js_digest)
)
for future in futures: for future in futures:
exc = future.exception() exc = future.exception()
if exc: if exc:
@ -304,24 +318,28 @@ def minify_website(args):
def process_benchmark_results(args): def process_benchmark_results(args):
benchmark_root = os.path.join(args.website_dir, 'benchmark') benchmark_root = os.path.join(args.website_dir, "benchmark")
required_keys = { required_keys = {
'dbms': ['result'], "dbms": ["result"],
'hardware': ['result', 'system', 'system_full', 'kind'] "hardware": ["result", "system", "system_full", "kind"],
} }
for benchmark_kind in ['dbms', 'hardware']: for benchmark_kind in ["dbms", "hardware"]:
results = [] results = []
results_root = os.path.join(benchmark_root, benchmark_kind, 'results') results_root = os.path.join(benchmark_root, benchmark_kind, "results")
for result in sorted(os.listdir(results_root)): for result in sorted(os.listdir(results_root)):
result_file = os.path.join(results_root, result) result_file = os.path.join(results_root, result)
logging.debug(f'Reading benchmark result from {result_file}') logging.debug(f"Reading benchmark result from {result_file}")
with open(result_file, 'r') as f: with open(result_file, "r") as f:
result = json.loads(f.read()) result = json.loads(f.read())
for item in result: for item in result:
for required_key in required_keys[benchmark_kind]: for required_key in required_keys[benchmark_kind]:
assert required_key in item, f'No "{required_key}" in {result_file}' assert (
required_key in item
), f'No "{required_key}" in {result_file}'
results += result results += result
results_js = os.path.join(args.output_dir, 'benchmark', benchmark_kind, 'results.js') results_js = os.path.join(
with open(results_js, 'w') as f: args.output_dir, "benchmark", benchmark_kind, "results.js"
)
with open(results_js, "w") as f:
data = json.dumps(results) data = json.dumps(results)
f.write(f'var results = {data};') f.write(f"var results = {data};")
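For reference, a minimal sketch of a benchmark entry that would pass the required_keys assertion above for the "hardware" kind; every field value here is invented, and the exact shape of "result" is an assumption rather than something taken from this commit.

    # Hypothetical illustration only: values are invented; only the presence of the
    # keys checked by required_keys above is meaningful here.
    example_hardware_entry = {
        "system": "example-server",
        "system_full": "Example Server, 64 vCPU, 256 GB RAM",
        "kind": "server",
        "result": [[0.010, 0.005, 0.004]],  # timing triples; exact shape assumed
    }

    for required_key in ["result", "system", "system_full", "kind"]:
        assert required_key in example_hardware_entry, f'No "{required_key}"'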
View File
@ -7,16 +7,14 @@ import string
TOKEN_TEXT = 1 TOKEN_TEXT = 1
TOKEN_VAR = 2 TOKEN_VAR = 2
TOKEN_COLON = ':' TOKEN_COLON = ":"
TOKEN_SEMI = ';' TOKEN_SEMI = ";"
TOKEN_OR = '|' TOKEN_OR = "|"
TOKEN_QUESTIONMARK = '?' TOKEN_QUESTIONMARK = "?"
TOKEN_ROUND_BRACKET_OPEN = '(' TOKEN_ROUND_BRACKET_OPEN = "("
TOKEN_ROUND_BRACKET_CLOSE = ')' TOKEN_ROUND_BRACKET_CLOSE = ")"
TOKEN_ASTERISK = '*' TOKEN_ASTERISK = "*"
TOKEN_SLASH = '/' TOKEN_SLASH = "/"
class TextValue: class TextValue:
@ -27,9 +25,9 @@ class TextValue:
def get_slug(self): def get_slug(self):
if self.slug is not None: if self.slug is not None:
return self.slug return self.slug
slug = '' slug = ""
for c in self.t: for c in self.t:
slug += c if c in string.ascii_letters else '_' slug += c if c in string.ascii_letters else "_"
self.slug = slug self.slug = slug
return slug return slug
@ -37,12 +35,12 @@ class TextValue:
return f"TextValue_{self.get_slug()}" return f"TextValue_{self.get_slug()}"
def __repr__(self): def __repr__(self):
return f"TextValue(\"{self.t}\")" return f'TextValue("{self.t}")'
class Var: class Var:
def __init__(self, id_): def __init__(self, id_):
self.id_ = id_ self.id_ = id_
def __repr__(self): def __repr__(self):
return f"Var({self.id_})" return f"Var({self.id_})"
@ -59,8 +57,8 @@ class Parser:
self.cur_tok = None self.cur_tok = None
self.includes = [] self.includes = []
self.proto = '' self.proto = ""
self.cpp = '' self.cpp = ""
def parse_file(self, filename): def parse_file(self, filename):
with open(filename) as f: with open(filename) as f:
@ -81,7 +79,7 @@ class Parser:
if self.text[0] == '"': if self.text[0] == '"':
return self.parse_txt_value() return self.parse_txt_value()
if self.text[0] == '$': if self.text[0] == "$":
return self.parse_var_value() return self.parse_var_value()
c, self.text = self.text[0], self.text[1:] c, self.text = self.text[0], self.text[1:]
@ -89,9 +87,9 @@ class Parser:
return c return c
def parse_var_value(self): def parse_var_value(self):
i = self.text.find(' ') i = self.text.find(" ")
id_, self.text = self.text[1:i], self.text[i+1:] id_, self.text = self.text[1:i], self.text[i + 1 :]
self.var_id = int(id_) self.var_id = int(id_)
self.cur_tok = TOKEN_VAR self.cur_tok = TOKEN_VAR
return TOKEN_VAR return TOKEN_VAR
@ -100,12 +98,12 @@ class Parser:
if self.text[0] != '"': if self.text[0] != '"':
raise Exception("parse_txt_value: expected quote at the start") raise Exception("parse_txt_value: expected quote at the start")
self.t = '' self.t = ""
self.text = self.text[1:] self.text = self.text[1:]
while self.text[0] != '"': while self.text[0] != '"':
if self.text[0] == '\\': if self.text[0] == "\\":
if self.text[1] == 'x': if self.text[1] == "x":
self.t += self.text[:4] self.t += self.text[:4]
self.text = self.text[4:] self.text = self.text[4:]
elif self.text[1] in 'nt\\"': elif self.text[1] in 'nt\\"':
@ -123,7 +121,7 @@ class Parser:
def skip_ws(self): def skip_ws(self):
while self.text and self.text[0] in string.whitespace: while self.text and self.text[0] in string.whitespace:
if self.text[0] == '\n': if self.text[0] == "\n":
self.line += 1 self.line += 1
self.col = 0 self.col = 0
self.text = self.text[1:] self.text = self.text[1:]
@ -134,10 +132,9 @@ class Parser:
def skip_line(self): def skip_line(self):
self.line += 1 self.line += 1
index = self.text.find('\n') index = self.text.find("\n")
self.text = self.text[index:] self.text = self.text[index:]
def parse_statement(self): def parse_statement(self):
if self.skip_ws() is None: if self.skip_ws() is None:
return None return None
@ -164,52 +161,54 @@ class Parser:
def generate(self): def generate(self):
self.proto = 'syntax = "proto3";\n\n' self.proto = 'syntax = "proto3";\n\n'
self.cpp = '#include <iostream>\n#include <string>\n#include <vector>\n\n#include <libfuzzer/libfuzzer_macro.h>\n\n' self.cpp = "#include <iostream>\n#include <string>\n#include <vector>\n\n#include <libfuzzer/libfuzzer_macro.h>\n\n"
for incl_file in self.includes: for incl_file in self.includes:
self.cpp += f'#include "{incl_file}"\n' self.cpp += f'#include "{incl_file}"\n'
self.cpp += '\n' self.cpp += "\n"
self.proto += 'message Word {\n' self.proto += "message Word {\n"
self.proto += '\tenum Value {\n' self.proto += "\tenum Value {\n"
self.cpp += 'void GenerateWord(const Word&, std::string&, int);\n\n' self.cpp += "void GenerateWord(const Word&, std::string&, int);\n\n"
self.cpp += 'void GenerateSentence(const Sentence& stc, std::string &s, int depth) {\n' self.cpp += (
self.cpp += '\tfor (int i = 0; i < stc.words_size(); i++ ) {\n' "void GenerateSentence(const Sentence& stc, std::string &s, int depth) {\n"
self.cpp += '\t\tGenerateWord(stc.words(i), s, ++depth);\n' )
self.cpp += '\t}\n' self.cpp += "\tfor (int i = 0; i < stc.words_size(); i++ ) {\n"
self.cpp += '}\n' self.cpp += "\t\tGenerateWord(stc.words(i), s, ++depth);\n"
self.cpp += "\t}\n"
self.cpp += "}\n"
self.cpp += 'void GenerateWord(const Word& word, std::string &s, int depth) {\n' self.cpp += "void GenerateWord(const Word& word, std::string &s, int depth) {\n"
self.cpp += '\tif (depth > 5) return;\n\n' self.cpp += "\tif (depth > 5) return;\n\n"
self.cpp += '\tswitch (word.value()) {\n' self.cpp += "\tswitch (word.value()) {\n"
for idx, chain in enumerate(self.chains): for idx, chain in enumerate(self.chains):
self.proto += f'\t\tvalue_{idx} = {idx};\n' self.proto += f"\t\tvalue_{idx} = {idx};\n"
self.cpp += f'\t\tcase {idx}: {{\n' self.cpp += f"\t\tcase {idx}: {{\n"
num_var = 0 num_var = 0
for item in chain: for item in chain:
if isinstance(item, TextValue): if isinstance(item, TextValue):
self.cpp += f'\t\t\ts += "{item.t}";\n' self.cpp += f'\t\t\ts += "{item.t}";\n'
elif isinstance(item, Var): elif isinstance(item, Var):
self.cpp += f'\t\t\tif (word.inner().words_size() > {num_var})\t\t\t\tGenerateWord(word.inner().words({num_var}), s, ++depth);\n' self.cpp += f"\t\t\tif (word.inner().words_size() > {num_var})\t\t\t\tGenerateWord(word.inner().words({num_var}), s, ++depth);\n"
num_var += 1 num_var += 1
else: else:
raise Exception("unknown token met during generation") raise Exception("unknown token met during generation")
self.cpp += '\t\t\tbreak;\n\t\t}\n' self.cpp += "\t\t\tbreak;\n\t\t}\n"
self.cpp += '\t\tdefault: break;\n' self.cpp += "\t\tdefault: break;\n"
self.cpp += '\t}\n' self.cpp += "\t}\n"
self.proto += '\t}\n' self.proto += "\t}\n"
self.proto += '\tValue value = 1;\n' self.proto += "\tValue value = 1;\n"
self.proto += '\tSentence inner = 2;\n' self.proto += "\tSentence inner = 2;\n"
self.proto += '}\nmessage Sentence {\n\trepeated Word words = 1;\n}' self.proto += "}\nmessage Sentence {\n\trepeated Word words = 1;\n}"
self.cpp += '}\n' self.cpp += "}\n"
return self.cpp, self.proto return self.cpp, self.proto
def fatal_parsing_error(self, msg): def fatal_parsing_error(self, msg):
@ -220,7 +219,7 @@ class Parser:
def main(args): def main(args):
input_file, outfile_cpp, outfile_proto = args input_file, outfile_cpp, outfile_proto = args
if not outfile_proto.endswith('.proto'): if not outfile_proto.endswith(".proto"):
raise Exception("outfile_proto (argv[3]) should end with `.proto`") raise Exception("outfile_proto (argv[3]) should end with `.proto`")
include_filename = outfile_proto[:-6] + ".pb.h" include_filename = outfile_proto[:-6] + ".pb.h"
@ -231,17 +230,17 @@ def main(args):
cpp, proto = p.generate() cpp, proto = p.generate()
proto = proto.replace('\t', ' ' * 4) proto = proto.replace("\t", " " * 4)
cpp = cpp.replace('\t', ' ' * 4) cpp = cpp.replace("\t", " " * 4)
with open(outfile_cpp, 'w') as f: with open(outfile_cpp, "w") as f:
f.write(cpp) f.write(cpp)
with open(outfile_proto, 'w') as f: with open(outfile_proto, "w") as f:
f.write(proto) f.write(proto)
if __name__ == '__main__': if __name__ == "__main__":
if len(sys.argv) < 3: if len(sys.argv) < 3:
print(f"Usage {sys.argv[0]} <input_file> <outfile.cpp> <outfile.proto>") print(f"Usage {sys.argv[0]} <input_file> <outfile.cpp> <outfile.proto>")
sys.exit(1) sys.exit(1)
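As a rough illustration of what the generate() method above emits on the .proto side: for a grammar with two chains it produces a message of the following shape (after the tab-to-spaces replacement done in main()); the grammar file itself is hypothetical and the generated C++ side is omitted.

    # Illustration only: expected shape of the generated .proto for two chains.
    EXPECTED_PROTO_SHAPE = """syntax = "proto3";

    message Word {
        enum Value {
            value_0 = 0;
            value_1 = 1;
        }
        Value value = 1;
        Sentence inner = 2;
    }
    message Sentence {
        repeated Word words = 1;
    }"""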
View File
@ -9,7 +9,9 @@ import re
parts = {} parts = {}
for s in sys.stdin.read().split(): for s in sys.stdin.read().split():
m = re.match('^([0-9]{6})[0-9]{2}_([0-9]{6})[0-9]{2}_([0-9]+)_([0-9]+)_([0-9]+)$', s) m = re.match(
"^([0-9]{6})[0-9]{2}_([0-9]{6})[0-9]{2}_([0-9]+)_([0-9]+)_([0-9]+)$", s
)
if m == None: if m == None:
continue continue
m1 = m.group(1) m1 = m.group(1)
@ -18,7 +20,7 @@ for s in sys.stdin.read().split():
i2 = int(m.group(4)) i2 = int(m.group(4))
l = int(m.group(5)) l = int(m.group(5))
if m1 != m2: if m1 != m2:
raise Exception('not in single month: ' + s) raise Exception("not in single month: " + s)
if m1 not in parts: if m1 not in parts:
parts[m1] = [] parts[m1] = []
parts[m1].append((i1, i2, l, s)) parts[m1].append((i1, i2, l, s))
@ -27,13 +29,13 @@ for m, ps in sorted(parts.items()):
ps.sort(key=lambda i1_i2_l_s: (i1_i2_l_s[0], -i1_i2_l_s[1], -i1_i2_l_s[2])) ps.sort(key=lambda i1_i2_l_s: (i1_i2_l_s[0], -i1_i2_l_s[1], -i1_i2_l_s[2]))
(x2, y2, l2, s2) = (-1, -1, -1, -1) (x2, y2, l2, s2) = (-1, -1, -1, -1)
for x1, y1, l1, s1 in ps: for x1, y1, l1, s1 in ps:
if x1 >= x2 and y1 <= y2 and l1 < l2 and (x1, y1) != (x2, y2): # 2 contains 1 if x1 >= x2 and y1 <= y2 and l1 < l2 and (x1, y1) != (x2, y2): # 2 contains 1
pass pass
elif x1 > y2: # 1 is to the right of 2 elif x1 > y2: # 1 is to the right of 2
if x1 != y2 + 1 and y2 != -1: if x1 != y2 + 1 and y2 != -1:
print() # to see the missing numbers print() # to see the missing numbers
(x2, y2, l2, s2) = (x1, y1, l1, s1) (x2, y2, l2, s2) = (x1, y1, l1, s1)
print(s1) print(s1)
else: else:
raise Exception('invalid parts intersection: ' + s1 + ' and ' + s2) raise Exception("invalid parts intersection: " + s1 + " and " + s2)
print() print()
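To make the part-name pattern above concrete, here is a small sketch of matching one name against it; the sample part name is invented.

    import re

    # Illustration only: the part name below is made up.
    sample = "20200101_20200107_1_10_2"
    m = re.match(
        "^([0-9]{6})[0-9]{2}_([0-9]{6})[0-9]{2}_([0-9]+)_([0-9]+)_([0-9]+)$", sample
    )
    assert m is not None
    # group(1)/group(2): month prefixes of the min/max dates; group(3)/group(4):
    # min/max block numbers; group(5): level.
    print(m.group(1), m.group(3), m.group(4), m.group(5))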
View File
@ -7,8 +7,14 @@ import sys
from github import Github from github import Github
from env_helper import GITHUB_REPOSITORY, TEMP_PATH, REPO_COPY, REPORTS_PATH, GITHUB_SERVER_URL, \ from env_helper import (
GITHUB_RUN_ID GITHUB_REPOSITORY,
TEMP_PATH,
REPO_COPY,
REPORTS_PATH,
GITHUB_SERVER_URL,
GITHUB_RUN_ID,
)
from s3_helper import S3Helper from s3_helper import S3Helper
from get_robot_token import get_best_robot_token from get_robot_token import get_best_robot_token
from pr_info import PRInfo from pr_info import PRInfo
@ -19,19 +25,24 @@ from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickh
from stopwatch import Stopwatch from stopwatch import Stopwatch
from rerun_helper import RerunHelper from rerun_helper import RerunHelper
IMAGE_NAME = 'clickhouse/fuzzer' IMAGE_NAME = "clickhouse/fuzzer"
def get_run_command(pr_number, sha, download_url, workspace_path, image): def get_run_command(pr_number, sha, download_url, workspace_path, image):
return f'docker run --network=host --volume={workspace_path}:/workspace ' \ return (
'--cap-add syslog --cap-add sys_admin --cap-add=SYS_PTRACE ' \ f"docker run --network=host --volume={workspace_path}:/workspace "
f'-e PR_TO_TEST={pr_number} -e SHA_TO_TEST={sha} -e BINARY_URL_TO_DOWNLOAD="{download_url}" '\ "--cap-add syslog --cap-add sys_admin --cap-add=SYS_PTRACE "
f'{image}' f'-e PR_TO_TEST={pr_number} -e SHA_TO_TEST={sha} -e BINARY_URL_TO_DOWNLOAD="{download_url}" '
f"{image}"
)
def get_commit(gh, commit_sha): def get_commit(gh, commit_sha):
repo = gh.get_repo(GITHUB_REPOSITORY) repo = gh.get_repo(GITHUB_REPOSITORY)
commit = repo.get_commit(commit_sha) commit = repo.get_commit(commit_sha)
return commit return commit
if __name__ == "__main__": if __name__ == "__main__":
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
@ -64,7 +75,7 @@ if __name__ == "__main__":
raise Exception("No build URLs found") raise Exception("No build URLs found")
for url in urls: for url in urls:
if url.endswith('/clickhouse'): if url.endswith("/clickhouse"):
build_url = url build_url = url
break break
else: else:
@ -72,16 +83,20 @@ if __name__ == "__main__":
logging.info("Got build url %s", build_url) logging.info("Got build url %s", build_url)
workspace_path = os.path.join(temp_path, 'workspace') workspace_path = os.path.join(temp_path, "workspace")
if not os.path.exists(workspace_path): if not os.path.exists(workspace_path):
os.makedirs(workspace_path) os.makedirs(workspace_path)
run_command = get_run_command(pr_info.number, pr_info.sha, build_url, workspace_path, docker_image) run_command = get_run_command(
pr_info.number, pr_info.sha, build_url, workspace_path, docker_image
)
logging.info("Going to run %s", run_command) logging.info("Going to run %s", run_command)
run_log_path = os.path.join(temp_path, "runlog.log") run_log_path = os.path.join(temp_path, "runlog.log")
with open(run_log_path, 'w', encoding='utf-8') as log: with open(run_log_path, "w", encoding="utf-8") as log:
with subprocess.Popen(run_command, shell=True, stderr=log, stdout=log) as process: with subprocess.Popen(
run_command, shell=True, stderr=log, stdout=log
) as process:
retcode = process.wait() retcode = process.wait()
if retcode == 0: if retcode == 0:
logging.info("Run successfully") logging.info("Run successfully")
@ -90,56 +105,70 @@ if __name__ == "__main__":
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
check_name_lower = check_name.lower().replace('(', '').replace(')', '').replace(' ', '') check_name_lower = (
s3_prefix = f'{pr_info.number}/{pr_info.sha}/fuzzer_{check_name_lower}/' check_name.lower().replace("(", "").replace(")", "").replace(" ", "")
)
s3_prefix = f"{pr_info.number}/{pr_info.sha}/fuzzer_{check_name_lower}/"
paths = { paths = {
'runlog.log': run_log_path, "runlog.log": run_log_path,
'main.log': os.path.join(workspace_path, 'main.log'), "main.log": os.path.join(workspace_path, "main.log"),
'server.log': os.path.join(workspace_path, 'server.log'), "server.log": os.path.join(workspace_path, "server.log"),
'fuzzer.log': os.path.join(workspace_path, 'fuzzer.log'), "fuzzer.log": os.path.join(workspace_path, "fuzzer.log"),
'report.html': os.path.join(workspace_path, 'report.html'), "report.html": os.path.join(workspace_path, "report.html"),
'core.gz': os.path.join(workspace_path, 'core.gz'), "core.gz": os.path.join(workspace_path, "core.gz"),
} }
s3_helper = S3Helper('https://s3.amazonaws.com') s3_helper = S3Helper("https://s3.amazonaws.com")
for f in paths: for f in paths:
try: try:
paths[f] = s3_helper.upload_test_report_to_s3(paths[f], s3_prefix + '/' + f) paths[f] = s3_helper.upload_test_report_to_s3(paths[f], s3_prefix + "/" + f)
except Exception as ex: except Exception as ex:
logging.info("Exception uploading file %s text %s", f, ex) logging.info("Exception uploading file %s text %s", f, ex)
paths[f] = '' paths[f] = ""
report_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}" report_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}"
if paths['runlog.log']: if paths["runlog.log"]:
report_url = paths['runlog.log'] report_url = paths["runlog.log"]
if paths['main.log']: if paths["main.log"]:
report_url = paths['main.log'] report_url = paths["main.log"]
if paths['server.log']: if paths["server.log"]:
report_url = paths['server.log'] report_url = paths["server.log"]
if paths['fuzzer.log']: if paths["fuzzer.log"]:
report_url = paths['fuzzer.log'] report_url = paths["fuzzer.log"]
if paths['report.html']: if paths["report.html"]:
report_url = paths['report.html'] report_url = paths["report.html"]
# Try to get status message saved by the fuzzer # Try to get status message saved by the fuzzer
try: try:
with open(os.path.join(workspace_path, 'status.txt'), 'r', encoding='utf-8') as status_f: with open(
status = status_f.readline().rstrip('\n') os.path.join(workspace_path, "status.txt"), "r", encoding="utf-8"
) as status_f:
status = status_f.readline().rstrip("\n")
with open(os.path.join(workspace_path, 'description.txt'), 'r', encoding='utf-8') as desc_f: with open(
description = desc_f.readline().rstrip('\n')[:140] os.path.join(workspace_path, "description.txt"), "r", encoding="utf-8"
) as desc_f:
description = desc_f.readline().rstrip("\n")[:140]
except: except:
status = 'failure' status = "failure"
description = 'Task failed: $?=' + str(retcode) description = "Task failed: $?=" + str(retcode)
if 'fail' in status: if "fail" in status:
test_result = [(description, 'FAIL')] test_result = [(description, "FAIL")]
else: else:
test_result = [(description, 'OK')] test_result = [(description, "OK")]
ch_helper = ClickHouseHelper() ch_helper = ClickHouseHelper()
prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_result, status, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name) prepared_events = prepare_tests_results_for_clickhouse(
pr_info,
test_result,
status,
stopwatch.duration_seconds,
stopwatch.start_time_str,
report_url,
check_name,
)
logging.info("Result: '%s', '%s', '%s'", status, description, report_url) logging.info("Result: '%s', '%s', '%s'", status, description, report_url)
print(f"::notice ::Report url: {report_url}") print(f"::notice ::Report url: {report_url}")
View File
@ -6,20 +6,20 @@ import itertools
import os import os
import sys import sys
NO_CHANGES_MSG = 'Nothing to run' NO_CHANGES_MSG = "Nothing to run"
def parse_args(): def parse_args():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('report1') parser.add_argument("report1")
parser.add_argument('report2') parser.add_argument("report2")
return parser.parse_args() return parser.parse_args()
def post_commit_status_from_file(file_path): def post_commit_status_from_file(file_path):
res = [] res = []
with open(file_path, 'r', encoding='utf-8') as f: with open(file_path, "r", encoding="utf-8") as f:
fin = csv.reader(f, delimiter='\t') fin = csv.reader(f, delimiter="\t")
res = list(itertools.islice(fin, 1)) res = list(itertools.islice(fin, 1))
if len(res) < 1: if len(res) < 1:
raise Exception(f'Can\'t read from "{file_path}"') raise Exception(f'Can\'t read from "{file_path}"')
@ -31,8 +31,10 @@ def post_commit_status_from_file(file_path):
def process_results(file_path): def process_results(file_path):
state, report_url, description = post_commit_status_from_file(file_path) state, report_url, description = post_commit_status_from_file(file_path)
prefix = os.path.basename(os.path.dirname(file_path)) prefix = os.path.basename(os.path.dirname(file_path))
print(f'::notice:: bugfix check: {prefix} - {state}: {description} Report url: {report_url}') print(
return state == 'success' f"::notice:: bugfix check: {prefix} - {state}: {description} Report url: {report_url}"
)
return state == "success"
def main(args): def main(args):
@ -42,5 +44,5 @@ def main(args):
sys.exit(0 if is_ok else 1) sys.exit(0 if is_ok else 1)
if __name__ == '__main__': if __name__ == "__main__":
main(parse_args()) main(parse_args())
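post_commit_status_from_file() above reads the first tab-separated row of a status file; as a hedged sketch, a file of roughly this form would satisfy it. The path and values are invented, and the column order (state, report URL, description) is inferred from the unpacking in process_results().

    import csv

    # Hypothetical illustration: path and values are invented.
    with open("/tmp/post_commit_status.tsv", "w", encoding="utf-8") as f:
        csv.writer(f, delimiter="\t").writerow(
            ["success", "https://example.invalid/report.html", "All checks passed"]
        )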
View File
@ -6,7 +6,13 @@ import os
import sys import sys
from github import Github from github import Github
from env_helper import REPORTS_PATH, TEMP_PATH, GITHUB_REPOSITORY, GITHUB_SERVER_URL, GITHUB_RUN_ID from env_helper import (
REPORTS_PATH,
TEMP_PATH,
GITHUB_REPOSITORY,
GITHUB_SERVER_URL,
GITHUB_RUN_ID,
)
from report import create_build_html_report from report import create_build_html_report
from s3_helper import S3Helper from s3_helper import S3Helper
from get_robot_token import get_best_robot_token from get_robot_token import get_best_robot_token
@ -15,8 +21,19 @@ from commit_status_helper import get_commit
from ci_config import CI_CONFIG from ci_config import CI_CONFIG
from rerun_helper import RerunHelper from rerun_helper import RerunHelper
class BuildResult():
def __init__(self, compiler, build_type, sanitizer, bundled, splitted, status, elapsed_seconds, with_coverage): class BuildResult:
def __init__(
self,
compiler,
build_type,
sanitizer,
bundled,
splitted,
status,
elapsed_seconds,
with_coverage,
):
self.compiler = compiler self.compiler = compiler
self.build_type = build_type self.build_type = build_type
self.sanitizer = sanitizer self.sanitizer = sanitizer
@ -26,56 +43,72 @@ class BuildResult():
self.elapsed_seconds = elapsed_seconds self.elapsed_seconds = elapsed_seconds
self.with_coverage = with_coverage self.with_coverage = with_coverage
def group_by_artifacts(build_urls): def group_by_artifacts(build_urls):
groups = {'apk': [],'deb': [], 'binary': [], 'tgz': [], 'rpm': [], 'performance': []} groups = {
"apk": [],
"deb": [],
"binary": [],
"tgz": [],
"rpm": [],
"performance": [],
}
for url in build_urls: for url in build_urls:
if url.endswith('performance.tgz'): if url.endswith("performance.tgz"):
groups['performance'].append(url) groups["performance"].append(url)
elif url.endswith('.deb') or url.endswith('.buildinfo') or url.endswith('.changes') or url.endswith('.tar.gz'): elif (
groups['deb'].append(url) url.endswith(".deb")
elif url.endswith('.apk'): or url.endswith(".buildinfo")
groups['apk'].append(url) or url.endswith(".changes")
elif url.endswith('.rpm'): or url.endswith(".tar.gz")
groups['rpm'].append(url) ):
elif url.endswith('.tgz'): groups["deb"].append(url)
groups['tgz'].append(url) elif url.endswith(".apk"):
groups["apk"].append(url)
elif url.endswith(".rpm"):
groups["rpm"].append(url)
elif url.endswith(".tgz"):
groups["tgz"].append(url)
else: else:
groups['binary'].append(url) groups["binary"].append(url)
return groups return groups
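A brief illustration of how group_by_artifacts() above buckets URLs; all URLs are invented, and the snippet assumes it runs alongside the function.

    # Illustration only: URLs are invented.
    urls = [
        "https://example.invalid/clickhouse-common-static_21.8_amd64.deb",
        "https://example.invalid/performance.tgz",
        "https://example.invalid/clickhouse",  # no known suffix -> "binary"
    ]
    groups = group_by_artifacts(urls)
    # groups["deb"] and groups["performance"] each hold one URL,
    # groups["binary"] holds the bare binary, the remaining groups stay empty.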
def process_report(build_report): def process_report(build_report):
build_config = build_report['build_config'] build_config = build_report["build_config"]
build_result = BuildResult( build_result = BuildResult(
compiler=build_config['compiler'], compiler=build_config["compiler"],
build_type=build_config['build_type'], build_type=build_config["build_type"],
sanitizer=build_config['sanitizer'], sanitizer=build_config["sanitizer"],
bundled=build_config['bundled'], bundled=build_config["bundled"],
splitted=build_config['splitted'], splitted=build_config["splitted"],
status="success" if build_report['status'] else "failure", status="success" if build_report["status"] else "failure",
elapsed_seconds=build_report['elapsed_seconds'], elapsed_seconds=build_report["elapsed_seconds"],
with_coverage=False with_coverage=False,
) )
build_results = [] build_results = []
build_urls = [] build_urls = []
build_logs_urls = [] build_logs_urls = []
urls_groups = group_by_artifacts(build_report['build_urls']) urls_groups = group_by_artifacts(build_report["build_urls"])
found_group = False found_group = False
for _, group_urls in urls_groups.items(): for _, group_urls in urls_groups.items():
if group_urls: if group_urls:
build_results.append(build_result) build_results.append(build_result)
build_urls.append(group_urls) build_urls.append(group_urls)
build_logs_urls.append(build_report['log_url']) build_logs_urls.append(build_report["log_url"])
found_group = True found_group = True
if not found_group: if not found_group:
build_results.append(build_result) build_results.append(build_result)
build_urls.append([""]) build_urls.append([""])
build_logs_urls.append(build_report['log_url']) build_logs_urls.append(build_report["log_url"])
return build_results, build_urls, build_logs_urls return build_results, build_urls, build_logs_urls
def get_build_name_from_file_name(file_name): def get_build_name_from_file_name(file_name):
return file_name.replace('build_urls_', '').replace('.json', '') return file_name.replace("build_urls_", "").replace(".json", "")
if __name__ == "__main__": if __name__ == "__main__":
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
@ -101,17 +134,25 @@ if __name__ == "__main__":
build_reports_map = {} build_reports_map = {}
for root, dirs, files in os.walk(reports_path): for root, dirs, files in os.walk(reports_path):
for f in files: for f in files:
if f.startswith("build_urls_") and f.endswith('.json'): if f.startswith("build_urls_") and f.endswith(".json"):
logging.info("Found build report json %s", f) logging.info("Found build report json %s", f)
build_name = get_build_name_from_file_name(f) build_name = get_build_name_from_file_name(f)
if build_name in reports_order: if build_name in reports_order:
with open(os.path.join(root, f), 'r') as file_handler: with open(os.path.join(root, f), "r") as file_handler:
build_report = json.load(file_handler) build_report = json.load(file_handler)
build_reports_map[build_name] = build_report build_reports_map[build_name] = build_report
else: else:
logging.info("Skipping report %s for build %s, it's not in our reports list", f, build_name) logging.info(
"Skipping report %s for build %s, it's not in our reports list",
f,
build_name,
)
build_reports = [build_reports_map[build_name] for build_name in reports_order if build_name in build_reports_map] build_reports = [
build_reports_map[build_name]
for build_name in reports_order
if build_name in build_reports_map
]
build_results = [] build_results = []
build_artifacts = [] build_artifacts = []
@ -129,7 +170,7 @@ if __name__ == "__main__":
logging.info("No builds, failing check") logging.info("No builds, failing check")
sys.exit(1) sys.exit(1)
s3_helper = S3Helper('https://s3.amazonaws.com') s3_helper = S3Helper("https://s3.amazonaws.com")
pr_info = PRInfo() pr_info = PRInfo()
@ -139,7 +180,9 @@ if __name__ == "__main__":
branch_name = "PR #{}".format(pr_info.number) branch_name = "PR #{}".format(pr_info.number)
branch_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/pull/{pr_info.number}" branch_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/pull/{pr_info.number}"
commit_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commit/{pr_info.sha}" commit_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commit/{pr_info.sha}"
task_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID or '0'}" task_url = (
f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID or '0'}"
)
report = create_build_html_report( report = create_build_html_report(
build_check_name, build_check_name,
build_results, build_results,
@ -148,18 +191,22 @@ if __name__ == "__main__":
task_url, task_url,
branch_url, branch_url,
branch_name, branch_name,
commit_url commit_url,
) )
report_path = os.path.join(temp_path, 'report.html') report_path = os.path.join(temp_path, "report.html")
with open(report_path, 'w') as f: with open(report_path, "w") as f:
f.write(report) f.write(report)
logging.info("Going to upload prepared report") logging.info("Going to upload prepared report")
context_name_for_path = build_check_name.lower().replace(' ', '_') context_name_for_path = build_check_name.lower().replace(" ", "_")
s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + context_name_for_path s3_path_prefix = (
str(pr_info.number) + "/" + pr_info.sha + "/" + context_name_for_path
)
url = s3_helper.upload_build_file_to_s3(report_path, s3_path_prefix + "/report.html") url = s3_helper.upload_build_file_to_s3(
report_path, s3_path_prefix + "/report.html"
)
logging.info("Report url %s", url) logging.info("Report url %s", url)
total_builds = len(build_results) total_builds = len(build_results)
@ -182,4 +229,9 @@ if __name__ == "__main__":
print("::notice ::Report url: {}".format(url)) print("::notice ::Report url: {}".format(url))
commit = get_commit(gh, pr_info.sha) commit = get_commit(gh, pr_info.sha)
commit.create_status(context=build_check_name, description=description, state=summary_status, target_url=url) commit.create_status(
context=build_check_name,
description=description,
state=summary_status,
target_url=url,
)
View File
@ -13,16 +13,19 @@ from compress_files import decompress_fast, compress_fast
DOWNLOAD_RETRIES_COUNT = 5 DOWNLOAD_RETRIES_COUNT = 5
def dowload_file_with_progress(url, path): def dowload_file_with_progress(url, path):
logging.info("Downloading from %s to temp path %s", url, path) logging.info("Downloading from %s to temp path %s", url, path)
for i in range(DOWNLOAD_RETRIES_COUNT): for i in range(DOWNLOAD_RETRIES_COUNT):
try: try:
with open(path, 'wb') as f: with open(path, "wb") as f:
response = requests.get(url, stream=True) response = requests.get(url, stream=True)
response.raise_for_status() response.raise_for_status()
total_length = response.headers.get('content-length') total_length = response.headers.get("content-length")
if total_length is None or int(total_length) == 0: if total_length is None or int(total_length) == 0:
logging.info("No content-length, will download file without progress") logging.info(
"No content-length, will download file without progress"
)
f.write(response.content) f.write(response.content)
else: else:
dl = 0 dl = 0
@ -34,8 +37,8 @@ def dowload_file_with_progress(url, path):
if sys.stdout.isatty(): if sys.stdout.isatty():
done = int(50 * dl / total_length) done = int(50 * dl / total_length)
percent = int(100 * float(dl) / total_length) percent = int(100 * float(dl) / total_length)
eq_str = '=' * done eq_str = "=" * done
space_str = ' ' * (50 - done) space_str = " " * (50 - done)
sys.stdout.write(f"\r[{eq_str}{space_str}] {percent}%") sys.stdout.write(f"\r[{eq_str}{space_str}] {percent}%")
sys.stdout.flush() sys.stdout.flush()
break break
@ -52,7 +55,9 @@ def dowload_file_with_progress(url, path):
logging.info("Downloading finished") logging.info("Downloading finished")
def get_ccache_if_not_exists(path_to_ccache_dir, s3_helper, current_pr_number, temp_path): def get_ccache_if_not_exists(
path_to_ccache_dir, s3_helper, current_pr_number, temp_path
):
ccache_name = os.path.basename(path_to_ccache_dir) ccache_name = os.path.basename(path_to_ccache_dir)
cache_found = False cache_found = False
prs_to_check = [current_pr_number] prs_to_check = [current_pr_number]
@ -93,13 +98,16 @@ def get_ccache_if_not_exists(path_to_ccache_dir, s3_helper, current_pr_number, t
else: else:
logging.info("ccache downloaded") logging.info("ccache downloaded")
def upload_ccache(path_to_ccache_dir, s3_helper, current_pr_number, temp_path): def upload_ccache(path_to_ccache_dir, s3_helper, current_pr_number, temp_path):
logging.info("Uploading cache %s for pr %s", path_to_ccache_dir, current_pr_number) logging.info("Uploading cache %s for pr %s", path_to_ccache_dir, current_pr_number)
ccache_name = os.path.basename(path_to_ccache_dir) ccache_name = os.path.basename(path_to_ccache_dir)
compressed_cache_path = os.path.join(temp_path, ccache_name + ".tar.gz") compressed_cache_path = os.path.join(temp_path, ccache_name + ".tar.gz")
compress_fast(path_to_ccache_dir, compressed_cache_path) compress_fast(path_to_ccache_dir, compressed_cache_path)
s3_path = str(current_pr_number) + "/ccaches/" + os.path.basename(compressed_cache_path) s3_path = (
str(current_pr_number) + "/ccaches/" + os.path.basename(compressed_cache_path)
)
logging.info("Will upload %s to path %s", compressed_cache_path, s3_path) logging.info("Will upload %s to path %s", compressed_cache_path, s3_path)
s3_helper.upload_build_file_to_s3(compressed_cache_path, s3_path) s3_helper.upload_build_file_to_s3(compressed_cache_path, s3_path)
logging.info("Upload finished") logging.info("Upload finished")
View File
@ -20,21 +20,29 @@ if __name__ == "__main__":
if not os.path.exists(temp_path): if not os.path.exists(temp_path):
os.makedirs(temp_path) os.makedirs(temp_path)
sys.path.append(os.path.join(repo_path, "utils/github")) sys.path.append(os.path.join(repo_path, "utils/github"))
with SSHKey("ROBOT_CLICKHOUSE_SSH_KEY"): with SSHKey("ROBOT_CLICKHOUSE_SSH_KEY"):
token = get_parameter_from_ssm("github_robot_token_1") token = get_parameter_from_ssm("github_robot_token_1")
bp = Backport(token, os.environ.get("REPO_OWNER"), os.environ.get("REPO_NAME"), os.environ.get("REPO_TEAM")) bp = Backport(
token,
os.environ.get("REPO_OWNER"),
os.environ.get("REPO_NAME"),
os.environ.get("REPO_TEAM"),
)
def cherrypick_run(token, pr, branch): def cherrypick_run(token, pr, branch):
return CherryPick(token, return CherryPick(
os.environ.get("REPO_OWNER"), os.environ.get("REPO_NAME"), token,
os.environ.get("REPO_TEAM"), pr, branch os.environ.get("REPO_OWNER"),
).execute(repo_path, False) os.environ.get("REPO_NAME"),
os.environ.get("REPO_TEAM"),
pr,
branch,
).execute(repo_path, False)
try: try:
bp.execute(repo_path, 'origin', None, cherrypick_run) bp.execute(repo_path, "origin", None, cherrypick_run)
except subprocess.CalledProcessError as e: except subprocess.CalledProcessError as e:
logging.error(e.output) logging.error(e.output)
View File
@ -17,7 +17,9 @@ import sys
class Backport: class Backport:
def __init__(self, token, owner, name, team): def __init__(self, token, owner, name, team):
self._gh = RemoteRepo(token, owner=owner, name=name, team=team, max_page_size=30, min_page_size=7) self._gh = RemoteRepo(
token, owner=owner, name=name, team=team, max_page_size=30, min_page_size=7
)
self._token = token self._token = token
self.default_branch_name = self._gh.default_branch self.default_branch_name = self._gh.default_branch
self.ssh_url = self._gh.ssh_url self.ssh_url = self._gh.ssh_url
@ -28,7 +30,7 @@ class Backport:
def getBranchesWithRelease(self): def getBranchesWithRelease(self):
branches = set() branches = set()
for pull_request in self._gh.find_pull_requests("release"): for pull_request in self._gh.find_pull_requests("release"):
branches.add(pull_request['headRefName']) branches.add(pull_request["headRefName"])
return branches return branches
def execute(self, repo, upstream, until_commit, run_cherrypick): def execute(self, repo, upstream, until_commit, run_cherrypick):
@ -44,11 +46,11 @@ class Backport:
branches.append(branch) branches.append(branch)
if not branches: if not branches:
logging.info('No release branches found!') logging.info("No release branches found!")
return return
for branch in branches: for branch in branches:
logging.info('Found release branch: %s', branch[0]) logging.info("Found release branch: %s", branch[0])
if not until_commit: if not until_commit:
until_commit = branches[0][1] until_commit = branches[0][1]
@ -56,73 +58,128 @@ class Backport:
backport_map = {} backport_map = {}
RE_MUST_BACKPORT = re.compile(r'^v(\d+\.\d+)-must-backport$') RE_MUST_BACKPORT = re.compile(r"^v(\d+\.\d+)-must-backport$")
RE_NO_BACKPORT = re.compile(r'^v(\d+\.\d+)-no-backport$') RE_NO_BACKPORT = re.compile(r"^v(\d+\.\d+)-no-backport$")
RE_BACKPORTED = re.compile(r'^v(\d+\.\d+)-backported$') RE_BACKPORTED = re.compile(r"^v(\d+\.\d+)-backported$")
# pull-requests are sorted by ancestry from the most recent. # pull-requests are sorted by ancestry from the most recent.
for pr in pull_requests: for pr in pull_requests:
while repo.comparator(branches[-1][1]) >= repo.comparator(pr['mergeCommit']['oid']): while repo.comparator(branches[-1][1]) >= repo.comparator(
logging.info("PR #{} is already inside {}. Dropping this branch for further PRs".format(pr['number'], branches[-1][0])) pr["mergeCommit"]["oid"]
):
logging.info(
"PR #{} is already inside {}. Dropping this branch for further PRs".format(
pr["number"], branches[-1][0]
)
)
branches.pop() branches.pop()
logging.info("Processing PR #{}".format(pr['number'])) logging.info("Processing PR #{}".format(pr["number"]))
assert len(branches) assert len(branches)
branch_set = set([branch[0] for branch in branches]) branch_set = set([branch[0] for branch in branches])
# First pass. Find all must-backports # First pass. Find all must-backports
for label in pr['labels']['nodes']: for label in pr["labels"]["nodes"]:
if label['name'] == 'pr-must-backport': if label["name"] == "pr-must-backport":
backport_map[pr['number']] = branch_set.copy() backport_map[pr["number"]] = branch_set.copy()
continue continue
matched = RE_MUST_BACKPORT.match(label['name']) matched = RE_MUST_BACKPORT.match(label["name"])
if matched: if matched:
if pr['number'] not in backport_map: if pr["number"] not in backport_map:
backport_map[pr['number']] = set() backport_map[pr["number"]] = set()
backport_map[pr['number']].add(matched.group(1)) backport_map[pr["number"]].add(matched.group(1))
# Second pass. Find all no-backports # Second pass. Find all no-backports
for label in pr['labels']['nodes']: for label in pr["labels"]["nodes"]:
if label['name'] == 'pr-no-backport' and pr['number'] in backport_map: if label["name"] == "pr-no-backport" and pr["number"] in backport_map:
del backport_map[pr['number']] del backport_map[pr["number"]]
break break
matched_no_backport = RE_NO_BACKPORT.match(label['name']) matched_no_backport = RE_NO_BACKPORT.match(label["name"])
matched_backported = RE_BACKPORTED.match(label['name']) matched_backported = RE_BACKPORTED.match(label["name"])
if matched_no_backport and pr['number'] in backport_map and matched_no_backport.group(1) in backport_map[pr['number']]: if (
backport_map[pr['number']].remove(matched_no_backport.group(1)) matched_no_backport
logging.info('\tskipping %s because of forced no-backport', matched_no_backport.group(1)) and pr["number"] in backport_map
elif matched_backported and pr['number'] in backport_map and matched_backported.group(1) in backport_map[pr['number']]: and matched_no_backport.group(1) in backport_map[pr["number"]]
backport_map[pr['number']].remove(matched_backported.group(1)) ):
logging.info('\tskipping %s because it\'s already backported manually', matched_backported.group(1)) backport_map[pr["number"]].remove(matched_no_backport.group(1))
logging.info(
"\tskipping %s because of forced no-backport",
matched_no_backport.group(1),
)
elif (
matched_backported
and pr["number"] in backport_map
and matched_backported.group(1) in backport_map[pr["number"]]
):
backport_map[pr["number"]].remove(matched_backported.group(1))
logging.info(
"\tskipping %s because it's already backported manually",
matched_backported.group(1),
)
for pr, branches in list(backport_map.items()): for pr, branches in list(backport_map.items()):
logging.info('PR #%s needs to be backported to:', pr) logging.info("PR #%s needs to be backported to:", pr)
for branch in branches: for branch in branches:
logging.info('\t%s, and the status is: %s', branch, run_cherrypick(self._token, pr, branch)) logging.info(
"\t%s, and the status is: %s",
branch,
run_cherrypick(self._token, pr, branch),
)
# print API costs # print API costs
logging.info('\nGitHub API total costs per query:') logging.info("\nGitHub API total costs per query:")
for name, value in list(self._gh.api_costs.items()): for name, value in list(self._gh.api_costs.items()):
logging.info('%s : %s', name, value) logging.info("%s : %s", name, value)
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('--token', type=str, required=True, help='token for Github access') parser.add_argument(
parser.add_argument('--repo', type=str, required=True, help='path to full repository', metavar='PATH') "--token", type=str, required=True, help="token for Github access"
parser.add_argument('--til', type=str, help='check PRs from HEAD til this commit', metavar='COMMIT') )
parser.add_argument('--dry-run', action='store_true', help='do not create or merge any PRs', default=False) parser.add_argument(
parser.add_argument('--verbose', '-v', action='store_true', help='more verbose output', default=False) "--repo",
parser.add_argument('--upstream', '-u', type=str, help='remote name of upstream in repository', default='origin') type=str,
required=True,
help="path to full repository",
metavar="PATH",
)
parser.add_argument(
"--til", type=str, help="check PRs from HEAD til this commit", metavar="COMMIT"
)
parser.add_argument(
"--dry-run",
action="store_true",
help="do not create or merge any PRs",
default=False,
)
parser.add_argument(
"--verbose",
"-v",
action="store_true",
help="more verbose output",
default=False,
)
parser.add_argument(
"--upstream",
"-u",
type=str,
help="remote name of upstream in repository",
default="origin",
)
args = parser.parse_args() args = parser.parse_args()
if args.verbose: if args.verbose:
logging.basicConfig(format='%(message)s', stream=sys.stdout, level=logging.DEBUG) logging.basicConfig(
format="%(message)s", stream=sys.stdout, level=logging.DEBUG
)
else: else:
logging.basicConfig(format='%(message)s', stream=sys.stdout, level=logging.INFO) logging.basicConfig(format="%(message)s", stream=sys.stdout, level=logging.INFO)
cherrypick_run = lambda token, pr, branch: CherryPick(token, 'ClickHouse', 'ClickHouse', 'core', pr, branch).execute(args.repo, args.dry_run) cherrypick_run = lambda token, pr, branch: CherryPick(
bp = Backport(args.token, 'ClickHouse', 'ClickHouse', 'core') token, "ClickHouse", "ClickHouse", "core", pr, branch
).execute(args.repo, args.dry_run)
bp = Backport(args.token, "ClickHouse", "ClickHouse", "core")
bp.execute(args.repo, args.upstream, args.til, cherrypick_run) bp.execute(args.repo, args.upstream, args.til, cherrypick_run)
View File
@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
''' """
Backports changes from PR to release branch. Backports changes from PR to release branch.
Requires multiple separate runs as part of the implementation. Requires multiple separate runs as part of the implementation.
@ -12,7 +12,7 @@ First run should do the following:
Second run checks PR from previous run to be merged or at least being mergeable. If it's not merged then try to merge it. Second run checks PR from previous run to be merged or at least being mergeable. If it's not merged then try to merge it.
Third run creates PR from backport branch (with merged previous PR) to release branch. Third run creates PR from backport branch (with merged previous PR) to release branch.
''' """
try: try:
from clickhouse.utils.github.query import Query as RemoteRepo from clickhouse.utils.github.query import Query as RemoteRepo
@ -29,13 +29,13 @@ import sys
class CherryPick: class CherryPick:
class Status(Enum): class Status(Enum):
DISCARDED = 'discarded' DISCARDED = "discarded"
NOT_INITIATED = 'not started' NOT_INITIATED = "not started"
FIRST_MERGEABLE = 'waiting for 1st stage' FIRST_MERGEABLE = "waiting for 1st stage"
FIRST_CONFLICTS = 'conflicts on 1st stage' FIRST_CONFLICTS = "conflicts on 1st stage"
SECOND_MERGEABLE = 'waiting for 2nd stage' SECOND_MERGEABLE = "waiting for 2nd stage"
SECOND_CONFLICTS = 'conflicts on 2nd stage' SECOND_CONFLICTS = "conflicts on 2nd stage"
MERGED = 'backported' MERGED = "backported"
def _run(self, args): def _run(self, args):
out = subprocess.check_output(args).rstrip() out = subprocess.check_output(args).rstrip()
@ -50,51 +50,90 @@ class CherryPick:
# TODO: check if pull-request is merged. # TODO: check if pull-request is merged.
self.merge_commit_oid = self._pr['mergeCommit']['oid'] self.merge_commit_oid = self._pr["mergeCommit"]["oid"]
self.target_branch = target_branch self.target_branch = target_branch
self.backport_branch = 'backport/{branch}/{pr}'.format(branch=target_branch, pr=pr_number) self.backport_branch = "backport/{branch}/{pr}".format(
self.cherrypick_branch = 'cherrypick/{branch}/{oid}'.format(branch=target_branch, oid=self.merge_commit_oid) branch=target_branch, pr=pr_number
)
self.cherrypick_branch = "cherrypick/{branch}/{oid}".format(
branch=target_branch, oid=self.merge_commit_oid
)
def getCherryPickPullRequest(self): def getCherryPickPullRequest(self):
return self._gh.find_pull_request(base=self.backport_branch, head=self.cherrypick_branch) return self._gh.find_pull_request(
base=self.backport_branch, head=self.cherrypick_branch
)
def createCherryPickPullRequest(self, repo_path): def createCherryPickPullRequest(self, repo_path):
DESCRIPTION = ( DESCRIPTION = (
'This pull-request is a first step of an automated backporting.\n' "This pull-request is a first step of an automated backporting.\n"
'It contains changes like after calling a local command `git cherry-pick`.\n' "It contains changes like after calling a local command `git cherry-pick`.\n"
'If you intend to continue backporting this changes, then resolve all conflicts if any.\n' "If you intend to continue backporting this changes, then resolve all conflicts if any.\n"
'Otherwise, if you do not want to backport them, then just close this pull-request.\n' "Otherwise, if you do not want to backport them, then just close this pull-request.\n"
'\n' "\n"
'The check results does not matter at this step - you can safely ignore them.\n' "The check results does not matter at this step - you can safely ignore them.\n"
'Also this pull-request will be merged automatically as it reaches the mergeable state, but you always can merge it manually.\n' "Also this pull-request will be merged automatically as it reaches the mergeable state, but you always can merge it manually.\n"
) )
# FIXME: replace with something better than os.system() # FIXME: replace with something better than os.system()
git_prefix = ['git', '-C', repo_path, '-c', 'user.email=robot-clickhouse@yandex-team.ru', '-c', 'user.name=robot-clickhouse'] git_prefix = [
base_commit_oid = self._pr['mergeCommit']['parents']['nodes'][0]['oid'] "git",
"-C",
repo_path,
"-c",
"user.email=robot-clickhouse@yandex-team.ru",
"-c",
"user.name=robot-clickhouse",
]
base_commit_oid = self._pr["mergeCommit"]["parents"]["nodes"][0]["oid"]
# Create separate branch for backporting, and make it look like real cherry-pick. # Create separate branch for backporting, and make it look like real cherry-pick.
self._run(git_prefix + ['checkout', '-f', self.target_branch]) self._run(git_prefix + ["checkout", "-f", self.target_branch])
self._run(git_prefix + ['checkout', '-B', self.backport_branch]) self._run(git_prefix + ["checkout", "-B", self.backport_branch])
self._run(git_prefix + ['merge', '-s', 'ours', '--no-edit', base_commit_oid]) self._run(git_prefix + ["merge", "-s", "ours", "--no-edit", base_commit_oid])
# Create secondary branch to allow pull request with cherry-picked commit. # Create secondary branch to allow pull request with cherry-picked commit.
self._run(git_prefix + ['branch', '-f', self.cherrypick_branch, self.merge_commit_oid]) self._run(
git_prefix + ["branch", "-f", self.cherrypick_branch, self.merge_commit_oid]
)
self._run(git_prefix + ['push', '-f', 'origin', '{branch}:{branch}'.format(branch=self.backport_branch)]) self._run(
self._run(git_prefix + ['push', '-f', 'origin', '{branch}:{branch}'.format(branch=self.cherrypick_branch)]) git_prefix
+ [
"push",
"-f",
"origin",
"{branch}:{branch}".format(branch=self.backport_branch),
]
)
self._run(
git_prefix
+ [
"push",
"-f",
"origin",
"{branch}:{branch}".format(branch=self.cherrypick_branch),
]
)
# Create pull-request like a local cherry-pick # Create pull-request like a local cherry-pick
pr = self._gh.create_pull_request(source=self.cherrypick_branch, target=self.backport_branch, pr = self._gh.create_pull_request(
title='Cherry pick #{number} to {target}: {title}'.format( source=self.cherrypick_branch,
number=self._pr['number'], target=self.target_branch, target=self.backport_branch,
title=self._pr['title'].replace('"', '\\"')), title="Cherry pick #{number} to {target}: {title}".format(
description='Original pull-request #{}\n\n{}'.format(self._pr['number'], DESCRIPTION)) number=self._pr["number"],
target=self.target_branch,
title=self._pr["title"].replace('"', '\\"'),
),
description="Original pull-request #{}\n\n{}".format(
self._pr["number"], DESCRIPTION
),
)
# FIXME: use `team` to leave a single eligible assignee. # FIXME: use `team` to leave a single eligible assignee.
self._gh.add_assignee(pr, self._pr['author']) self._gh.add_assignee(pr, self._pr["author"])
self._gh.add_assignee(pr, self._pr['mergedBy']) self._gh.add_assignee(pr, self._pr["mergedBy"])
self._gh.set_label(pr, "do not test") self._gh.set_label(pr, "do not test")
self._gh.set_label(pr, "pr-cherrypick") self._gh.set_label(pr, "pr-cherrypick")
@ -102,36 +141,76 @@ class CherryPick:
return pr return pr
def mergeCherryPickPullRequest(self, cherrypick_pr): def mergeCherryPickPullRequest(self, cherrypick_pr):
return self._gh.merge_pull_request(cherrypick_pr['id']) return self._gh.merge_pull_request(cherrypick_pr["id"])
def getBackportPullRequest(self): def getBackportPullRequest(self):
return self._gh.find_pull_request(base=self.target_branch, head=self.backport_branch) return self._gh.find_pull_request(
base=self.target_branch, head=self.backport_branch
)
def createBackportPullRequest(self, cherrypick_pr, repo_path): def createBackportPullRequest(self, cherrypick_pr, repo_path):
DESCRIPTION = ( DESCRIPTION = (
'This pull-request is a last step of an automated backporting.\n' "This pull-request is a last step of an automated backporting.\n"
'Treat it as a standard pull-request: look at the checks and resolve conflicts.\n' "Treat it as a standard pull-request: look at the checks and resolve conflicts.\n"
'Merge it only if you intend to backport changes to the target branch, otherwise just close it.\n' "Merge it only if you intend to backport changes to the target branch, otherwise just close it.\n"
) )
git_prefix = ['git', '-C', repo_path, '-c', 'user.email=robot-clickhouse@clickhouse.com', '-c', 'user.name=robot-clickhouse'] git_prefix = [
"git",
"-C",
repo_path,
"-c",
"user.email=robot-clickhouse@clickhouse.com",
"-c",
"user.name=robot-clickhouse",
]
pr_title = 'Backport #{number} to {target}: {title}'.format( pr_title = "Backport #{number} to {target}: {title}".format(
number=self._pr['number'], target=self.target_branch, number=self._pr["number"],
title=self._pr['title'].replace('"', '\\"')) target=self.target_branch,
title=self._pr["title"].replace('"', '\\"'),
)
self._run(git_prefix + ['checkout', '-f', self.backport_branch]) self._run(git_prefix + ["checkout", "-f", self.backport_branch])
self._run(git_prefix + ['pull', '--ff-only', 'origin', self.backport_branch]) self._run(git_prefix + ["pull", "--ff-only", "origin", self.backport_branch])
self._run(git_prefix + ['reset', '--soft', self._run(git_prefix + ['merge-base', 'origin/' + self.target_branch, self.backport_branch])]) self._run(
self._run(git_prefix + ['commit', '-a', '--allow-empty', '-m', pr_title]) git_prefix
self._run(git_prefix + ['push', '-f', 'origin', '{branch}:{branch}'.format(branch=self.backport_branch)]) + [
"reset",
"--soft",
self._run(
git_prefix
+ [
"merge-base",
"origin/" + self.target_branch,
self.backport_branch,
]
),
]
)
self._run(git_prefix + ["commit", "-a", "--allow-empty", "-m", pr_title])
self._run(
git_prefix
+ [
"push",
"-f",
"origin",
"{branch}:{branch}".format(branch=self.backport_branch),
]
)
pr = self._gh.create_pull_request(source=self.backport_branch, target=self.target_branch, title=pr_title, pr = self._gh.create_pull_request(
description='Original pull-request #{}\nCherry-pick pull-request #{}\n\n{}'.format(self._pr['number'], cherrypick_pr['number'], DESCRIPTION)) source=self.backport_branch,
target=self.target_branch,
title=pr_title,
description="Original pull-request #{}\nCherry-pick pull-request #{}\n\n{}".format(
self._pr["number"], cherrypick_pr["number"], DESCRIPTION
),
)
# FIXME: use `team` to leave a single eligible assignee. # FIXME: use `team` to leave a single eligible assignee.
self._gh.add_assignee(pr, self._pr['author']) self._gh.add_assignee(pr, self._pr["author"])
self._gh.add_assignee(pr, self._pr['mergedBy']) self._gh.add_assignee(pr, self._pr["mergedBy"])
self._gh.set_label(pr, "pr-backport") self._gh.set_label(pr, "pr-backport")
@ -142,23 +221,43 @@ class CherryPick:
if not pr1: if not pr1:
if not dry_run: if not dry_run:
pr1 = self.createCherryPickPullRequest(repo_path) pr1 = self.createCherryPickPullRequest(repo_path)
logging.debug('Created PR with cherry-pick of %s to %s: %s', self._pr['number'], self.target_branch, pr1['url']) logging.debug(
"Created PR with cherry-pick of %s to %s: %s",
self._pr["number"],
self.target_branch,
pr1["url"],
)
else: else:
return CherryPick.Status.NOT_INITIATED return CherryPick.Status.NOT_INITIATED
else: else:
logging.debug('Found PR with cherry-pick of %s to %s: %s', self._pr['number'], self.target_branch, pr1['url']) logging.debug(
"Found PR with cherry-pick of %s to %s: %s",
self._pr["number"],
self.target_branch,
pr1["url"],
)
if not pr1['merged'] and pr1['mergeable'] == 'MERGEABLE' and not pr1['closed']: if not pr1["merged"] and pr1["mergeable"] == "MERGEABLE" and not pr1["closed"]:
if not dry_run: if not dry_run:
pr1 = self.mergeCherryPickPullRequest(pr1) pr1 = self.mergeCherryPickPullRequest(pr1)
logging.debug('Merged PR with cherry-pick of %s to %s: %s', self._pr['number'], self.target_branch, pr1['url']) logging.debug(
"Merged PR with cherry-pick of %s to %s: %s",
self._pr["number"],
self.target_branch,
pr1["url"],
)
if not pr1['merged']: if not pr1["merged"]:
logging.debug('Waiting for PR with cherry-pick of %s to %s: %s', self._pr['number'], self.target_branch, pr1['url']) logging.debug(
"Waiting for PR with cherry-pick of %s to %s: %s",
self._pr["number"],
self.target_branch,
pr1["url"],
)
if pr1['closed']: if pr1["closed"]:
return CherryPick.Status.DISCARDED return CherryPick.Status.DISCARDED
elif pr1['mergeable'] == 'CONFLICTING': elif pr1["mergeable"] == "CONFLICTING":
return CherryPick.Status.FIRST_CONFLICTS return CherryPick.Status.FIRST_CONFLICTS
else: else:
return CherryPick.Status.FIRST_MERGEABLE return CherryPick.Status.FIRST_MERGEABLE
@ -167,31 +266,58 @@ class CherryPick:
if not pr2: if not pr2:
if not dry_run: if not dry_run:
pr2 = self.createBackportPullRequest(pr1, repo_path) pr2 = self.createBackportPullRequest(pr1, repo_path)
logging.debug('Created PR with backport of %s to %s: %s', self._pr['number'], self.target_branch, pr2['url']) logging.debug(
"Created PR with backport of %s to %s: %s",
self._pr["number"],
self.target_branch,
pr2["url"],
)
else: else:
return CherryPick.Status.FIRST_MERGEABLE return CherryPick.Status.FIRST_MERGEABLE
else: else:
logging.debug('Found PR with backport of %s to %s: %s', self._pr['number'], self.target_branch, pr2['url']) logging.debug(
"Found PR with backport of %s to %s: %s",
self._pr["number"],
self.target_branch,
pr2["url"],
)
if pr2['merged']: if pr2["merged"]:
return CherryPick.Status.MERGED return CherryPick.Status.MERGED
elif pr2['closed']: elif pr2["closed"]:
return CherryPick.Status.DISCARDED return CherryPick.Status.DISCARDED
elif pr2['mergeable'] == 'CONFLICTING': elif pr2["mergeable"] == "CONFLICTING":
return CherryPick.Status.SECOND_CONFLICTS return CherryPick.Status.SECOND_CONFLICTS
else: else:
return CherryPick.Status.SECOND_MERGEABLE return CherryPick.Status.SECOND_MERGEABLE
if __name__ == "__main__": if __name__ == "__main__":
logging.basicConfig(format='%(message)s', stream=sys.stdout, level=logging.DEBUG) logging.basicConfig(format="%(message)s", stream=sys.stdout, level=logging.DEBUG)
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('--token', '-t', type=str, required=True, help='token for Github access') parser.add_argument(
parser.add_argument('--pr', type=str, required=True, help='PR# to cherry-pick') "--token", "-t", type=str, required=True, help="token for Github access"
parser.add_argument('--branch', '-b', type=str, required=True, help='target branch name for cherry-pick') )
parser.add_argument('--repo', '-r', type=str, required=True, help='path to full repository', metavar='PATH') parser.add_argument("--pr", type=str, required=True, help="PR# to cherry-pick")
parser.add_argument(
"--branch",
"-b",
type=str,
required=True,
help="target branch name for cherry-pick",
)
parser.add_argument(
"--repo",
"-r",
type=str,
required=True,
help="path to full repository",
metavar="PATH",
)
args = parser.parse_args() args = parser.parse_args()
cp = CherryPick(args.token, 'ClickHouse', 'ClickHouse', 'core', args.pr, args.branch) cp = CherryPick(
args.token, "ClickHouse", "ClickHouse", "core", args.pr, args.branch
)
cp.execute(args.repo) cp.execute(args.repo)
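As an aside, the wrapping pattern visible throughout this diff can be reproduced with black's Python API. The sketch below is illustrative only (not part of the commit) and assumes the repository uses black's default 88-character line length:

import black

# One of the long calls from the script above, before formatting.
src = (
    "logging.debug('Created PR with cherry-pick of %s to %s: %s', "
    "self._pr['number'], self.target_branch, pr1['url'])\n"
)
# black normalizes quotes and explodes the call, one argument per line,
# with a trailing comma -- the same shape seen in this diff.
print(black.format_str(src, mode=black.Mode()), end="")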


@ -20,13 +20,14 @@ class RepositoryBase:
return -1 return -1
else: else:
return 1 return 1
self.comparator = functools.cmp_to_key(cmp) self.comparator = functools.cmp_to_key(cmp)
def get_head_commit(self): def get_head_commit(self):
return self._repo.commit(self._default) return self._repo.commit(self._default)
def iterate(self, begin, end): def iterate(self, begin, end):
rev_range = '{}...{}'.format(begin, end) rev_range = "{}...{}".format(begin, end)
for commit in self._repo.iter_commits(rev_range, first_parent=True): for commit in self._repo.iter_commits(rev_range, first_parent=True):
yield commit yield commit
@ -39,27 +40,35 @@ class Repository(RepositoryBase):
self._default = self._remote.refs[default_branch_name] self._default = self._remote.refs[default_branch_name]
def get_release_branches(self): def get_release_branches(self):
''' """
Returns sorted list of tuples: Returns sorted list of tuples:
* remote branch (git.refs.remote.RemoteReference), * remote branch (git.refs.remote.RemoteReference),
* base commit (git.Commit), * base commit (git.Commit),
* head (git.Commit)). * head (git.Commit)).
List is sorted by commits in ascending order. List is sorted by commits in ascending order.
''' """
release_branches = [] release_branches = []
RE_RELEASE_BRANCH_REF = re.compile(r'^refs/remotes/.+/\d+\.\d+$') RE_RELEASE_BRANCH_REF = re.compile(r"^refs/remotes/.+/\d+\.\d+$")
for branch in [r for r in self._remote.refs if RE_RELEASE_BRANCH_REF.match(r.path)]: for branch in [
r for r in self._remote.refs if RE_RELEASE_BRANCH_REF.match(r.path)
]:
base = self._repo.merge_base(self._default, self._repo.commit(branch)) base = self._repo.merge_base(self._default, self._repo.commit(branch))
if not base: if not base:
logging.info('Branch %s is not based on branch %s. Ignoring.', branch.path, self._default) logging.info(
"Branch %s is not based on branch %s. Ignoring.",
branch.path,
self._default,
)
elif len(base) > 1: elif len(base) > 1:
logging.info('Branch %s has more than one base commit. Ignoring.', branch.path) logging.info(
"Branch %s has more than one base commit. Ignoring.", branch.path
)
else: else:
release_branches.append((os.path.basename(branch.name), base[0])) release_branches.append((os.path.basename(branch.name), base[0]))
return sorted(release_branches, key=lambda x : self.comparator(x[1])) return sorted(release_branches, key=lambda x: self.comparator(x[1]))
class BareRepository(RepositoryBase): class BareRepository(RepositoryBase):
@ -68,24 +77,32 @@ class BareRepository(RepositoryBase):
self._default = self._repo.branches[default_branch_name] self._default = self._repo.branches[default_branch_name]
def get_release_branches(self): def get_release_branches(self):
''' """
Returns sorted list of tuples: Returns sorted list of tuples:
* branch (git.refs.head?), * branch (git.refs.head?),
* base commit (git.Commit), * base commit (git.Commit),
* head (git.Commit)). * head (git.Commit)).
List is sorted by commits in ascending order. List is sorted by commits in ascending order.
''' """
release_branches = [] release_branches = []
RE_RELEASE_BRANCH_REF = re.compile(r'^refs/heads/\d+\.\d+$') RE_RELEASE_BRANCH_REF = re.compile(r"^refs/heads/\d+\.\d+$")
for branch in [r for r in self._repo.branches if RE_RELEASE_BRANCH_REF.match(r.path)]: for branch in [
r for r in self._repo.branches if RE_RELEASE_BRANCH_REF.match(r.path)
]:
base = self._repo.merge_base(self._default, self._repo.commit(branch)) base = self._repo.merge_base(self._default, self._repo.commit(branch))
if not base: if not base:
logging.info('Branch %s is not based on branch %s. Ignoring.', branch.path, self._default) logging.info(
"Branch %s is not based on branch %s. Ignoring.",
branch.path,
self._default,
)
elif len(base) > 1: elif len(base) > 1:
logging.info('Branch %s has more than one base commit. Ignoring.', branch.path) logging.info(
"Branch %s has more than one base commit. Ignoring.", branch.path
)
else: else:
release_branches.append((os.path.basename(branch.name), base[0])) release_branches.append((os.path.basename(branch.name), base[0]))
return sorted(release_branches, key=lambda x : self.comparator(x[1])) return sorted(release_branches, key=lambda x: self.comparator(x[1]))
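For reference, a self-contained sketch of the sorting idiom used by `comparator` above: `functools.cmp_to_key` turns a two-argument comparison into a sort key. The comparison criterion shown here (a committed-date field on plain dicts) is an assumption for the example, not the script's actual git commit objects.

import functools


def cmp(a, b):
    # Stand-in comparison: order by an assumed committed-date field.
    if a["committed_date"] < b["committed_date"]:
        return -1
    else:
        return 1


comparator = functools.cmp_to_key(cmp)
release_branches = [
    ("22.3", {"committed_date": 2}),
    ("22.2", {"committed_date": 1}),
]
# Sorted by base commit, ascending -- mirrors the return statement above.
print(sorted(release_branches, key=lambda x: comparator(x[1])))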


@ -1,19 +1,20 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
class Description: class Description:
'''Parsed description representation """Parsed description representation"""
'''
MAP_CATEGORY_TO_LABEL = { MAP_CATEGORY_TO_LABEL = {
'New Feature': 'pr-feature', "New Feature": "pr-feature",
'Bug Fix': 'pr-bugfix', "Bug Fix": "pr-bugfix",
'Improvement': 'pr-improvement', "Improvement": "pr-improvement",
'Performance Improvement': 'pr-performance', "Performance Improvement": "pr-performance",
# 'Backward Incompatible Change': doesn't match anything # 'Backward Incompatible Change': doesn't match anything
'Build/Testing/Packaging Improvement': 'pr-build', "Build/Testing/Packaging Improvement": "pr-build",
'Non-significant (changelog entry is not needed)': 'pr-non-significant', "Non-significant (changelog entry is not needed)": "pr-non-significant",
'Non-significant (changelog entry is not required)': 'pr-non-significant', "Non-significant (changelog entry is not required)": "pr-non-significant",
'Non-significant': 'pr-non-significant', "Non-significant": "pr-non-significant",
'Documentation (changelog entry is not required)': 'pr-documentation', "Documentation (changelog entry is not required)": "pr-documentation",
# 'Other': doesn't match anything # 'Other': doesn't match anything
} }
@ -21,7 +22,7 @@ class Description:
self.label_name = str() self.label_name = str()
self.legal = False self.legal = False
self._parse(pull_request['bodyText']) self._parse(pull_request["bodyText"])
def _parse(self, text): def _parse(self, text):
lines = text.splitlines() lines = text.splitlines()
@ -38,14 +39,17 @@ class Description:
category = stripped category = stripped
next_category = False next_category = False
if stripped == 'I hereby agree to the terms of the CLA available at: https://yandex.ru/legal/cla/?lang=en': if (
stripped
== "I hereby agree to the terms of the CLA available at: https://yandex.ru/legal/cla/?lang=en"
):
self.legal = True self.legal = True
category_headers = ( category_headers = (
'Category (leave one):', "Category (leave one):",
'Changelog category (leave one):', "Changelog category (leave one):",
'Changelog category:', "Changelog category:",
'Category:' "Category:",
) )
if stripped in category_headers: if stripped in category_headers:
@ -55,6 +59,6 @@ class Description:
self.label_name = Description.MAP_CATEGORY_TO_LABEL[category] self.label_name = Description.MAP_CATEGORY_TO_LABEL[category]
else: else:
if not category: if not category:
print('Cannot find category in pr description') print("Cannot find category in pr description")
else: else:
print(('Unknown category: ' + category)) print(("Unknown category: " + category))


@ -5,11 +5,11 @@ import time
class Query: class Query:
''' """
Implements queries to the Github API using GraphQL Implements queries to the Github API using GraphQL
''' """
_PULL_REQUEST = ''' _PULL_REQUEST = """
author {{ author {{
... on User {{ ... on User {{
id id
@ -47,7 +47,7 @@ class Query:
number number
title title
url url
''' """
def __init__(self, token, owner, name, team, max_page_size=100, min_page_size=10): def __init__(self, token, owner, name, team, max_page_size=100, min_page_size=10):
self._PULL_REQUEST = Query._PULL_REQUEST.format(min_page_size=min_page_size) self._PULL_REQUEST = Query._PULL_REQUEST.format(min_page_size=min_page_size)
@ -63,14 +63,14 @@ class Query:
self.api_costs = {} self.api_costs = {}
repo = self.get_repository() repo = self.get_repository()
self._id = repo['id'] self._id = repo["id"]
self.ssh_url = repo['sshUrl'] self.ssh_url = repo["sshUrl"]
self.default_branch = repo['defaultBranchRef']['name'] self.default_branch = repo["defaultBranchRef"]["name"]
self.members = set(self.get_members()) self.members = set(self.get_members())
def get_repository(self): def get_repository(self):
_QUERY = ''' _QUERY = """
repository(owner: "{owner}" name: "{name}") {{ repository(owner: "{owner}" name: "{name}") {{
defaultBranchRef {{ defaultBranchRef {{
name name
@ -78,19 +78,19 @@ class Query:
id id
sshUrl sshUrl
}} }}
''' """
query = _QUERY.format(owner=self._owner, name=self._name) query = _QUERY.format(owner=self._owner, name=self._name)
return self._run(query)['repository'] return self._run(query)["repository"]
def get_members(self): def get_members(self):
'''Get all team members for organization """Get all team members for organization
Returns: Returns:
members: a map of members' logins to ids members: a map of members' logins to ids
''' """
_QUERY = ''' _QUERY = """
organization(login: "{organization}") {{ organization(login: "{organization}") {{
team(slug: "{team}") {{ team(slug: "{team}") {{
members(first: {max_page_size} {next}) {{ members(first: {max_page_size} {next}) {{
@ -105,43 +105,54 @@ class Query:
}} }}
}} }}
}} }}
''' """
members = {} members = {}
not_end = True not_end = True
query = _QUERY.format(organization=self._owner, team=self._team, query = _QUERY.format(
max_page_size=self._max_page_size, organization=self._owner,
next='') team=self._team,
max_page_size=self._max_page_size,
next="",
)
while not_end: while not_end:
result = self._run(query)['organization']['team'] result = self._run(query)["organization"]["team"]
if result is None: if result is None:
break break
result = result['members'] result = result["members"]
not_end = result['pageInfo']['hasNextPage'] not_end = result["pageInfo"]["hasNextPage"]
query = _QUERY.format(organization=self._owner, team=self._team, query = _QUERY.format(
max_page_size=self._max_page_size, organization=self._owner,
next='after: "{}"'.format(result["pageInfo"]["endCursor"])) team=self._team,
max_page_size=self._max_page_size,
next='after: "{}"'.format(result["pageInfo"]["endCursor"]),
)
members.update(dict([(node['login'], node['id']) for node in result['nodes']])) members.update(dict([(node["login"], node["id"]) for node in result["nodes"]]))
return members return members
def get_pull_request(self, number): def get_pull_request(self, number):
_QUERY = ''' _QUERY = """
repository(owner: "{owner}" name: "{name}") {{ repository(owner: "{owner}" name: "{name}") {{
pullRequest(number: {number}) {{ pullRequest(number: {number}) {{
{pull_request_data} {pull_request_data}
}} }}
}} }}
''' """
query = _QUERY.format(owner=self._owner, name=self._name, number=number, query = _QUERY.format(
pull_request_data=self._PULL_REQUEST, min_page_size=self._min_page_size) owner=self._owner,
return self._run(query)['repository']['pullRequest'] name=self._name,
number=number,
pull_request_data=self._PULL_REQUEST,
min_page_size=self._min_page_size,
)
return self._run(query)["repository"]["pullRequest"]
def find_pull_request(self, base, head): def find_pull_request(self, base, head):
_QUERY = ''' _QUERY = """
repository(owner: "{owner}" name: "{name}") {{ repository(owner: "{owner}" name: "{name}") {{
pullRequests(first: {min_page_size} baseRefName: "{base}" headRefName: "{head}") {{ pullRequests(first: {min_page_size} baseRefName: "{base}" headRefName: "{head}") {{
nodes {{ nodes {{
@ -150,21 +161,27 @@ class Query:
totalCount totalCount
}} }}
}} }}
''' """
query = _QUERY.format(owner=self._owner, name=self._name, base=base, head=head, query = _QUERY.format(
pull_request_data=self._PULL_REQUEST, min_page_size=self._min_page_size) owner=self._owner,
result = self._run(query)['repository']['pullRequests'] name=self._name,
if result['totalCount'] > 0: base=base,
return result['nodes'][0] head=head,
pull_request_data=self._PULL_REQUEST,
min_page_size=self._min_page_size,
)
result = self._run(query)["repository"]["pullRequests"]
if result["totalCount"] > 0:
return result["nodes"][0]
else: else:
return {} return {}
def find_pull_requests(self, label_name): def find_pull_requests(self, label_name):
''' """
Get all pull-requests filtered by label name Get all pull-requests filtered by label name
''' """
_QUERY = ''' _QUERY = """
repository(owner: "{owner}" name: "{name}") {{ repository(owner: "{owner}" name: "{name}") {{
pullRequests(first: {min_page_size} labels: "{label_name}" states: OPEN) {{ pullRequests(first: {min_page_size} labels: "{label_name}" states: OPEN) {{
nodes {{ nodes {{
@ -172,18 +189,23 @@ class Query:
}} }}
}} }}
}} }}
''' """
query = _QUERY.format(owner=self._owner, name=self._name, label_name=label_name, query = _QUERY.format(
pull_request_data=self._PULL_REQUEST, min_page_size=self._min_page_size) owner=self._owner,
return self._run(query)['repository']['pullRequests']['nodes'] name=self._name,
label_name=label_name,
pull_request_data=self._PULL_REQUEST,
min_page_size=self._min_page_size,
)
return self._run(query)["repository"]["pullRequests"]["nodes"]
def get_pull_requests(self, before_commit): def get_pull_requests(self, before_commit):
''' """
Get all merged pull-requests from the HEAD of default branch to the last commit (excluding) Get all merged pull-requests from the HEAD of default branch to the last commit (excluding)
''' """
_QUERY = ''' _QUERY = """
repository(owner: "{owner}" name: "{name}") {{ repository(owner: "{owner}" name: "{name}") {{
defaultBranchRef {{ defaultBranchRef {{
target {{ target {{
@ -221,44 +243,60 @@ class Query:
}} }}
}} }}
}} }}
''' """
pull_requests = [] pull_requests = []
not_end = True not_end = True
query = _QUERY.format(owner=self._owner, name=self._name, query = _QUERY.format(
max_page_size=self._max_page_size, owner=self._owner,
min_page_size=self._min_page_size, name=self._name,
pull_request_data=self._PULL_REQUEST, max_page_size=self._max_page_size,
next='') min_page_size=self._min_page_size,
pull_request_data=self._PULL_REQUEST,
next="",
)
while not_end: while not_end:
result = self._run(query)['repository']['defaultBranchRef']['target']['history'] result = self._run(query)["repository"]["defaultBranchRef"]["target"][
not_end = result['pageInfo']['hasNextPage'] "history"
query = _QUERY.format(owner=self._owner, name=self._name, ]
max_page_size=self._max_page_size, not_end = result["pageInfo"]["hasNextPage"]
min_page_size=self._min_page_size, query = _QUERY.format(
pull_request_data=self._PULL_REQUEST, owner=self._owner,
next='after: "{}"'.format(result["pageInfo"]["endCursor"])) name=self._name,
max_page_size=self._max_page_size,
min_page_size=self._min_page_size,
pull_request_data=self._PULL_REQUEST,
next='after: "{}"'.format(result["pageInfo"]["endCursor"]),
)
for commit in result['nodes']: for commit in result["nodes"]:
# FIXME: maybe include `before_commit`? # FIXME: maybe include `before_commit`?
if str(commit['oid']) == str(before_commit): if str(commit["oid"]) == str(before_commit):
not_end = False not_end = False
break break
# TODO: fetch all pull-requests that were merged in a single commit. # TODO: fetch all pull-requests that were merged in a single commit.
assert commit['associatedPullRequests']['totalCount'] <= self._min_page_size assert (
commit["associatedPullRequests"]["totalCount"]
<= self._min_page_size
)
for pull_request in commit['associatedPullRequests']['nodes']: for pull_request in commit["associatedPullRequests"]["nodes"]:
if(pull_request['baseRepository']['nameWithOwner'] == '{}/{}'.format(self._owner, self._name) and if (
pull_request['baseRefName'] == self.default_branch and pull_request["baseRepository"]["nameWithOwner"]
pull_request['mergeCommit']['oid'] == commit['oid']): == "{}/{}".format(self._owner, self._name)
and pull_request["baseRefName"] == self.default_branch
and pull_request["mergeCommit"]["oid"] == commit["oid"]
):
pull_requests.append(pull_request) pull_requests.append(pull_request)
return pull_requests return pull_requests
def create_pull_request(self, source, target, title, description="", draft=False, can_modify=True): def create_pull_request(
_QUERY = ''' self, source, target, title, description="", draft=False, can_modify=True
):
_QUERY = """
createPullRequest(input: {{ createPullRequest(input: {{
baseRefName: "{target}", baseRefName: "{target}",
headRefName: "{source}", headRefName: "{source}",
@ -272,15 +310,22 @@ class Query:
{pull_request_data} {pull_request_data}
}} }}
}} }}
''' """
query = _QUERY.format(target=target, source=source, id=self._id, title=title, body=description, query = _QUERY.format(
draft="true" if draft else "false", modify="true" if can_modify else "false", target=target,
pull_request_data=self._PULL_REQUEST) source=source,
return self._run(query, is_mutation=True)['createPullRequest']['pullRequest'] id=self._id,
title=title,
body=description,
draft="true" if draft else "false",
modify="true" if can_modify else "false",
pull_request_data=self._PULL_REQUEST,
)
return self._run(query, is_mutation=True)["createPullRequest"]["pullRequest"]
def merge_pull_request(self, id): def merge_pull_request(self, id):
_QUERY = ''' _QUERY = """
mergePullRequest(input: {{ mergePullRequest(input: {{
pullRequestId: "{id}" pullRequestId: "{id}"
}}) {{ }}) {{
@ -288,35 +333,35 @@ class Query:
{pull_request_data} {pull_request_data}
}} }}
}} }}
''' """
query = _QUERY.format(id=id, pull_request_data=self._PULL_REQUEST) query = _QUERY.format(id=id, pull_request_data=self._PULL_REQUEST)
return self._run(query, is_mutation=True)['mergePullRequest']['pullRequest'] return self._run(query, is_mutation=True)["mergePullRequest"]["pullRequest"]
# FIXME: figure out how to add more assignees at once # FIXME: figure out how to add more assignees at once
def add_assignee(self, pr, assignee): def add_assignee(self, pr, assignee):
_QUERY = ''' _QUERY = """
addAssigneesToAssignable(input: {{ addAssigneesToAssignable(input: {{
assignableId: "{id1}", assignableId: "{id1}",
assigneeIds: "{id2}" assigneeIds: "{id2}"
}}) {{ }}) {{
clientMutationId clientMutationId
}} }}
''' """
query = _QUERY.format(id1=pr['id'], id2=assignee['id']) query = _QUERY.format(id1=pr["id"], id2=assignee["id"])
self._run(query, is_mutation=True) self._run(query, is_mutation=True)
def set_label(self, pull_request, label_name): def set_label(self, pull_request, label_name):
''' """
Set label by name to the pull request Set label by name to the pull request
Args: Args:
pull_request: JSON object returned by `get_pull_requests()` pull_request: JSON object returned by `get_pull_requests()`
label_name (string): label name label_name (string): label name
''' """
_GET_LABEL = ''' _GET_LABEL = """
repository(owner: "{owner}" name: "{name}") {{ repository(owner: "{owner}" name: "{name}") {{
labels(first: {max_page_size} {next} query: "{label_name}") {{ labels(first: {max_page_size} {next} query: "{label_name}") {{
pageInfo {{ pageInfo {{
@ -330,36 +375,44 @@ class Query:
}} }}
}} }}
}} }}
''' """
_SET_LABEL = ''' _SET_LABEL = """
addLabelsToLabelable(input: {{ addLabelsToLabelable(input: {{
labelableId: "{pr_id}", labelableId: "{pr_id}",
labelIds: "{label_id}" labelIds: "{label_id}"
}}) {{ }}) {{
clientMutationId clientMutationId
}} }}
''' """
labels = [] labels = []
not_end = True not_end = True
query = _GET_LABEL.format(owner=self._owner, name=self._name, label_name=label_name, query = _GET_LABEL.format(
max_page_size=self._max_page_size, owner=self._owner,
next='') name=self._name,
label_name=label_name,
max_page_size=self._max_page_size,
next="",
)
while not_end: while not_end:
result = self._run(query)['repository']['labels'] result = self._run(query)["repository"]["labels"]
not_end = result['pageInfo']['hasNextPage'] not_end = result["pageInfo"]["hasNextPage"]
query = _GET_LABEL.format(owner=self._owner, name=self._name, label_name=label_name, query = _GET_LABEL.format(
max_page_size=self._max_page_size, owner=self._owner,
next='after: "{}"'.format(result["pageInfo"]["endCursor"])) name=self._name,
label_name=label_name,
max_page_size=self._max_page_size,
next='after: "{}"'.format(result["pageInfo"]["endCursor"]),
)
labels += [label for label in result['nodes']] labels += [label for label in result["nodes"]]
if not labels: if not labels:
return return
query = _SET_LABEL.format(pr_id=pull_request['id'], label_id=labels[0]['id']) query = _SET_LABEL.format(pr_id=pull_request["id"], label_id=labels[0]["id"])
self._run(query, is_mutation=True) self._run(query, is_mutation=True)
def _run(self, query, is_mutation=False): def _run(self, query, is_mutation=False):
@ -385,19 +438,21 @@ class Query:
status_forcelist=status_forcelist, status_forcelist=status_forcelist,
) )
adapter = HTTPAdapter(max_retries=retry) adapter = HTTPAdapter(max_retries=retry)
session.mount('http://', adapter) session.mount("http://", adapter)
session.mount('https://', adapter) session.mount("https://", adapter)
return session return session
headers = {'Authorization': 'bearer {}'.format(self._token)} headers = {"Authorization": "bearer {}".format(self._token)}
if is_mutation: if is_mutation:
query = ''' query = """
mutation {{ mutation {{
{query} {query}
}} }}
'''.format(query=query) """.format(
query=query
)
else: else:
query = ''' query = """
query {{ query {{
{query} {query}
rateLimit {{ rateLimit {{
@ -405,23 +460,38 @@ class Query:
remaining remaining
}} }}
}} }}
'''.format(query=query) """.format(
query=query
)
while True: while True:
request = requests_retry_session().post('https://api.github.com/graphql', json={'query': query}, headers=headers) request = requests_retry_session().post(
"https://api.github.com/graphql", json={"query": query}, headers=headers
)
if request.status_code == 200: if request.status_code == 200:
result = request.json() result = request.json()
if 'errors' in result: if "errors" in result:
raise Exception('Errors occurred: {}\nOriginal query: {}'.format(result["errors"], query)) raise Exception(
"Errors occurred: {}\nOriginal query: {}".format(
result["errors"], query
)
)
if not is_mutation: if not is_mutation:
import inspect import inspect
caller = inspect.getouterframes(inspect.currentframe(), 2)[1][3] caller = inspect.getouterframes(inspect.currentframe(), 2)[1][3]
if caller not in list(self.api_costs.keys()): if caller not in list(self.api_costs.keys()):
self.api_costs[caller] = 0 self.api_costs[caller] = 0
self.api_costs[caller] += result['data']['rateLimit']['cost'] self.api_costs[caller] += result["data"]["rateLimit"]["cost"]
return result['data'] return result["data"]
else: else:
import json import json
raise Exception('Query failed with code {code}:\n{json}'.format(code=request.status_code, json=json.dumps(request.json(), indent=4)))
raise Exception(
"Query failed with code {code}:\n{json}".format(
code=request.status_code,
json=json.dumps(request.json(), indent=4),
)
)
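For reference, a self-contained sketch of the cursor-pagination loop that `get_members`, `get_pull_requests`, and `set_label` all follow, written with GraphQL variables instead of string formatting. The token is a placeholder, the organization and team names are taken from the script's defaults, and the retry session and API-cost accounting from `_run` are omitted:

import requests

TOKEN = "<github personal access token>"  # placeholder
QUERY = """
query ($cursor: String) {
  organization(login: "ClickHouse") {
    team(slug: "core") {
      members(first: 100, after: $cursor) {
        pageInfo { hasNextPage endCursor }
        nodes { login id }
      }
    }
  }
}
"""


def fetch_team_members():
    members, cursor = {}, None
    while True:
        response = requests.post(
            "https://api.github.com/graphql",
            json={"query": QUERY, "variables": {"cursor": cursor}},
            headers={"Authorization": "bearer {}".format(TOKEN)},
        )
        response.raise_for_status()
        team = response.json()["data"]["organization"]["team"]
        if team is None:
            break
        page = team["members"]
        members.update({node["login"]: node["id"] for node in page["nodes"]})
        if not page["pageInfo"]["hasNextPage"]:
            break
        cursor = page["pageInfo"]["endCursor"]
    return members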


@ -6,6 +6,7 @@ import json
import requests # type: ignore import requests # type: ignore
from get_robot_token import get_parameter_from_ssm from get_robot_token import get_parameter_from_ssm
class ClickHouseHelper: class ClickHouseHelper:
def __init__(self, url=None, user=None, password=None): def __init__(self, url=None, user=None, password=None):
self.url2 = None self.url2 = None
@ -15,27 +16,35 @@ class ClickHouseHelper:
url = get_parameter_from_ssm("clickhouse-test-stat-url") url = get_parameter_from_ssm("clickhouse-test-stat-url")
self.url2 = get_parameter_from_ssm("clickhouse-test-stat-url2") self.url2 = get_parameter_from_ssm("clickhouse-test-stat-url2")
self.auth2 = { self.auth2 = {
'X-ClickHouse-User': get_parameter_from_ssm("clickhouse-test-stat-login2"), "X-ClickHouse-User": get_parameter_from_ssm(
'X-ClickHouse-Key': '' "clickhouse-test-stat-login2"
),
"X-ClickHouse-Key": "",
} }
self.url = url self.url = url
self.auth = { self.auth = {
'X-ClickHouse-User': user if user is not None else get_parameter_from_ssm("clickhouse-test-stat-login"), "X-ClickHouse-User": user
'X-ClickHouse-Key': password if password is not None else get_parameter_from_ssm("clickhouse-test-stat-password") if user is not None
else get_parameter_from_ssm("clickhouse-test-stat-login"),
"X-ClickHouse-Key": password
if password is not None
else get_parameter_from_ssm("clickhouse-test-stat-password"),
} }
@staticmethod @staticmethod
def _insert_json_str_info_impl(url, auth, db, table, json_str): def _insert_json_str_info_impl(url, auth, db, table, json_str):
params = { params = {
'database': db, "database": db,
'query': 'INSERT INTO {table} FORMAT JSONEachRow'.format(table=table), "query": "INSERT INTO {table} FORMAT JSONEachRow".format(table=table),
'date_time_input_format': 'best_effort', "date_time_input_format": "best_effort",
'send_logs_level': 'warning', "send_logs_level": "warning",
} }
for i in range(5): for i in range(5):
response = requests.post(url, params=params, data=json_str, headers=auth, verify=False) response = requests.post(
url, params=params, data=json_str, headers=auth, verify=False
)
logging.info("Response content '%s'", response.content) logging.info("Response content '%s'", response.content)
@ -43,16 +52,25 @@ class ClickHouseHelper:
break break
error = ( error = (
"Cannot insert data into clickhouse at try " + str(i) "Cannot insert data into clickhouse at try "
+ ": HTTP code " + str(response.status_code) + ": '" + str(i)
+ str(response.text) + "'") + ": HTTP code "
+ str(response.status_code)
+ ": '"
+ str(response.text)
+ "'"
)
if response.status_code >= 500: if response.status_code >= 500:
# A retriable error # A retriable error
time.sleep(1) time.sleep(1)
continue continue
logging.info("Request headers '%s', body '%s'", response.request.headers, response.request.body) logging.info(
"Request headers '%s', body '%s'",
response.request.headers,
response.request.body,
)
raise Exception(error) raise Exception(error)
else: else:
@ -72,18 +90,20 @@ class ClickHouseHelper:
for event in events: for event in events:
jsons.append(json.dumps(event)) jsons.append(json.dumps(event))
self._insert_json_str_info(db, table, ','.join(jsons)) self._insert_json_str_info(db, table, ",".join(jsons))
def _select_and_get_json_each_row(self, db, query): def _select_and_get_json_each_row(self, db, query):
params = { params = {
'database': db, "database": db,
'query': query, "query": query,
'default_format': 'JSONEachRow', "default_format": "JSONEachRow",
} }
for i in range(5): for i in range(5):
response = None response = None
try: try:
response = requests.get(self.url, params=params, headers=self.auth, verify=False) response = requests.get(
self.url, params=params, headers=self.auth, verify=False
)
response.raise_for_status() response.raise_for_status()
return response.text return response.text
except Exception as ex: except Exception as ex:
@ -97,15 +117,21 @@ class ClickHouseHelper:
def select_json_each_row(self, db, query): def select_json_each_row(self, db, query):
text = self._select_and_get_json_each_row(db, query) text = self._select_and_get_json_each_row(db, query)
result = [] result = []
for line in text.split('\n'): for line in text.split("\n"):
if line: if line:
result.append(json.loads(line)) result.append(json.loads(line))
return result return result
def prepare_tests_results_for_clickhouse( def prepare_tests_results_for_clickhouse(
pr_info, test_results, pr_info,
check_status, check_duration, check_start_time, test_results,
report_url, check_name): check_status,
check_duration,
check_start_time,
report_url,
check_name,
):
pull_request_url = "https://github.com/ClickHouse/ClickHouse/commits/master" pull_request_url = "https://github.com/ClickHouse/ClickHouse/commits/master"
base_ref = "master" base_ref = "master"
@ -147,13 +173,14 @@ def prepare_tests_results_for_clickhouse(
test_time = 0 test_time = 0
if len(test_result) > 2 and test_result[2]: if len(test_result) > 2 and test_result[2]:
test_time = test_result[2] test_time = test_result[2]
current_row['test_duration_ms'] = int(float(test_time) * 1000) current_row["test_duration_ms"] = int(float(test_time) * 1000)
current_row['test_name'] = test_name current_row["test_name"] = test_name
current_row['test_status'] = test_status current_row["test_status"] = test_status
result.append(current_row) result.append(current_row)
return result return result
def mark_flaky_tests(clickhouse_helper, check_name, test_results): def mark_flaky_tests(clickhouse_helper, check_name, test_results):
try: try:
query = """ query = """
@ -164,14 +191,16 @@ def mark_flaky_tests(clickhouse_helper, check_name, test_results):
AND check_name = '{check_name}' AND check_name = '{check_name}'
AND (test_status = 'FAIL' OR test_status = 'FLAKY') AND (test_status = 'FAIL' OR test_status = 'FLAKY')
AND pull_request_number = 0 AND pull_request_number = 0
""".format(check_name=check_name) """.format(
check_name=check_name
)
tests_data = clickhouse_helper.select_json_each_row('gh-data', query) tests_data = clickhouse_helper.select_json_each_row("gh-data", query)
master_failed_tests = {row['test_name'] for row in tests_data} master_failed_tests = {row["test_name"] for row in tests_data}
logging.info("Found flaky tests: %s", ', '.join(master_failed_tests)) logging.info("Found flaky tests: %s", ", ".join(master_failed_tests))
for test_result in test_results: for test_result in test_results:
if test_result[1] == 'FAIL' and test_result[0] in master_failed_tests: if test_result[1] == "FAIL" and test_result[0] in master_failed_tests:
test_result[1] = 'FLAKY' test_result[1] = "FLAKY"
except Exception as ex: except Exception as ex:
logging.info("Exception happened during flaky tests fetch %s", ex) logging.info("Exception happened during flaky tests fetch %s", ex)


@ -18,13 +18,16 @@ from tee_popen import TeePopen
NAME = "Woboq Build (actions)" NAME = "Woboq Build (actions)"
def get_run_command(repo_path, output_path, image): def get_run_command(repo_path, output_path, image):
cmd = "docker run " + \ cmd = (
f"--volume={repo_path}:/repo_folder " \ "docker run " + f"--volume={repo_path}:/repo_folder "
f"--volume={output_path}:/test_output " \ f"--volume={output_path}:/test_output "
f"-e 'DATA=https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/data' {image}" f"-e 'DATA=https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/data' {image}"
)
return cmd return cmd
if __name__ == "__main__": if __name__ == "__main__":
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
@ -37,8 +40,8 @@ if __name__ == "__main__":
if not os.path.exists(temp_path): if not os.path.exists(temp_path):
os.makedirs(temp_path) os.makedirs(temp_path)
docker_image = get_image_with_version(IMAGES_PATH, 'clickhouse/codebrowser') docker_image = get_image_with_version(IMAGES_PATH, "clickhouse/codebrowser")
s3_helper = S3Helper('https://s3.amazonaws.com') s3_helper = S3Helper("https://s3.amazonaws.com")
result_path = os.path.join(temp_path, "result_path") result_path = os.path.join(temp_path, "result_path")
if not os.path.exists(result_path): if not os.path.exists(result_path):
@ -62,14 +65,20 @@ if __name__ == "__main__":
report_path = os.path.join(result_path, "html_report") report_path = os.path.join(result_path, "html_report")
logging.info("Report path %s", report_path) logging.info("Report path %s", report_path)
s3_path_prefix = "codebrowser" s3_path_prefix = "codebrowser"
html_urls = s3_helper.fast_parallel_upload_dir(report_path, s3_path_prefix, 'clickhouse-test-reports') html_urls = s3_helper.fast_parallel_upload_dir(
report_path, s3_path_prefix, "clickhouse-test-reports"
)
index_html = '<a href="https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/index.html">HTML report</a>' index_html = '<a href="https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/index.html">HTML report</a>'
test_results = [(index_html, "Look at the report")] test_results = [(index_html, "Look at the report")]
report_url = upload_results(s3_helper, 0, os.getenv("GITHUB_SHA"), test_results, [], NAME) report_url = upload_results(
s3_helper, 0, os.getenv("GITHUB_SHA"), test_results, [], NAME
)
print(f"::notice ::Report url: {report_url}") print(f"::notice ::Report url: {report_url}")
post_commit_status(gh, os.getenv("GITHUB_SHA"), NAME, "Report built", "success", report_url) post_commit_status(
gh, os.getenv("GITHUB_SHA"), NAME, "Report built", "success", report_url
)


@ -14,9 +14,9 @@ def override_status(status, check_name, invert=False):
return "success" return "success"
if invert: if invert:
if status == 'success': if status == "success":
return 'error' return "error"
return 'success' return "success"
return status return status
@ -56,6 +56,6 @@ def post_commit_status(gh, sha, check_name, description, state, report_url):
def post_commit_status_to_file(file_path, description, state, report_url): def post_commit_status_to_file(file_path, description, state, report_url):
if os.path.exists(file_path): if os.path.exists(file_path):
raise Exception(f'File "{file_path}" already exists!') raise Exception(f'File "{file_path}" already exists!')
with open(file_path, 'w', encoding='utf-8') as f: with open(file_path, "w", encoding="utf-8") as f:
out = csv.writer(f, delimiter='\t') out = csv.writer(f, delimiter="\t")
out.writerow([state, report_url, description]) out.writerow([state, report_url, description])
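The file written above holds a single tab-separated row; a small sketch of reading it back (the path is hypothetical):

import csv

with open("/tmp/post_commit_status.tsv", encoding="utf-8") as f:
    state, report_url, description = next(csv.reader(f, delimiter="\t"))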


@ -16,34 +16,40 @@ from build_download_helper import download_builds_filter
from upload_result_helper import upload_results from upload_result_helper import upload_results
from docker_pull_helper import get_images_with_versions from docker_pull_helper import get_images_with_versions
from commit_status_helper import post_commit_status from commit_status_helper import post_commit_status
from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse from clickhouse_helper import (
ClickHouseHelper,
mark_flaky_tests,
prepare_tests_results_for_clickhouse,
)
from stopwatch import Stopwatch from stopwatch import Stopwatch
from rerun_helper import RerunHelper from rerun_helper import RerunHelper
IMAGE_UBUNTU = "clickhouse/test-old-ubuntu" IMAGE_UBUNTU = "clickhouse/test-old-ubuntu"
IMAGE_CENTOS = "clickhouse/test-old-centos" IMAGE_CENTOS = "clickhouse/test-old-centos"
MAX_GLIBC_VERSION = '2.4' MAX_GLIBC_VERSION = "2.4"
DOWNLOAD_RETRIES_COUNT = 5 DOWNLOAD_RETRIES_COUNT = 5
CHECK_NAME = "Compatibility check (actions)" CHECK_NAME = "Compatibility check (actions)"
def process_os_check(log_path): def process_os_check(log_path):
name = os.path.basename(log_path) name = os.path.basename(log_path)
with open(log_path, 'r') as log: with open(log_path, "r") as log:
line = log.read().split('\n')[0].strip() line = log.read().split("\n")[0].strip()
if line != 'OK': if line != "OK":
return (name, "FAIL") return (name, "FAIL")
else: else:
return (name, "OK") return (name, "OK")
def process_glibc_check(log_path): def process_glibc_check(log_path):
bad_lines = [] bad_lines = []
with open(log_path, 'r') as log: with open(log_path, "r") as log:
for line in log: for line in log:
if line.strip(): if line.strip():
columns = line.strip().split(' ') columns = line.strip().split(" ")
symbol_with_glibc = columns[-2] # sysconf@GLIBC_2.2.5 symbol_with_glibc = columns[-2] # sysconf@GLIBC_2.2.5
_, version = symbol_with_glibc.split('@GLIBC_') _, version = symbol_with_glibc.split("@GLIBC_")
if version == 'PRIVATE': if version == "PRIVATE":
bad_lines.append((symbol_with_glibc, "FAIL")) bad_lines.append((symbol_with_glibc, "FAIL"))
elif StrictVersion(version) > MAX_GLIBC_VERSION: elif StrictVersion(version) > MAX_GLIBC_VERSION:
bad_lines.append((symbol_with_glibc, "FAIL")) bad_lines.append((symbol_with_glibc, "FAIL"))
@ -51,6 +57,7 @@ def process_glibc_check(log_path):
bad_lines.append(("glibc check", "OK")) bad_lines.append(("glibc check", "OK"))
return bad_lines return bad_lines
def process_result(result_folder, server_log_folder): def process_result(result_folder, server_log_folder):
summary = process_glibc_check(os.path.join(result_folder, "glibc.log")) summary = process_glibc_check(os.path.join(result_folder, "glibc.log"))
@ -86,16 +93,18 @@ def process_result(result_folder, server_log_folder):
return status, description, summary, result_logs return status, description, summary, result_logs
def get_run_commands(build_path, result_folder, server_log_folder, image_centos, image_ubuntu): def get_run_commands(
build_path, result_folder, server_log_folder, image_centos, image_ubuntu
):
return [ return [
f"readelf -s {build_path}/usr/bin/clickhouse | grep '@GLIBC_' > {result_folder}/glibc.log", f"readelf -s {build_path}/usr/bin/clickhouse | grep '@GLIBC_' > {result_folder}/glibc.log",
f"readelf -s {build_path}/usr/bin/clickhouse-odbc-bridge | grep '@GLIBC_' >> {result_folder}/glibc.log", f"readelf -s {build_path}/usr/bin/clickhouse-odbc-bridge | grep '@GLIBC_' >> {result_folder}/glibc.log",
f"docker run --network=host --volume={build_path}/usr/bin/clickhouse:/clickhouse " \ f"docker run --network=host --volume={build_path}/usr/bin/clickhouse:/clickhouse "
f"--volume={build_path}/etc/clickhouse-server:/config " \ f"--volume={build_path}/etc/clickhouse-server:/config "
f"--volume={server_log_folder}:/var/log/clickhouse-server {image_ubuntu} > {result_folder}/ubuntu:12.04", f"--volume={server_log_folder}:/var/log/clickhouse-server {image_ubuntu} > {result_folder}/ubuntu:12.04",
f"docker run --network=host --volume={build_path}/usr/bin/clickhouse:/clickhouse " \ f"docker run --network=host --volume={build_path}/usr/bin/clickhouse:/clickhouse "
f"--volume={build_path}/etc/clickhouse-server:/config " \ f"--volume={build_path}/etc/clickhouse-server:/config "
f"--volume={server_log_folder}:/var/log/clickhouse-server {image_centos} > {result_folder}/centos:5", f"--volume={server_log_folder}:/var/log/clickhouse-server {image_centos} > {result_folder}/centos:5",
] ]
@ -124,14 +133,18 @@ if __name__ == "__main__":
os.makedirs(packages_path) os.makedirs(packages_path)
def url_filter(url): def url_filter(url):
return url.endswith('.deb') and ('clickhouse-common-static_' in url or 'clickhouse-server_' in url) return url.endswith(".deb") and (
"clickhouse-common-static_" in url or "clickhouse-server_" in url
)
download_builds_filter(CHECK_NAME, reports_path, packages_path, url_filter) download_builds_filter(CHECK_NAME, reports_path, packages_path, url_filter)
for f in os.listdir(packages_path): for f in os.listdir(packages_path):
if '.deb' in f: if ".deb" in f:
full_path = os.path.join(packages_path, f) full_path = os.path.join(packages_path, f)
subprocess.check_call(f"dpkg -x {full_path} {packages_path} && rm {full_path}", shell=True) subprocess.check_call(
f"dpkg -x {full_path} {packages_path} && rm {full_path}", shell=True
)
server_log_path = os.path.join(temp_path, "server_log") server_log_path = os.path.join(temp_path, "server_log")
if not os.path.exists(server_log_path): if not os.path.exists(server_log_path):
@ -141,7 +154,9 @@ if __name__ == "__main__":
if not os.path.exists(result_path): if not os.path.exists(result_path):
os.makedirs(result_path) os.makedirs(result_path)
run_commands = get_run_commands(packages_path, result_path, server_log_path, docker_images[0], docker_images[1]) run_commands = get_run_commands(
packages_path, result_path, server_log_path, docker_images[0], docker_images[1]
)
state = "success" state = "success"
for run_command in run_commands: for run_command in run_commands:
@ -154,15 +169,32 @@ if __name__ == "__main__":
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
s3_helper = S3Helper('https://s3.amazonaws.com') s3_helper = S3Helper("https://s3.amazonaws.com")
state, description, test_results, additional_logs = process_result(result_path, server_log_path) state, description, test_results, additional_logs = process_result(
result_path, server_log_path
)
ch_helper = ClickHouseHelper() ch_helper = ClickHouseHelper()
mark_flaky_tests(ch_helper, CHECK_NAME, test_results) mark_flaky_tests(ch_helper, CHECK_NAME, test_results)
report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_logs, CHECK_NAME) report_url = upload_results(
s3_helper,
pr_info.number,
pr_info.sha,
test_results,
additional_logs,
CHECK_NAME,
)
print(f"::notice ::Report url: {report_url}") print(f"::notice ::Report url: {report_url}")
post_commit_status(gh, pr_info.sha, CHECK_NAME, description, state, report_url) post_commit_status(gh, pr_info.sha, CHECK_NAME, description, state, report_url)
prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, CHECK_NAME) prepared_events = prepare_tests_results_for_clickhouse(
pr_info,
test_results,
state,
stopwatch.duration_seconds,
stopwatch.start_time_str,
report_url,
CHECK_NAME,
)
ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events) ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)


@ -3,20 +3,21 @@ import subprocess
import logging import logging
import os import os
def compress_file_fast(path, archive_path): def compress_file_fast(path, archive_path):
if os.path.exists('/usr/bin/pigz'): if os.path.exists("/usr/bin/pigz"):
subprocess.check_call("pigz < {} > {}".format(path, archive_path), shell=True) subprocess.check_call("pigz < {} > {}".format(path, archive_path), shell=True)
else: else:
subprocess.check_call("gzip < {} > {}".format(path, archive_path), shell=True) subprocess.check_call("gzip < {} > {}".format(path, archive_path), shell=True)
def compress_fast(path, archive_path, exclude=None): def compress_fast(path, archive_path, exclude=None):
pigz_part = '' pigz_part = ""
if os.path.exists('/usr/bin/pigz'): if os.path.exists("/usr/bin/pigz"):
logging.info("pigz found, will compress and decompress faster") logging.info("pigz found, will compress and decompress faster")
pigz_part = "--use-compress-program='pigz'" pigz_part = "--use-compress-program='pigz'"
else: else:
pigz_part = '-z' pigz_part = "-z"
logging.info("no pigz, compressing with default tar") logging.info("no pigz, compressing with default tar")
if exclude is None: if exclude is None:
@ -31,21 +32,36 @@ def compress_fast(path, archive_path, exclude=None):
path = os.path.dirname(path) path = os.path.dirname(path)
else: else:
path += "/.." path += "/.."
cmd = "tar {} {} -cf {} -C {} {}".format(pigz_part, exclude_part, archive_path, path, fname) cmd = "tar {} {} -cf {} -C {} {}".format(
pigz_part, exclude_part, archive_path, path, fname
)
logging.debug("compress_fast cmd: %s", cmd) logging.debug("compress_fast cmd: %s", cmd)
subprocess.check_call(cmd, shell=True) subprocess.check_call(cmd, shell=True)
def decompress_fast(archive_path, result_path=None): def decompress_fast(archive_path, result_path=None):
pigz_part = '' pigz_part = ""
if os.path.exists('/usr/bin/pigz'): if os.path.exists("/usr/bin/pigz"):
logging.info("pigz found, will compress and decompress faster ('%s' -> '%s')", archive_path, result_path) logging.info(
"pigz found, will compress and decompress faster ('%s' -> '%s')",
archive_path,
result_path,
)
pigz_part = "--use-compress-program='pigz'" pigz_part = "--use-compress-program='pigz'"
else: else:
pigz_part = '-z' pigz_part = "-z"
logging.info("no pigz, decompressing with default tar ('%s' -> '%s')", archive_path, result_path) logging.info(
"no pigz, decompressing with default tar ('%s' -> '%s')",
archive_path,
result_path,
)
if result_path is None: if result_path is None:
subprocess.check_call("tar {} -xf {}".format(pigz_part, archive_path), shell=True) subprocess.check_call(
"tar {} -xf {}".format(pigz_part, archive_path), shell=True
)
else: else:
subprocess.check_call("tar {} -xf {} -C {}".format(pigz_part, archive_path, result_path), shell=True) subprocess.check_call(
"tar {} -xf {} -C {}".format(pigz_part, archive_path, result_path),
shell=True,
)


@ -8,23 +8,27 @@ import logging
from typing import Optional from typing import Optional
class DockerImage: class DockerImage:
def __init__(self, name, version : Optional[str] = None): def __init__(self, name, version: Optional[str] = None):
self.name = name self.name = name
if version is None: if version is None:
self.version = 'latest' self.version = "latest"
else: else:
self.version = version self.version = version
def __str__(self): def __str__(self):
return f"{self.name}:{self.version}" return f"{self.name}:{self.version}"
def get_images_with_versions(reports_path, required_image, pull=True, version : Optional[str] = None):
def get_images_with_versions(
reports_path, required_image, pull=True, version: Optional[str] = None
):
images_path = None images_path = None
for root, _, files in os.walk(reports_path): for root, _, files in os.walk(reports_path):
for f in files: for f in files:
if f == 'changed_images.json': if f == "changed_images.json":
images_path = os.path.join(root, 'changed_images.json') images_path = os.path.join(root, "changed_images.json")
break break
if not images_path: if not images_path:
@ -34,7 +38,7 @@ def get_images_with_versions(reports_path, required_image, pull=True, version :
if images_path is not None and os.path.exists(images_path): if images_path is not None and os.path.exists(images_path):
logging.info("Images file exists") logging.info("Images file exists")
with open(images_path, 'r', encoding='utf-8') as images_fd: with open(images_path, "r", encoding="utf-8") as images_fd:
images = json.load(images_fd) images = json.load(images_fd)
logging.info("Got images %s", images) logging.info("Got images %s", images)
else: else:
@ -52,15 +56,22 @@ def get_images_with_versions(reports_path, required_image, pull=True, version :
for i in range(10): for i in range(10):
try: try:
logging.info("Pulling image %s", docker_image) logging.info("Pulling image %s", docker_image)
latest_error = subprocess.check_output(f"docker pull {docker_image}", stderr=subprocess.STDOUT, shell=True) latest_error = subprocess.check_output(
f"docker pull {docker_image}",
stderr=subprocess.STDOUT,
shell=True,
)
break break
except Exception as ex: except Exception as ex:
time.sleep(i * 3) time.sleep(i * 3)
logging.info("Got execption pulling docker %s", ex) logging.info("Got execption pulling docker %s", ex)
else: else:
raise Exception(f"Cannot pull dockerhub for image docker pull {docker_image} because of {latest_error}") raise Exception(
f"Cannot pull dockerhub for image docker pull {docker_image} because of {latest_error}"
)
return docker_images return docker_images
def get_image_with_version(reports_path, image, pull=True, version=None): def get_image_with_version(reports_path, image, pull=True, version=None):
return get_images_with_versions(reports_path, [image], pull, version=version)[0] return get_images_with_versions(reports_path, [image], pull, version=version)[0]


@ -40,7 +40,9 @@ if __name__ == "__main__":
if not pr_info.has_changes_in_documentation(): if not pr_info.has_changes_in_documentation():
logging.info("No changes in documentation") logging.info("No changes in documentation")
commit = get_commit(gh, pr_info.sha) commit = get_commit(gh, pr_info.sha)
commit.create_status(context=NAME, description="No changes in docs", state="success") commit.create_status(
context=NAME, description="No changes in docs", state="success"
)
sys.exit(0) sys.exit(0)
logging.info("Has changes in docs") logging.info("Has changes in docs")
@ -48,15 +50,15 @@ if __name__ == "__main__":
if not os.path.exists(temp_path): if not os.path.exists(temp_path):
os.makedirs(temp_path) os.makedirs(temp_path)
docker_image = get_image_with_version(temp_path, 'clickhouse/docs-check') docker_image = get_image_with_version(temp_path, "clickhouse/docs-check")
test_output = os.path.join(temp_path, 'docs_check_log') test_output = os.path.join(temp_path, "docs_check_log")
if not os.path.exists(test_output): if not os.path.exists(test_output):
os.makedirs(test_output) os.makedirs(test_output)
cmd = f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/repo_path --volume={test_output}:/output_path {docker_image}" cmd = f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/repo_path --volume={test_output}:/output_path {docker_image}"
run_log_path = os.path.join(test_output, 'runlog.log') run_log_path = os.path.join(test_output, "runlog.log")
logging.info("Running command: '%s'", cmd) logging.info("Running command: '%s'", cmd)
with TeePopen(cmd, run_log_path) as process: with TeePopen(cmd, run_log_path) as process:
@ -82,10 +84,10 @@ if __name__ == "__main__":
for f in files: for f in files:
path = os.path.join(test_output, f) path = os.path.join(test_output, f)
additional_files.append(path) additional_files.append(path)
with open(path, 'r', encoding='utf-8') as check_file: with open(path, "r", encoding="utf-8") as check_file:
for line in check_file: for line in check_file:
if "ERROR" in line: if "ERROR" in line:
lines.append((line.split(':')[-1], "FAIL")) lines.append((line.split(":")[-1], "FAIL"))
if lines: if lines:
status = "failure" status = "failure"
description = "Found errors in docs" description = "Found errors in docs"
@ -94,12 +96,22 @@ if __name__ == "__main__":
else: else:
lines.append(("Non zero exit code", "FAIL")) lines.append(("Non zero exit code", "FAIL"))
s3_helper = S3Helper('https://s3.amazonaws.com') s3_helper = S3Helper("https://s3.amazonaws.com")
ch_helper = ClickHouseHelper() ch_helper = ClickHouseHelper()
report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, lines, additional_files, NAME) report_url = upload_results(
s3_helper, pr_info.number, pr_info.sha, lines, additional_files, NAME
)
print("::notice ::Report url: {report_url}") print("::notice ::Report url: {report_url}")
post_commit_status(gh, pr_info.sha, NAME, description, status, report_url) post_commit_status(gh, pr_info.sha, NAME, description, status, report_url)
prepared_events = prepare_tests_results_for_clickhouse(pr_info, lines, status, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, NAME) prepared_events = prepare_tests_results_for_clickhouse(
pr_info,
lines,
status,
stopwatch.duration_seconds,
stopwatch.start_time_str,
report_url,
NAME,
)
ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events) ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)


@ -34,19 +34,23 @@ if __name__ == "__main__":
if not os.path.exists(temp_path): if not os.path.exists(temp_path):
os.makedirs(temp_path) os.makedirs(temp_path)
docker_image = get_image_with_version(temp_path, 'clickhouse/docs-release') docker_image = get_image_with_version(temp_path, "clickhouse/docs-release")
test_output = os.path.join(temp_path, 'docs_release_log') test_output = os.path.join(temp_path, "docs_release_log")
if not os.path.exists(test_output): if not os.path.exists(test_output):
os.makedirs(test_output) os.makedirs(test_output)
token = CLOUDFLARE_TOKEN token = CLOUDFLARE_TOKEN
cmd = "docker run --cap-add=SYS_PTRACE --volume=$SSH_AUTH_SOCK:/ssh-agent -e SSH_AUTH_SOCK=/ssh-agent " \ cmd = (
f"-e CLOUDFLARE_TOKEN={token} --volume={repo_path}:/repo_path --volume={test_output}:/output_path {docker_image}" "docker run --cap-add=SYS_PTRACE --volume=$SSH_AUTH_SOCK:/ssh-agent -e SSH_AUTH_SOCK=/ssh-agent "
f"-e CLOUDFLARE_TOKEN={token} --volume={repo_path}:/repo_path --volume={test_output}:/output_path {docker_image}"
)
run_log_path = os.path.join(test_output, 'runlog.log') run_log_path = os.path.join(test_output, "runlog.log")
with open(run_log_path, 'w', encoding='utf-8') as log, SSHKey("ROBOT_CLICKHOUSE_SSH_KEY"): with open(run_log_path, "w", encoding="utf-8") as log, SSHKey(
"ROBOT_CLICKHOUSE_SSH_KEY"
):
with subprocess.Popen(cmd, shell=True, stderr=log, stdout=log) as process: with subprocess.Popen(cmd, shell=True, stderr=log, stdout=log) as process:
retcode = process.wait() retcode = process.wait()
if retcode == 0: if retcode == 0:
@ -70,10 +74,10 @@ if __name__ == "__main__":
for f in files: for f in files:
path = os.path.join(test_output, f) path = os.path.join(test_output, f)
additional_files.append(path) additional_files.append(path)
with open(path, 'r', encoding='utf-8') as check_file: with open(path, "r", encoding="utf-8") as check_file:
for line in check_file: for line in check_file:
if "ERROR" in line: if "ERROR" in line:
lines.append((line.split(':')[-1], "FAIL")) lines.append((line.split(":")[-1], "FAIL"))
if lines: if lines:
status = "failure" status = "failure"
description = "Found errors in docs" description = "Found errors in docs"
@ -82,9 +86,13 @@ if __name__ == "__main__":
else: else:
lines.append(("Non zero exit code", "FAIL")) lines.append(("Non zero exit code", "FAIL"))
s3_helper = S3Helper('https://s3.amazonaws.com') s3_helper = S3Helper("https://s3.amazonaws.com")
report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, lines, additional_files, NAME) report_url = upload_results(
s3_helper, pr_info.number, pr_info.sha, lines, additional_files, NAME
)
print("::notice ::Report url: {report_url}") print("::notice ::Report url: {report_url}")
commit = get_commit(gh, pr_info.sha) commit = get_commit(gh, pr_info.sha)
commit.create_status(context=NAME, description=description, state=status, target_url=report_url) commit.create_status(
context=NAME, description=description, state=status, target_url=report_url
)


@ -22,7 +22,9 @@ CLICKHOUSE_CLIENT_DOWNLOAD_URL = "https://github.com/ClickHouse/ClickHouse/relea
CLICKHOUSE_COMMON_STATIC_PACKET_NAME = "clickhouse-common-static_{version}_amd64.deb" CLICKHOUSE_COMMON_STATIC_PACKET_NAME = "clickhouse-common-static_{version}_amd64.deb"
CLICKHOUSE_COMMON_STATIC_DBG_PACKET_NAME = "clickhouse-common-static-dbg_{version}_amd64.deb" CLICKHOUSE_COMMON_STATIC_DBG_PACKET_NAME = (
"clickhouse-common-static-dbg_{version}_amd64.deb"
)
CLICKHOUSE_SERVER_PACKET_NAME = "clickhouse-server_{version}_all.deb" CLICKHOUSE_SERVER_PACKET_NAME = "clickhouse-server_{version}_all.deb"
CLICKHOUSE_CLIENT_PACKET_NAME = "clickhouse-client_{version}_all.deb" CLICKHOUSE_CLIENT_PACKET_NAME = "clickhouse-client_{version}_all.deb"
@ -35,7 +37,9 @@ class Version:
self.version = version self.version = version
def __lt__(self, other): def __lt__(self, other):
return list(map(int, self.version.split('.'))) < list(map(int, other.version.split('.'))) return list(map(int, self.version.split("."))) < list(
map(int, other.version.split("."))
)
def __str__(self): def __str__(self):
return self.version return self.version
@ -49,6 +53,7 @@ class ReleaseInfo:
def __repr__(self): def __repr__(self):
return f"ReleaseInfo: {self.version}-{self.type}" return f"ReleaseInfo: {self.version}-{self.type}"
def find_previous_release(server_version, releases): def find_previous_release(server_version, releases):
releases.sort(key=lambda x: x.version, reverse=True) releases.sort(key=lambda x: x.version, reverse=True)
@ -66,15 +71,26 @@ def get_previous_release(server_version=None):
page = 1 page = 1
found = False found = False
while not found: while not found:
response = requests.get(CLICKHOUSE_TAGS_URL, {'page': page, 'per_page': 100}) response = requests.get(CLICKHOUSE_TAGS_URL, {"page": page, "per_page": 100})
if not response.ok: if not response.ok:
raise Exception('Cannot load the list of tags from github: ' + response.reason) raise Exception(
"Cannot load the list of tags from github: " + response.reason
)
releases_str = set(re.findall(VERSION_PATTERN, response.text)) releases_str = set(re.findall(VERSION_PATTERN, response.text))
if len(releases_str) == 0: if len(releases_str) == 0:
raise Exception('Cannot find previous release for ' + str(server_version) + ' server version') raise Exception(
"Cannot find previous release for "
+ str(server_version)
+ " server version"
)
releases = list(map(lambda x: ReleaseInfo(Version(x.split('-')[0]), x.split('-')[1]), releases_str)) releases = list(
map(
lambda x: ReleaseInfo(Version(x.split("-")[0]), x.split("-")[1]),
releases_str,
)
)
found, previous_release = find_previous_release(server_version, releases) found, previous_release = find_previous_release(server_version, releases)
page += 1 page += 1
@ -87,34 +103,53 @@ def download_packet(url, out_path):
""" """
response = requests.get(url) response = requests.get(url)
logging.info('Downloading %s', url) logging.info("Downloading %s", url)
if response.ok: if response.ok:
open(out_path, 'wb').write(response.content) open(out_path, "wb").write(response.content)
def download_packets(release, dest_path=PACKETS_DIR): def download_packets(release, dest_path=PACKETS_DIR):
if not os.path.exists(dest_path): if not os.path.exists(dest_path):
os.makedirs(dest_path) os.makedirs(dest_path)
logging.info('Will download %s', release) logging.info("Will download %s", release)
download_packet( download_packet(
CLICKHOUSE_COMMON_STATIC_DOWNLOAD_URL.format(version=release.version, type=release.type), CLICKHOUSE_COMMON_STATIC_DOWNLOAD_URL.format(
out_path=os.path.join(dest_path, CLICKHOUSE_COMMON_STATIC_PACKET_NAME.format(version=release.version)), version=release.version, type=release.type
),
out_path=os.path.join(
dest_path,
CLICKHOUSE_COMMON_STATIC_PACKET_NAME.format(version=release.version),
),
) )
download_packet( download_packet(
CLICKHOUSE_COMMON_STATIC_DBG_DOWNLOAD_URL.format(version=release.version, type=release.type), CLICKHOUSE_COMMON_STATIC_DBG_DOWNLOAD_URL.format(
out_path=os.path.join(dest_path, CLICKHOUSE_COMMON_STATIC_DBG_PACKET_NAME.format(version=release.version)), version=release.version, type=release.type
),
out_path=os.path.join(
dest_path,
CLICKHOUSE_COMMON_STATIC_DBG_PACKET_NAME.format(version=release.version),
),
) )
download_packet( download_packet(
CLICKHOUSE_SERVER_DOWNLOAD_URL.format(version=release.version, type=release.type), CLICKHOUSE_SERVER_DOWNLOAD_URL.format(
out_path=os.path.join(dest_path, CLICKHOUSE_SERVER_PACKET_NAME.format(version=release.version)), version=release.version, type=release.type
),
out_path=os.path.join(
dest_path, CLICKHOUSE_SERVER_PACKET_NAME.format(version=release.version)
),
) )
download_packet( download_packet(
CLICKHOUSE_CLIENT_DOWNLOAD_URL.format(version=release.version, type=release.type), CLICKHOUSE_CLIENT_DOWNLOAD_URL.format(
out_path=os.path.join(dest_path, CLICKHOUSE_CLIENT_PACKET_NAME.format(version=release.version)), version=release.version, type=release.type
),
out_path=os.path.join(
dest_path, CLICKHOUSE_CLIENT_PACKET_NAME.format(version=release.version)
),
) )
@ -123,7 +158,7 @@ def download_previous_release(dest_path):
download_packets(current_release, dest_path=dest_path) download_packets(current_release, dest_path=dest_path)
if __name__ == '__main__': if __name__ == "__main__":
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
server_version = Version(input()) server_version = Version(input())
previous_release = get_previous_release(server_version) previous_release = get_previous_release(server_version)
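The hunks above mostly show a single black rule, applied with its default 88-character line length: a call that no longer fits on one line is exploded into one argument per line with a trailing comma, while a nested call stays inline as long as it still fits. A minimal, self-contained sketch of that rule; the helper, URL template, and values below are made up for illustration and are not taken from this repo:

import os

PACKAGE_URL_TEMPLATE = "https://example.invalid/{version}/{type}/package.deb"  # hypothetical


def download(url, out_path):
    # stand-in for illustration; a real helper would fetch url into out_path
    print(url, "->", out_path)


# before black (a single line well over 88 characters):
# download(PACKAGE_URL_TEMPLATE.format(version="22.3", type="amd64"), out_path=os.path.join("packages", "clickhouse.deb"))

# after black: arguments exploded one per line, closed with a trailing comma
download(
    PACKAGE_URL_TEMPLATE.format(version="22.3", type="amd64"),
    out_path=os.path.join("packages", "clickhouse.deb"),
)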

View File

@ -7,7 +7,7 @@ from pr_info import PRInfo
from get_robot_token import get_best_robot_token from get_robot_token import get_best_robot_token
from commit_status_helper import get_commit from commit_status_helper import get_commit
NAME = 'Run Check (actions)' NAME = "Run Check (actions)"
def filter_statuses(statuses): def filter_statuses(statuses):
@ -36,4 +36,9 @@ if __name__ == "__main__":
url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}" url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}"
statuses = filter_statuses(list(commit.get_statuses())) statuses = filter_statuses(list(commit.get_statuses()))
if NAME in statuses and statuses[NAME].state == "pending": if NAME in statuses and statuses[NAME].state == "pending":
commit.create_status(context=NAME, description="All checks finished", state="success", target_url=url) commit.create_status(
context=NAME,
description="All checks finished",
state="success",
target_url=url,
)

View File

@ -17,26 +17,35 @@ from build_download_helper import download_all_deb_packages
from download_previous_release import download_previous_release from download_previous_release import download_previous_release
from upload_result_helper import upload_results from upload_result_helper import upload_results
from docker_pull_helper import get_image_with_version from docker_pull_helper import get_image_with_version
from commit_status_helper import post_commit_status, get_commit, override_status, post_commit_status_to_file from commit_status_helper import (
from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse post_commit_status,
get_commit,
override_status,
post_commit_status_to_file,
)
from clickhouse_helper import (
ClickHouseHelper,
mark_flaky_tests,
prepare_tests_results_for_clickhouse,
)
from stopwatch import Stopwatch from stopwatch import Stopwatch
from rerun_helper import RerunHelper from rerun_helper import RerunHelper
from tee_popen import TeePopen from tee_popen import TeePopen
NO_CHANGES_MSG = 'Nothing to run' NO_CHANGES_MSG = "Nothing to run"
def get_additional_envs(check_name, run_by_hash_num, run_by_hash_total): def get_additional_envs(check_name, run_by_hash_num, run_by_hash_total):
result = [] result = []
if 'DatabaseReplicated' in check_name: if "DatabaseReplicated" in check_name:
result.append("USE_DATABASE_REPLICATED=1") result.append("USE_DATABASE_REPLICATED=1")
if 'DatabaseOrdinary' in check_name: if "DatabaseOrdinary" in check_name:
result.append("USE_DATABASE_ORDINARY=1") result.append("USE_DATABASE_ORDINARY=1")
if 'wide parts enabled' in check_name: if "wide parts enabled" in check_name:
result.append("USE_POLYMORPHIC_PARTS=1") result.append("USE_POLYMORPHIC_PARTS=1")
#temporary # temporary
if 's3 storage' in check_name: if "s3 storage" in check_name:
result.append("USE_S3_STORAGE_FOR_MERGE_TREE=1") result.append("USE_S3_STORAGE_FOR_MERGE_TREE=1")
if run_by_hash_total != 0: if run_by_hash_total != 0:
@ -45,37 +54,55 @@ def get_additional_envs(check_name, run_by_hash_num, run_by_hash_total):
return result return result
def get_image_name(check_name): def get_image_name(check_name):
if 'stateless' in check_name.lower(): if "stateless" in check_name.lower():
return 'clickhouse/stateless-test' return "clickhouse/stateless-test"
if 'stateful' in check_name.lower(): if "stateful" in check_name.lower():
return 'clickhouse/stateful-test' return "clickhouse/stateful-test"
else: else:
raise Exception(f"Cannot deduce image name based on check name {check_name}") raise Exception(f"Cannot deduce image name based on check name {check_name}")
def get_run_command(builds_path, repo_tests_path, result_path, server_log_path, kill_timeout, additional_envs, image, flaky_check, tests_to_run):
    additional_options = ['--hung-check']
    additional_options.append('--print-time')

def get_run_command(
    builds_path,
    repo_tests_path,
    result_path,
    server_log_path,
    kill_timeout,
    additional_envs,
    image,
    flaky_check,
    tests_to_run,
):
    additional_options = ["--hung-check"]
    additional_options.append("--print-time")
if tests_to_run: if tests_to_run:
additional_options += tests_to_run additional_options += tests_to_run
additional_options_str = '-e ADDITIONAL_OPTIONS="' + ' '.join(additional_options) + '"' additional_options_str = (
'-e ADDITIONAL_OPTIONS="' + " ".join(additional_options) + '"'
)
envs = [f'-e MAX_RUN_TIME={int(0.9 * kill_timeout)}', '-e S3_URL="https://clickhouse-datasets.s3.amazonaws.com"'] envs = [
f"-e MAX_RUN_TIME={int(0.9 * kill_timeout)}",
'-e S3_URL="https://clickhouse-datasets.s3.amazonaws.com"',
]
if flaky_check: if flaky_check:
envs += ['-e NUM_TRIES=100', '-e MAX_RUN_TIME=1800'] envs += ["-e NUM_TRIES=100", "-e MAX_RUN_TIME=1800"]
envs += [f'-e {e}' for e in additional_envs] envs += [f"-e {e}" for e in additional_envs]
env_str = ' '.join(envs) env_str = " ".join(envs)
return f"docker run --volume={builds_path}:/package_folder " \ return (
f"--volume={repo_tests_path}:/usr/share/clickhouse-test " \ f"docker run --volume={builds_path}:/package_folder "
f"--volume={result_path}:/test_output --volume={server_log_path}:/var/log/clickhouse-server " \ f"--volume={repo_tests_path}:/usr/share/clickhouse-test "
f"--volume={result_path}:/test_output --volume={server_log_path}:/var/log/clickhouse-server "
f"--cap-add=SYS_PTRACE {env_str} {additional_options_str} {image}" f"--cap-add=SYS_PTRACE {env_str} {additional_options_str} {image}"
)
def get_tests_to_run(pr_info): def get_tests_to_run(pr_info):
@ -85,32 +112,43 @@ def get_tests_to_run(pr_info):
return [] return []
for fpath in pr_info.changed_files: for fpath in pr_info.changed_files:
if 'tests/queries/0_stateless/0' in fpath: if "tests/queries/0_stateless/0" in fpath:
logging.info('File %s changed and seems like stateless test', fpath) logging.info("File %s changed and seems like stateless test", fpath)
fname = fpath.split('/')[3] fname = fpath.split("/")[3]
fname_without_ext = os.path.splitext(fname)[0] fname_without_ext = os.path.splitext(fname)[0]
result.add(fname_without_ext + '.') result.add(fname_without_ext + ".")
return list(result) return list(result)
def process_results(result_folder, server_log_path): def process_results(result_folder, server_log_path):
test_results = [] test_results = []
additional_files = [] additional_files = []
# Just upload all files from result_folder. # Just upload all files from result_folder.
# If task provides processed results, then it's responsible for content of result_folder. # If task provides processed results, then it's responsible for content of result_folder.
if os.path.exists(result_folder): if os.path.exists(result_folder):
test_files = [f for f in os.listdir(result_folder) if os.path.isfile(os.path.join(result_folder, f))] test_files = [
f
for f in os.listdir(result_folder)
if os.path.isfile(os.path.join(result_folder, f))
]
additional_files = [os.path.join(result_folder, f) for f in test_files] additional_files = [os.path.join(result_folder, f) for f in test_files]
if os.path.exists(server_log_path): if os.path.exists(server_log_path):
server_log_files = [f for f in os.listdir(server_log_path) if os.path.isfile(os.path.join(server_log_path, f))] server_log_files = [
additional_files = additional_files + [os.path.join(server_log_path, f) for f in server_log_files] f
for f in os.listdir(server_log_path)
if os.path.isfile(os.path.join(server_log_path, f))
]
additional_files = additional_files + [
os.path.join(server_log_path, f) for f in server_log_files
]
status = [] status = []
status_path = os.path.join(result_folder, "check_status.tsv") status_path = os.path.join(result_folder, "check_status.tsv")
if os.path.exists(status_path): if os.path.exists(status_path):
logging.info("Found test_results.tsv") logging.info("Found test_results.tsv")
with open(status_path, 'r', encoding='utf-8') as status_file: with open(status_path, "r", encoding="utf-8") as status_file:
status = list(csv.reader(status_file, delimiter='\t')) status = list(csv.reader(status_file, delimiter="\t"))
if len(status) != 1 or len(status[0]) != 2: if len(status) != 1 or len(status[0]) != 2:
logging.info("Files in result folder %s", os.listdir(result_folder)) logging.info("Files in result folder %s", os.listdir(result_folder))
@ -125,8 +163,8 @@ def process_results(result_folder, server_log_path):
logging.info("Files in result folder %s", os.listdir(result_folder)) logging.info("Files in result folder %s", os.listdir(result_folder))
return "error", "Not found test_results.tsv", test_results, additional_files return "error", "Not found test_results.tsv", test_results, additional_files
with open(results_path, 'r', encoding='utf-8') as results_file: with open(results_path, "r", encoding="utf-8") as results_file:
test_results = list(csv.reader(results_file, delimiter='\t')) test_results = list(csv.reader(results_file, delimiter="\t"))
if len(test_results) == 0: if len(test_results) == 0:
return "error", "Empty test_results.tsv", test_results, additional_files return "error", "Empty test_results.tsv", test_results, additional_files
@ -137,8 +175,17 @@ def parse_args():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("check_name") parser.add_argument("check_name")
parser.add_argument("kill_timeout", type=int) parser.add_argument("kill_timeout", type=int)
parser.add_argument("--validate-bugfix", action='store_true', help="Check that added tests failed on latest stable") parser.add_argument(
parser.add_argument("--post-commit-status", default='commit_status', choices=['commit_status', 'file'], help="Where to public post commit status") "--validate-bugfix",
action="store_true",
help="Check that added tests failed on latest stable",
)
parser.add_argument(
"--post-commit-status",
default="commit_status",
choices=["commit_status", "file"],
help="Where to public post commit status",
)
return parser.parse_args() return parser.parse_args()
@ -156,7 +203,7 @@ if __name__ == "__main__":
kill_timeout = args.kill_timeout kill_timeout = args.kill_timeout
validate_bugix_check = args.validate_bugfix validate_bugix_check = args.validate_bugfix
flaky_check = 'flaky' in check_name.lower() flaky_check = "flaky" in check_name.lower()
run_changed_tests = flaky_check or validate_bugix_check run_changed_tests = flaky_check or validate_bugix_check
gh = Github(get_best_robot_token()) gh = Github(get_best_robot_token())
@ -166,16 +213,23 @@ if __name__ == "__main__":
if not os.path.exists(temp_path): if not os.path.exists(temp_path):
os.makedirs(temp_path) os.makedirs(temp_path)
if validate_bugix_check and 'pr-bugfix' not in pr_info.labels: if validate_bugix_check and "pr-bugfix" not in pr_info.labels:
if args.post_commit_status == 'file': if args.post_commit_status == "file":
post_commit_status_to_file(os.path.join(temp_path, "post_commit_status.tsv"), 'Skipped (no pr-bugfix)', 'success', 'null') post_commit_status_to_file(
os.path.join(temp_path, "post_commit_status.tsv"),
"Skipped (no pr-bugfix)",
"success",
"null",
)
logging.info("Skipping '%s' (no pr-bugfix)", check_name) logging.info("Skipping '%s' (no pr-bugfix)", check_name)
sys.exit(0) sys.exit(0)
if 'RUN_BY_HASH_NUM' in os.environ: if "RUN_BY_HASH_NUM" in os.environ:
run_by_hash_num = int(os.getenv('RUN_BY_HASH_NUM')) run_by_hash_num = int(os.getenv("RUN_BY_HASH_NUM"))
run_by_hash_total = int(os.getenv('RUN_BY_HASH_TOTAL')) run_by_hash_total = int(os.getenv("RUN_BY_HASH_TOTAL"))
check_name_with_group = check_name + f' [{run_by_hash_num + 1}/{run_by_hash_total}]' check_name_with_group = (
check_name + f" [{run_by_hash_num + 1}/{run_by_hash_total}]"
)
else: else:
run_by_hash_num = 0 run_by_hash_num = 0
run_by_hash_total = 0 run_by_hash_total = 0
@ -191,12 +245,18 @@ if __name__ == "__main__":
tests_to_run = get_tests_to_run(pr_info) tests_to_run = get_tests_to_run(pr_info)
if not tests_to_run: if not tests_to_run:
commit = get_commit(gh, pr_info.sha) commit = get_commit(gh, pr_info.sha)
state = override_status('success', check_name, validate_bugix_check) state = override_status("success", check_name, validate_bugix_check)
if args.post_commit_status == 'commit_status': if args.post_commit_status == "commit_status":
commit.create_status(context=check_name_with_group, description=NO_CHANGES_MSG, state=state) commit.create_status(
elif args.post_commit_status == 'file': context=check_name_with_group,
description=NO_CHANGES_MSG,
state=state,
)
elif args.post_commit_status == "file":
fpath = os.path.join(temp_path, "post_commit_status.tsv") fpath = os.path.join(temp_path, "post_commit_status.tsv")
post_commit_status_to_file(fpath, description=NO_CHANGES_MSG, state=state, report_url='null') post_commit_status_to_file(
fpath, description=NO_CHANGES_MSG, state=state, report_url="null"
)
sys.exit(0) sys.exit(0)
image_name = get_image_name(check_name) image_name = get_image_name(check_name)
@ -223,11 +283,23 @@ if __name__ == "__main__":
run_log_path = os.path.join(result_path, "runlog.log") run_log_path = os.path.join(result_path, "runlog.log")
additional_envs = get_additional_envs(check_name, run_by_hash_num, run_by_hash_total) additional_envs = get_additional_envs(
check_name, run_by_hash_num, run_by_hash_total
)
if validate_bugix_check: if validate_bugix_check:
additional_envs.append('GLOBAL_TAGS=no-random-settings') additional_envs.append("GLOBAL_TAGS=no-random-settings")
run_command = get_run_command(packages_path, repo_tests_path, result_path, server_log_path, kill_timeout, additional_envs, docker_image, flaky_check, tests_to_run) run_command = get_run_command(
packages_path,
repo_tests_path,
result_path,
server_log_path,
kill_timeout,
additional_envs,
docker_image,
flaky_check,
tests_to_run,
)
logging.info("Going to run func tests: %s", run_command) logging.info("Going to run func tests: %s", run_command)
with TeePopen(run_command, run_log_path) as process: with TeePopen(run_command, run_log_path) as process:
@ -239,29 +311,55 @@ if __name__ == "__main__":
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
s3_helper = S3Helper('https://s3.amazonaws.com') s3_helper = S3Helper("https://s3.amazonaws.com")
state, description, test_results, additional_logs = process_results(result_path, server_log_path) state, description, test_results, additional_logs = process_results(
result_path, server_log_path
)
state = override_status(state, check_name, validate_bugix_check) state = override_status(state, check_name, validate_bugix_check)
ch_helper = ClickHouseHelper() ch_helper = ClickHouseHelper()
mark_flaky_tests(ch_helper, check_name, test_results) mark_flaky_tests(ch_helper, check_name, test_results)
report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [run_log_path] + additional_logs, check_name_with_group) report_url = upload_results(
s3_helper,
pr_info.number,
pr_info.sha,
test_results,
[run_log_path] + additional_logs,
check_name_with_group,
)
print(f"::notice:: {check_name} Report url: {report_url}") print(f"::notice:: {check_name} Report url: {report_url}")
if args.post_commit_status == 'commit_status': if args.post_commit_status == "commit_status":
post_commit_status(gh, pr_info.sha, check_name_with_group, description, state, report_url) post_commit_status(
elif args.post_commit_status == 'file': gh, pr_info.sha, check_name_with_group, description, state, report_url
post_commit_status_to_file(os.path.join(temp_path, "post_commit_status.tsv"), description, state, report_url) )
elif args.post_commit_status == "file":
post_commit_status_to_file(
os.path.join(temp_path, "post_commit_status.tsv"),
description,
state,
report_url,
)
else: else:
raise Exception(f'Unknown post_commit_status option "{args.post_commit_status}"') raise Exception(
f'Unknown post_commit_status option "{args.post_commit_status}"'
)
prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name_with_group) prepared_events = prepare_tests_results_for_clickhouse(
pr_info,
test_results,
state,
stopwatch.duration_seconds,
stopwatch.start_time_str,
report_url,
check_name_with_group,
)
ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events) ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
if state != 'success': if state != "success":
if 'force-tests' in pr_info.labels: if "force-tests" in pr_info.labels:
print("'force-tests' enabled, will report success") print("'force-tests' enabled, will report success")
else: else:
sys.exit(1) sys.exit(1)
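Beyond re-wrapping, the diff above is dominated by black's string quote normalization: single-quoted literals become double-quoted unless the switch would force extra backslash escapes. A tiny runnable sketch of the rule; the values are illustrative and not taken from these scripts:

# before black: check_name = 'Stateless tests (actions)'
check_name = "Stateless tests (actions)"  # after black: double quotes preferred

# A literal that itself contains double quotes is left single-quoted, because
# converting it would add escapes; this is why the S3_URL option above keeps
# its original quoting.
s3_option = '-e S3_URL="https://example.invalid"'
print(check_name, s3_option)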

View File

@ -2,13 +2,15 @@
import boto3 # type: ignore import boto3 # type: ignore
from github import Github # type: ignore from github import Github # type: ignore
def get_parameter_from_ssm(name, decrypt=True, client=None): def get_parameter_from_ssm(name, decrypt=True, client=None):
if not client: if not client:
client = boto3.client('ssm', region_name='us-east-1') client = boto3.client("ssm", region_name="us-east-1")
return client.get_parameter(Name=name, WithDecryption=decrypt)['Parameter']['Value'] return client.get_parameter(Name=name, WithDecryption=decrypt)["Parameter"]["Value"]
def get_best_robot_token(token_prefix_env_name="github_robot_token_", total_tokens=4): def get_best_robot_token(token_prefix_env_name="github_robot_token_", total_tokens=4):
client = boto3.client('ssm', region_name='us-east-1') client = boto3.client("ssm", region_name="us-east-1")
tokens = {} tokens = {}
for i in range(1, total_tokens + 1): for i in range(1, total_tokens + 1):
token_name = token_prefix_env_name + str(i) token_name = token_prefix_env_name + str(i)

View File

@ -18,8 +18,16 @@ from build_download_helper import download_all_deb_packages
from download_previous_release import download_previous_release from download_previous_release import download_previous_release
from upload_result_helper import upload_results from upload_result_helper import upload_results
from docker_pull_helper import get_images_with_versions from docker_pull_helper import get_images_with_versions
from commit_status_helper import post_commit_status, override_status, post_commit_status_to_file from commit_status_helper import (
from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse post_commit_status,
override_status,
post_commit_status_to_file,
)
from clickhouse_helper import (
ClickHouseHelper,
mark_flaky_tests,
prepare_tests_results_for_clickhouse,
)
from stopwatch import Stopwatch from stopwatch import Stopwatch
from rerun_helper import RerunHelper from rerun_helper import RerunHelper
from tee_popen import TeePopen from tee_popen import TeePopen
@ -41,24 +49,28 @@ IMAGES = [
"clickhouse/dotnet-client", "clickhouse/dotnet-client",
] ]
def get_json_params_dict(check_name, pr_info, docker_images, run_by_hash_total, run_by_hash_num):
def get_json_params_dict(
check_name, pr_info, docker_images, run_by_hash_total, run_by_hash_num
):
return { return {
'context_name': check_name, "context_name": check_name,
'commit': pr_info.sha, "commit": pr_info.sha,
'pull_request': pr_info.number, "pull_request": pr_info.number,
'pr_info': {'changed_files' : list(pr_info.changed_files)}, "pr_info": {"changed_files": list(pr_info.changed_files)},
'docker_images_with_versions': docker_images, "docker_images_with_versions": docker_images,
'shuffle_test_groups': False, "shuffle_test_groups": False,
'use_tmpfs': False, "use_tmpfs": False,
'disable_net_host': True, "disable_net_host": True,
'run_by_hash_total': run_by_hash_total, "run_by_hash_total": run_by_hash_total,
'run_by_hash_num': run_by_hash_num, "run_by_hash_num": run_by_hash_num,
} }
def get_env_for_runner(build_path, repo_path, result_path, work_path): def get_env_for_runner(build_path, repo_path, result_path, work_path):
binary_path = os.path.join(build_path, 'clickhouse') binary_path = os.path.join(build_path, "clickhouse")
odbc_bridge_path = os.path.join(build_path, 'clickhouse-odbc-bridge') odbc_bridge_path = os.path.join(build_path, "clickhouse-odbc-bridge")
library_bridge_path = os.path.join(build_path, 'clickhouse-library-bridge') library_bridge_path = os.path.join(build_path, "clickhouse-library-bridge")
my_env = os.environ.copy() my_env = os.environ.copy()
my_env["CLICKHOUSE_TESTS_BUILD_PATH"] = build_path my_env["CLICKHOUSE_TESTS_BUILD_PATH"] = build_path
@ -70,25 +82,30 @@ def get_env_for_runner(build_path, repo_path, result_path, work_path):
my_env["CLICKHOUSE_TESTS_RESULT_PATH"] = result_path my_env["CLICKHOUSE_TESTS_RESULT_PATH"] = result_path
my_env["CLICKHOUSE_TESTS_BASE_CONFIG_DIR"] = f"{repo_path}/programs/server" my_env["CLICKHOUSE_TESTS_BASE_CONFIG_DIR"] = f"{repo_path}/programs/server"
my_env["CLICKHOUSE_TESTS_JSON_PARAMS_PATH"] = os.path.join(work_path, "params.json") my_env["CLICKHOUSE_TESTS_JSON_PARAMS_PATH"] = os.path.join(work_path, "params.json")
my_env["CLICKHOUSE_TESTS_RUNNER_RESTART_DOCKER"] = '0' my_env["CLICKHOUSE_TESTS_RUNNER_RESTART_DOCKER"] = "0"
return my_env return my_env
def process_results(result_folder): def process_results(result_folder):
test_results = [] test_results = []
additional_files = [] additional_files = []
# Just upload all files from result_folder. # Just upload all files from result_folder.
# If task provides processed results, then it's responsible for content of result_folder. # If task provides processed results, then it's responsible for content of result_folder.
if os.path.exists(result_folder): if os.path.exists(result_folder):
test_files = [f for f in os.listdir(result_folder) if os.path.isfile(os.path.join(result_folder, f))] test_files = [
f
for f in os.listdir(result_folder)
if os.path.isfile(os.path.join(result_folder, f))
]
additional_files = [os.path.join(result_folder, f) for f in test_files] additional_files = [os.path.join(result_folder, f) for f in test_files]
status = [] status = []
status_path = os.path.join(result_folder, "check_status.tsv") status_path = os.path.join(result_folder, "check_status.tsv")
if os.path.exists(status_path): if os.path.exists(status_path):
logging.info("Found test_results.tsv") logging.info("Found test_results.tsv")
with open(status_path, 'r', encoding='utf-8') as status_file: with open(status_path, "r", encoding="utf-8") as status_file:
status = list(csv.reader(status_file, delimiter='\t')) status = list(csv.reader(status_file, delimiter="\t"))
if len(status) != 1 or len(status[0]) != 2: if len(status) != 1 or len(status[0]) != 2:
logging.info("Files in result folder %s", os.listdir(result_folder)) logging.info("Files in result folder %s", os.listdir(result_folder))
@ -97,8 +114,8 @@ def process_results(result_folder):
results_path = os.path.join(result_folder, "test_results.tsv") results_path = os.path.join(result_folder, "test_results.tsv")
if os.path.exists(results_path): if os.path.exists(results_path):
with open(results_path, 'r', encoding='utf-8') as results_file: with open(results_path, "r", encoding="utf-8") as results_file:
test_results = list(csv.reader(results_file, delimiter='\t')) test_results = list(csv.reader(results_file, delimiter="\t"))
if len(test_results) == 0: if len(test_results) == 0:
return "error", "Empty test_results.tsv", test_results, additional_files return "error", "Empty test_results.tsv", test_results, additional_files
@ -108,8 +125,17 @@ def process_results(result_folder):
def parse_args(): def parse_args():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("check_name") parser.add_argument("check_name")
parser.add_argument("--validate-bugfix", action='store_true', help="Check that added tests failed on latest stable") parser.add_argument(
parser.add_argument("--post-commit-status", default='commit_status', choices=['commit_status', 'file'], help="Where to public post commit status") "--validate-bugfix",
action="store_true",
help="Check that added tests failed on latest stable",
)
parser.add_argument(
"--post-commit-status",
default="commit_status",
choices=["commit_status", "file"],
help="Where to public post commit status",
)
return parser.parse_args() return parser.parse_args()
@ -126,10 +152,12 @@ if __name__ == "__main__":
check_name = args.check_name check_name = args.check_name
validate_bugix_check = args.validate_bugfix validate_bugix_check = args.validate_bugfix
if 'RUN_BY_HASH_NUM' in os.environ: if "RUN_BY_HASH_NUM" in os.environ:
run_by_hash_num = int(os.getenv('RUN_BY_HASH_NUM')) run_by_hash_num = int(os.getenv("RUN_BY_HASH_NUM"))
run_by_hash_total = int(os.getenv('RUN_BY_HASH_TOTAL')) run_by_hash_total = int(os.getenv("RUN_BY_HASH_TOTAL"))
check_name_with_group = check_name + f' [{run_by_hash_num + 1}/{run_by_hash_total}]' check_name_with_group = (
check_name + f" [{run_by_hash_num + 1}/{run_by_hash_total}]"
)
else: else:
run_by_hash_num = 0 run_by_hash_num = 0
run_by_hash_total = 0 run_by_hash_total = 0
@ -138,12 +166,17 @@ if __name__ == "__main__":
if not os.path.exists(temp_path): if not os.path.exists(temp_path):
os.makedirs(temp_path) os.makedirs(temp_path)
is_flaky_check = 'flaky' in check_name is_flaky_check = "flaky" in check_name
pr_info = PRInfo(need_changed_files=is_flaky_check or validate_bugix_check) pr_info = PRInfo(need_changed_files=is_flaky_check or validate_bugix_check)
if validate_bugix_check and 'pr-bugfix' not in pr_info.labels: if validate_bugix_check and "pr-bugfix" not in pr_info.labels:
if args.post_commit_status == 'file': if args.post_commit_status == "file":
post_commit_status_to_file(os.path.join(temp_path, "post_commit_status.tsv"), 'Skipped (no pr-bugfix)', 'success', 'null') post_commit_status_to_file(
os.path.join(temp_path, "post_commit_status.tsv"),
"Skipped (no pr-bugfix)",
"success",
"null",
)
logging.info("Skipping '%s' (no pr-bugfix)", check_name) logging.info("Skipping '%s' (no pr-bugfix)", check_name)
sys.exit(0) sys.exit(0)
@ -175,9 +208,19 @@ if __name__ == "__main__":
my_env = get_env_for_runner(build_path, repo_path, result_path, work_path) my_env = get_env_for_runner(build_path, repo_path, result_path, work_path)
json_path = os.path.join(work_path, 'params.json') json_path = os.path.join(work_path, "params.json")
with open(json_path, 'w', encoding='utf-8') as json_params: with open(json_path, "w", encoding="utf-8") as json_params:
json_params.write(json.dumps(get_json_params_dict(check_name, pr_info, images_with_versions, run_by_hash_total, run_by_hash_num))) json_params.write(
json.dumps(
get_json_params_dict(
check_name,
pr_info,
images_with_versions,
run_by_hash_total,
run_by_hash_num,
)
)
)
output_path_log = os.path.join(result_path, "main_script_log.txt") output_path_log = os.path.join(result_path, "main_script_log.txt")
@ -199,16 +242,41 @@ if __name__ == "__main__":
ch_helper = ClickHouseHelper() ch_helper = ClickHouseHelper()
mark_flaky_tests(ch_helper, check_name, test_results) mark_flaky_tests(ch_helper, check_name, test_results)
s3_helper = S3Helper('https://s3.amazonaws.com') s3_helper = S3Helper("https://s3.amazonaws.com")
report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [output_path_log] + additional_logs, check_name_with_group, False) report_url = upload_results(
s3_helper,
pr_info.number,
pr_info.sha,
test_results,
[output_path_log] + additional_logs,
check_name_with_group,
False,
)
print(f"::notice:: {check_name} Report url: {report_url}") print(f"::notice:: {check_name} Report url: {report_url}")
if args.post_commit_status == 'commit_status': if args.post_commit_status == "commit_status":
post_commit_status(gh, pr_info.sha, check_name_with_group, description, state, report_url) post_commit_status(
elif args.post_commit_status == 'file': gh, pr_info.sha, check_name_with_group, description, state, report_url
post_commit_status_to_file(os.path.join(temp_path, "post_commit_status.tsv"), description, state, report_url) )
elif args.post_commit_status == "file":
post_commit_status_to_file(
os.path.join(temp_path, "post_commit_status.tsv"),
description,
state,
report_url,
)
else: else:
raise Exception(f'Unknown post_commit_status option "{args.post_commit_status}"') raise Exception(
f'Unknown post_commit_status option "{args.post_commit_status}"'
)
prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name_with_group) prepared_events = prepare_tests_results_for_clickhouse(
pr_info,
test_results,
state,
stopwatch.duration_seconds,
stopwatch.start_time_str,
report_url,
check_name_with_group,
)
ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events) ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)

View File

@ -24,10 +24,10 @@ from ssh import SSHKey
from build_download_helper import get_build_name_for_check from build_download_helper import get_build_name_for_check
from rerun_helper import RerunHelper from rerun_helper import RerunHelper
JEPSEN_GROUP_NAME = 'jepsen_group' JEPSEN_GROUP_NAME = "jepsen_group"
DESIRED_INSTANCE_COUNT = 3 DESIRED_INSTANCE_COUNT = 3
IMAGE_NAME = 'clickhouse/keeper-jepsen-test' IMAGE_NAME = "clickhouse/keeper-jepsen-test"
CHECK_NAME = 'ClickHouse Keeper Jepsen (actions)' CHECK_NAME = "ClickHouse Keeper Jepsen (actions)"
SUCCESSFUL_TESTS_ANCHOR = "# Successful tests" SUCCESSFUL_TESTS_ANCHOR = "# Successful tests"
@ -35,45 +35,58 @@ INTERMINATE_TESTS_ANCHOR = "# Indeterminate tests"
CRASHED_TESTS_ANCHOR = "# Crashed tests" CRASHED_TESTS_ANCHOR = "# Crashed tests"
FAILED_TESTS_ANCHOR = "# Failed tests" FAILED_TESTS_ANCHOR = "# Failed tests"
def _parse_jepsen_output(path): def _parse_jepsen_output(path):
test_results = [] test_results = []
current_type = '' current_type = ""
with open(path, 'r') as f: with open(path, "r") as f:
for line in f: for line in f:
if SUCCESSFUL_TESTS_ANCHOR in line: if SUCCESSFUL_TESTS_ANCHOR in line:
current_type = 'OK' current_type = "OK"
elif INTERMINATE_TESTS_ANCHOR in line or CRASHED_TESTS_ANCHOR in line: elif INTERMINATE_TESTS_ANCHOR in line or CRASHED_TESTS_ANCHOR in line:
current_type = 'ERROR' current_type = "ERROR"
elif FAILED_TESTS_ANCHOR in line: elif FAILED_TESTS_ANCHOR in line:
current_type = 'FAIL' current_type = "FAIL"
if (line.startswith('store/clickhouse-keeper') or line.startswith('clickhouse-keeper')) and current_type: if (
line.startswith("store/clickhouse-keeper")
or line.startswith("clickhouse-keeper")
) and current_type:
test_results.append((line.strip(), current_type)) test_results.append((line.strip(), current_type))
return test_results return test_results
def get_autoscaling_group_instances_ids(asg_client, group_name): def get_autoscaling_group_instances_ids(asg_client, group_name):
group_description = asg_client.describe_auto_scaling_groups(AutoScalingGroupNames=[group_name]) group_description = asg_client.describe_auto_scaling_groups(
our_group = group_description['AutoScalingGroups'][0] AutoScalingGroupNames=[group_name]
)
our_group = group_description["AutoScalingGroups"][0]
instance_ids = [] instance_ids = []
for instance in our_group['Instances']: for instance in our_group["Instances"]:
if instance['LifecycleState'] == 'InService' and instance['HealthStatus'] == 'Healthy': if (
instance_ids.append(instance['InstanceId']) instance["LifecycleState"] == "InService"
and instance["HealthStatus"] == "Healthy"
):
instance_ids.append(instance["InstanceId"])
return instance_ids return instance_ids
def get_instances_addresses(ec2_client, instance_ids): def get_instances_addresses(ec2_client, instance_ids):
ec2_response = ec2_client.describe_instances(InstanceIds = instance_ids) ec2_response = ec2_client.describe_instances(InstanceIds=instance_ids)
instance_ips = [] instance_ips = []
for instances in ec2_response['Reservations']: for instances in ec2_response["Reservations"]:
for ip in instances['Instances']: for ip in instances["Instances"]:
instance_ips.append(ip['PrivateIpAddress']) instance_ips.append(ip["PrivateIpAddress"])
return instance_ips return instance_ips
def prepare_autoscaling_group_and_get_hostnames(): def prepare_autoscaling_group_and_get_hostnames():
asg_client = boto3.client('autoscaling', region_name='us-east-1') asg_client = boto3.client("autoscaling", region_name="us-east-1")
asg_client.set_desired_capacity(AutoScalingGroupName=JEPSEN_GROUP_NAME, DesiredCapacity=DESIRED_INSTANCE_COUNT) asg_client.set_desired_capacity(
AutoScalingGroupName=JEPSEN_GROUP_NAME, DesiredCapacity=DESIRED_INSTANCE_COUNT
)
instances = get_autoscaling_group_instances_ids(asg_client, JEPSEN_GROUP_NAME) instances = get_autoscaling_group_instances_ids(asg_client, JEPSEN_GROUP_NAME)
counter = 0 counter = 0
@ -84,13 +97,15 @@ def prepare_autoscaling_group_and_get_hostnames():
if counter > 30: if counter > 30:
raise Exception("Cannot wait autoscaling group") raise Exception("Cannot wait autoscaling group")
ec2_client = boto3.client('ec2', region_name='us-east-1') ec2_client = boto3.client("ec2", region_name="us-east-1")
return get_instances_addresses(ec2_client, instances) return get_instances_addresses(ec2_client, instances)
def clear_autoscaling_group(): def clear_autoscaling_group():
asg_client = boto3.client('autoscaling', region_name='us-east-1') asg_client = boto3.client("autoscaling", region_name="us-east-1")
asg_client.set_desired_capacity(AutoScalingGroupName=JEPSEN_GROUP_NAME, DesiredCapacity=0) asg_client.set_desired_capacity(
AutoScalingGroupName=JEPSEN_GROUP_NAME, DesiredCapacity=0
)
instances = get_autoscaling_group_instances_ids(asg_client, JEPSEN_GROUP_NAME) instances = get_autoscaling_group_instances_ids(asg_client, JEPSEN_GROUP_NAME)
counter = 0 counter = 0
while len(instances) > 0: while len(instances) > 0:
@ -103,15 +118,28 @@ def clear_autoscaling_group():
def save_nodes_to_file(instances, temp_path): def save_nodes_to_file(instances, temp_path):
nodes_path = os.path.join(temp_path, "nodes.txt") nodes_path = os.path.join(temp_path, "nodes.txt")
with open(nodes_path, 'w') as f: with open(nodes_path, "w") as f:
f.write("\n".join(instances)) f.write("\n".join(instances))
f.flush() f.flush()
return nodes_path return nodes_path
def get_run_command(ssh_auth_sock, ssh_sock_dir, pr_info, nodes_path, repo_path, build_url, result_path, docker_image):
    return f"docker run --network=host -v '{ssh_sock_dir}:{ssh_sock_dir}' -e SSH_AUTH_SOCK={ssh_auth_sock} " \
        f"-e PR_TO_TEST={pr_info.number} -e SHA_TO_TEST={pr_info.sha} -v '{nodes_path}:/nodes.txt' -v {result_path}:/test_output " \
        f"-e 'CLICKHOUSE_PACKAGE={build_url}' -v '{repo_path}:/ch' -e 'CLICKHOUSE_REPO_PATH=/ch' -e NODES_USERNAME=ubuntu {docker_image}"

def get_run_command(
    ssh_auth_sock,
    ssh_sock_dir,
    pr_info,
    nodes_path,
    repo_path,
    build_url,
    result_path,
    docker_image,
):
    return (
        f"docker run --network=host -v '{ssh_sock_dir}:{ssh_sock_dir}' -e SSH_AUTH_SOCK={ssh_auth_sock} "
        f"-e PR_TO_TEST={pr_info.number} -e SHA_TO_TEST={pr_info.sha} -v '{nodes_path}:/nodes.txt' -v {result_path}:/test_output "
        f"-e 'CLICKHOUSE_PACKAGE={build_url}' -v '{repo_path}:/ch' -e 'CLICKHOUSE_REPO_PATH=/ch' -e NODES_USERNAME=ubuntu {docker_image}"
    )
if __name__ == "__main__": if __name__ == "__main__":
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
@ -120,9 +148,14 @@ if __name__ == "__main__":
pr_info = PRInfo() pr_info = PRInfo()
logging.info("Start at PR number %s, commit sha %s labels %s", pr_info.number, pr_info.sha, pr_info.labels) logging.info(
"Start at PR number %s, commit sha %s labels %s",
pr_info.number,
pr_info.sha,
pr_info.labels,
)
if pr_info.number != 0 and 'jepsen-test' not in pr_info.labels: if pr_info.number != 0 and "jepsen-test" not in pr_info.labels:
logging.info("Not jepsen test label in labels list, skipping") logging.info("Not jepsen test label in labels list, skipping")
sys.exit(0) sys.exit(0)
@ -167,13 +200,24 @@ if __name__ == "__main__":
head = requests.head(build_url) head = requests.head(build_url)
counter += 1 counter += 1
if counter >= 180: if counter >= 180:
post_commit_status(gh, pr_info.sha, CHECK_NAME, "Cannot fetch build to run", "error", "") post_commit_status(
gh, pr_info.sha, CHECK_NAME, "Cannot fetch build to run", "error", ""
)
raise Exception("Cannot fetch build") raise Exception("Cannot fetch build")
with SSHKey(key_value=get_parameter_from_ssm("jepsen_ssh_key") + '\n'): with SSHKey(key_value=get_parameter_from_ssm("jepsen_ssh_key") + "\n"):
ssh_auth_sock = os.environ['SSH_AUTH_SOCK'] ssh_auth_sock = os.environ["SSH_AUTH_SOCK"]
auth_sock_dir = os.path.dirname(ssh_auth_sock) auth_sock_dir = os.path.dirname(ssh_auth_sock)
cmd = get_run_command(ssh_auth_sock, auth_sock_dir, pr_info, nodes_path, REPO_COPY, build_url, result_path, docker_image) cmd = get_run_command(
ssh_auth_sock,
auth_sock_dir,
pr_info,
nodes_path,
REPO_COPY,
build_url,
result_path,
docker_image,
)
logging.info("Going to run jepsen: %s", cmd) logging.info("Going to run jepsen: %s", cmd)
run_log_path = os.path.join(TEMP_PATH, "runlog.log") run_log_path = os.path.join(TEMP_PATH, "runlog.log")
@ -185,31 +229,49 @@ if __name__ == "__main__":
else: else:
logging.info("Run failed") logging.info("Run failed")
status = 'success' status = "success"
description = 'No invalid analysis found ヽ(‘ー`)' description = "No invalid analysis found ヽ(‘ー`)"
jepsen_log_path = os.path.join(result_path, 'jepsen_run_all_tests.log') jepsen_log_path = os.path.join(result_path, "jepsen_run_all_tests.log")
additional_data = [] additional_data = []
try: try:
test_result = _parse_jepsen_output(jepsen_log_path) test_result = _parse_jepsen_output(jepsen_log_path)
if any(r[1] == 'FAIL' for r in test_result): if any(r[1] == "FAIL" for r in test_result):
status = 'failure' status = "failure"
description = 'Found invalid analysis (ノಥ益ಥ)ノ ┻━┻' description = "Found invalid analysis (ノಥ益ಥ)ノ ┻━┻"
compress_fast(os.path.join(result_path, 'store'), os.path.join(result_path, 'jepsen_store.tar.gz')) compress_fast(
additional_data.append(os.path.join(result_path, 'jepsen_store.tar.gz')) os.path.join(result_path, "store"),
os.path.join(result_path, "jepsen_store.tar.gz"),
)
additional_data.append(os.path.join(result_path, "jepsen_store.tar.gz"))
except Exception as ex: except Exception as ex:
print("Exception", ex) print("Exception", ex)
status = 'failure' status = "failure"
description = 'No Jepsen output log' description = "No Jepsen output log"
test_result = [('No Jepsen output log', 'FAIL')] test_result = [("No Jepsen output log", "FAIL")]
s3_helper = S3Helper('https://s3.amazonaws.com') s3_helper = S3Helper("https://s3.amazonaws.com")
report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_result, [run_log_path] + additional_data, CHECK_NAME) report_url = upload_results(
s3_helper,
pr_info.number,
pr_info.sha,
test_result,
[run_log_path] + additional_data,
CHECK_NAME,
)
print(f"::notice ::Report url: {report_url}") print(f"::notice ::Report url: {report_url}")
post_commit_status(gh, pr_info.sha, CHECK_NAME, description, status, report_url) post_commit_status(gh, pr_info.sha, CHECK_NAME, description, status, report_url)
ch_helper = ClickHouseHelper() ch_helper = ClickHouseHelper()
prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_result, status, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, CHECK_NAME) prepared_events = prepare_tests_results_for_clickhouse(
pr_info,
test_result,
status,
stopwatch.duration_seconds,
stopwatch.start_time_str,
report_url,
CHECK_NAME,
)
ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events) ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
clear_autoscaling_group() clear_autoscaling_group()
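The get_run_command rewrite above also shows how black treats a backslash-continued, implicitly concatenated f-string: the logical line is re-emitted inside parentheses, one fragment per line, and the trailing backslashes disappear. A short sketch of the resulting shape; the image name and path are placeholders, and this example is far shorter than anything black would actually need to wrap:

workdir = "/tmp/work"                 # hypothetical
image = "example/keeper-test:latest"  # hypothetical

# before: command = f"docker run -v {workdir}:/work " \
#                   f"{image}"
command = (
    f"docker run -v {workdir}:/work "
    f"{image}"
)
print(command)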

View File

@ -19,14 +19,26 @@ from commit_status_helper import get_commit, post_commit_status
from tee_popen import TeePopen from tee_popen import TeePopen
from rerun_helper import RerunHelper from rerun_helper import RerunHelper
IMAGE_NAME = 'clickhouse/performance-comparison' IMAGE_NAME = "clickhouse/performance-comparison"
def get_run_command(workspace, result_path, repo_tests_path, pr_to_test, sha_to_test, additional_env, image):
    return f"docker run --privileged --volume={workspace}:/workspace --volume={result_path}:/output " \
        f"--volume={repo_tests_path}:/usr/share/clickhouse-test " \
        f"--cap-add syslog --cap-add sys_admin --cap-add sys_rawio " \
        f"-e PR_TO_TEST={pr_to_test} -e SHA_TO_TEST={sha_to_test} {additional_env} " \
        f"{image}"

def get_run_command(
    workspace,
    result_path,
    repo_tests_path,
    pr_to_test,
    sha_to_test,
    additional_env,
    image,
):
    return (
        f"docker run --privileged --volume={workspace}:/workspace --volume={result_path}:/output "
        f"--volume={repo_tests_path}:/usr/share/clickhouse-test "
        f"--cap-add syslog --cap-add sys_admin --cap-add sys_rawio "
        f"-e PR_TO_TEST={pr_to_test} -e SHA_TO_TEST={sha_to_test} {additional_env} "
        f"{image}"
    )
class RamDrive: class RamDrive:
def __init__(self, path, size): def __init__(self, path, size):
@ -37,11 +49,14 @@ class RamDrive:
if not os.path.exists(self.path): if not os.path.exists(self.path):
os.makedirs(self.path) os.makedirs(self.path)
subprocess.check_call(f"sudo mount -t tmpfs -o rw,size={self.size} tmpfs {self.path}", shell=True) subprocess.check_call(
f"sudo mount -t tmpfs -o rw,size={self.size} tmpfs {self.path}", shell=True
)
def __exit__(self, exc_type, exc_val, exc_tb): def __exit__(self, exc_type, exc_val, exc_tb):
subprocess.check_call(f"sudo umount {self.path}", shell=True) subprocess.check_call(f"sudo umount {self.path}", shell=True)
if __name__ == "__main__": if __name__ == "__main__":
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
temp_path = os.getenv("TEMP_PATH", os.path.abspath(".")) temp_path = os.getenv("TEMP_PATH", os.path.abspath("."))
@ -49,7 +64,7 @@ if __name__ == "__main__":
repo_tests_path = os.path.join(repo_path, "tests") repo_tests_path = os.path.join(repo_path, "tests")
ramdrive_path = os.getenv("RAMDRIVE_PATH", os.path.join(temp_path, "ramdrive")) ramdrive_path = os.getenv("RAMDRIVE_PATH", os.path.join(temp_path, "ramdrive"))
# currently unused, doesn't make tests more stable # currently unused, doesn't make tests more stable
ramdrive_size = os.getenv("RAMDRIVE_SIZE", '0G') ramdrive_size = os.getenv("RAMDRIVE_SIZE", "0G")
reports_path = os.getenv("REPORTS_PATH", "./reports") reports_path = os.getenv("REPORTS_PATH", "./reports")
check_name = sys.argv[1] check_name = sys.argv[1]
@ -57,14 +72,14 @@ if __name__ == "__main__":
if not os.path.exists(temp_path): if not os.path.exists(temp_path):
os.makedirs(temp_path) os.makedirs(temp_path)
with open(os.getenv('GITHUB_EVENT_PATH'), 'r', encoding='utf-8') as event_file: with open(os.getenv("GITHUB_EVENT_PATH"), "r", encoding="utf-8") as event_file:
event = json.load(event_file) event = json.load(event_file)
gh = Github(get_best_robot_token()) gh = Github(get_best_robot_token())
pr_info = PRInfo(event) pr_info = PRInfo(event)
commit = get_commit(gh, pr_info.sha) commit = get_commit(gh, pr_info.sha)
docker_env = '' docker_env = ""
docker_env += " -e S3_URL=https://s3.amazonaws.com/clickhouse-builds" docker_env += " -e S3_URL=https://s3.amazonaws.com/clickhouse-builds"
@ -75,13 +90,16 @@ if __name__ == "__main__":
task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}"
docker_env += ' -e CHPC_ADD_REPORT_LINKS="<a href={}>Job (actions)</a> <a href={}>Tested commit</a>"'.format( docker_env += ' -e CHPC_ADD_REPORT_LINKS="<a href={}>Job (actions)</a> <a href={}>Tested commit</a>"'.format(
task_url, pr_link) task_url, pr_link
)
if 'RUN_BY_HASH_TOTAL' in os.environ: if "RUN_BY_HASH_TOTAL" in os.environ:
run_by_hash_total = int(os.getenv('RUN_BY_HASH_TOTAL')) run_by_hash_total = int(os.getenv("RUN_BY_HASH_TOTAL"))
run_by_hash_num = int(os.getenv('RUN_BY_HASH_NUM')) run_by_hash_num = int(os.getenv("RUN_BY_HASH_NUM"))
docker_env += f' -e CHPC_TEST_RUN_BY_HASH_TOTAL={run_by_hash_total} -e CHPC_TEST_RUN_BY_HASH_NUM={run_by_hash_num}' docker_env += f" -e CHPC_TEST_RUN_BY_HASH_TOTAL={run_by_hash_total} -e CHPC_TEST_RUN_BY_HASH_NUM={run_by_hash_num}"
check_name_with_group = check_name + f' [{run_by_hash_num + 1}/{run_by_hash_total}]' check_name_with_group = (
check_name + f" [{run_by_hash_num + 1}/{run_by_hash_total}]"
)
else: else:
check_name_with_group = check_name check_name_with_group = check_name
@ -92,12 +110,20 @@ if __name__ == "__main__":
docker_image = get_image_with_version(reports_path, IMAGE_NAME) docker_image = get_image_with_version(reports_path, IMAGE_NAME)
#with RamDrive(ramdrive_path, ramdrive_size): # with RamDrive(ramdrive_path, ramdrive_size):
result_path = ramdrive_path result_path = ramdrive_path
if not os.path.exists(result_path): if not os.path.exists(result_path):
os.makedirs(result_path) os.makedirs(result_path)
run_command = get_run_command(result_path, result_path, repo_tests_path, pr_info.number, pr_info.sha, docker_env, docker_image) run_command = get_run_command(
result_path,
result_path,
repo_tests_path,
pr_info.number,
pr_info.sha,
docker_env,
docker_image,
)
logging.info("Going to run command %s", run_command) logging.info("Going to run command %s", run_command)
run_log_path = os.path.join(temp_path, "runlog.log") run_log_path = os.path.join(temp_path, "runlog.log")
with TeePopen(run_command, run_log_path) as process: with TeePopen(run_command, run_log_path) as process:
@ -110,74 +136,83 @@ if __name__ == "__main__":
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
paths = { paths = {
'compare.log': os.path.join(result_path, 'compare.log'), "compare.log": os.path.join(result_path, "compare.log"),
'output.7z': os.path.join(result_path, 'output.7z'), "output.7z": os.path.join(result_path, "output.7z"),
'report.html': os.path.join(result_path, 'report.html'), "report.html": os.path.join(result_path, "report.html"),
'all-queries.html': os.path.join(result_path, 'all-queries.html'), "all-queries.html": os.path.join(result_path, "all-queries.html"),
'queries.rep': os.path.join(result_path, 'queries.rep'), "queries.rep": os.path.join(result_path, "queries.rep"),
'all-query-metrics.tsv': os.path.join(result_path, 'report/all-query-metrics.tsv'), "all-query-metrics.tsv": os.path.join(
'runlog.log': run_log_path, result_path, "report/all-query-metrics.tsv"
),
"runlog.log": run_log_path,
} }
    check_name_prefix = check_name_with_group.lower().replace(' ', '_').replace('(', '_').replace(')', '_').replace(',', '_')
    s3_prefix = f'{pr_info.number}/{pr_info.sha}/{check_name_prefix}/'
    s3_helper = S3Helper('https://s3.amazonaws.com')

    check_name_prefix = (
        check_name_with_group.lower()
        .replace(" ", "_")
        .replace("(", "_")
        .replace(")", "_")
        .replace(",", "_")
    )
    s3_prefix = f"{pr_info.number}/{pr_info.sha}/{check_name_prefix}/"
    s3_helper = S3Helper("https://s3.amazonaws.com")
for file in paths: for file in paths:
try: try:
paths[file] = s3_helper.upload_test_report_to_s3(paths[file], paths[file] = s3_helper.upload_test_report_to_s3(
s3_prefix + file) paths[file], s3_prefix + file
)
except Exception: except Exception:
paths[file] = '' paths[file] = ""
traceback.print_exc() traceback.print_exc()
# Upload all images and flamegraphs to S3 # Upload all images and flamegraphs to S3
try: try:
s3_helper.upload_test_folder_to_s3( s3_helper.upload_test_folder_to_s3(
os.path.join(result_path, 'images'), os.path.join(result_path, "images"), s3_prefix + "images"
s3_prefix + 'images'
) )
except Exception: except Exception:
traceback.print_exc() traceback.print_exc()
# Try to fetch status from the report. # Try to fetch status from the report.
status = '' status = ""
message = '' message = ""
try: try:
report_text = open(os.path.join(result_path, 'report.html'), 'r').read() report_text = open(os.path.join(result_path, "report.html"), "r").read()
status_match = re.search('<!--[ ]*status:(.*)-->', report_text) status_match = re.search("<!--[ ]*status:(.*)-->", report_text)
message_match = re.search('<!--[ ]*message:(.*)-->', report_text) message_match = re.search("<!--[ ]*message:(.*)-->", report_text)
if status_match: if status_match:
status = status_match.group(1).strip() status = status_match.group(1).strip()
if message_match: if message_match:
message = message_match.group(1).strip() message = message_match.group(1).strip()
# TODO: Remove me, always green mode for the first time # TODO: Remove me, always green mode for the first time
status = 'success' status = "success"
except Exception: except Exception:
traceback.print_exc() traceback.print_exc()
status = 'failure' status = "failure"
message = 'Failed to parse the report.' message = "Failed to parse the report."
if not status: if not status:
status = 'failure' status = "failure"
message = 'No status in report.' message = "No status in report."
elif not message: elif not message:
status = 'failure' status = "failure"
message = 'No message in report.' message = "No message in report."
report_url = task_url report_url = task_url
if paths['runlog.log']: if paths["runlog.log"]:
report_url = paths['runlog.log'] report_url = paths["runlog.log"]
if paths['compare.log']: if paths["compare.log"]:
report_url = paths['compare.log'] report_url = paths["compare.log"]
if paths['output.7z']: if paths["output.7z"]:
report_url = paths['output.7z'] report_url = paths["output.7z"]
if paths['report.html']: if paths["report.html"]:
report_url = paths['report.html'] report_url = paths["report.html"]
    post_commit_status(gh, pr_info.sha, check_name_with_group, message, status, report_url)

    post_commit_status(
        gh, pr_info.sha, check_name_with_group, message, status, report_url
    )
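The check_name_prefix change above illustrates one more recurring pattern: a chain of method calls that overflows the line is wrapped in parentheses and split at the dots, one call per line. A standalone sketch; the input string is made up:

raw = "Performance Comparison (actions) [1/4]"  # hypothetical check name
slug = (
    raw.lower()
    .replace(" ", "_")
    .replace("(", "_")
    .replace(")", "_")
    .replace(",", "_")
)
print(slug)  # performance_comparison__actions__[1/4]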

View File

@ -92,27 +92,27 @@ HTML_TEST_PART = """
</table> </table>
""" """
BASE_HEADERS = ['Test name', 'Test status'] BASE_HEADERS = ["Test name", "Test status"]
class ReportColorTheme: class ReportColorTheme:
class ReportColor: class ReportColor:
yellow = '#FFB400' yellow = "#FFB400"
red = '#F00' red = "#F00"
green = '#0A0' green = "#0A0"
blue = '#00B4FF' blue = "#00B4FF"
default = (ReportColor.green, ReportColor.red, ReportColor.yellow) default = (ReportColor.green, ReportColor.red, ReportColor.yellow)
bugfixcheck = (ReportColor.yellow, ReportColor.blue, ReportColor.blue) bugfixcheck = (ReportColor.yellow, ReportColor.blue, ReportColor.blue)
def _format_header(header, branch_name, branch_url=None): def _format_header(header, branch_name, branch_url=None):
result = ' '.join([w.capitalize() for w in header.split(' ')]) result = " ".join([w.capitalize() for w in header.split(" ")])
result = result.replace("Clickhouse", "ClickHouse") result = result.replace("Clickhouse", "ClickHouse")
result = result.replace("clickhouse", "ClickHouse") result = result.replace("clickhouse", "ClickHouse")
if 'ClickHouse' not in result: if "ClickHouse" not in result:
result = 'ClickHouse ' + result result = "ClickHouse " + result
result += ' for ' result += " for "
if branch_url: if branch_url:
result += '<a href="{url}">{name}</a>'.format(url=branch_url, name=branch_name) result += '<a href="{url}">{name}</a>'.format(url=branch_url, name=branch_name)
else: else:
@ -121,27 +121,27 @@ def _format_header(header, branch_name, branch_url=None):
def _get_status_style(status, colortheme=None): def _get_status_style(status, colortheme=None):
ok_statuses = ('OK', 'success', 'PASSED') ok_statuses = ("OK", "success", "PASSED")
fail_statuses = ('FAIL', 'failure', 'error', 'FAILED', 'Timeout') fail_statuses = ("FAIL", "failure", "error", "FAILED", "Timeout")
if colortheme is None: if colortheme is None:
colortheme = ReportColorTheme.default colortheme = ReportColorTheme.default
style = "font-weight: bold;" style = "font-weight: bold;"
if status in ok_statuses: if status in ok_statuses:
style += f'color: {colortheme[0]};' style += f"color: {colortheme[0]};"
elif status in fail_statuses: elif status in fail_statuses:
style += f'color: {colortheme[1]};' style += f"color: {colortheme[1]};"
else: else:
style += f'color: {colortheme[2]};' style += f"color: {colortheme[2]};"
return style return style
def _get_html_url_name(url): def _get_html_url_name(url):
if isinstance(url, str): if isinstance(url, str):
return os.path.basename(url).replace('%2B', '+').replace('%20', ' ') return os.path.basename(url).replace("%2B", "+").replace("%20", " ")
if isinstance(url, tuple): if isinstance(url, tuple):
return url[1].replace('%2B', '+').replace('%20', ' ') return url[1].replace("%2B", "+").replace("%20", " ")
return None return None
@ -153,11 +153,24 @@ def _get_html_url(url):
if isinstance(url, tuple): if isinstance(url, tuple):
href, name = url[0], _get_html_url_name(url) href, name = url[0], _get_html_url_name(url)
if href and name: if href and name:
return '<a href="{href}">{name}</a>'.format(href=href, name=_get_html_url_name(url)) return '<a href="{href}">{name}</a>'.format(
return '' href=href, name=_get_html_url_name(url)
)
return ""
def create_test_html_report(header, test_result, raw_log_url, task_url, branch_url, branch_name, commit_url, additional_urls=None, with_raw_logs=False, statuscolors=None): def create_test_html_report(
header,
test_result,
raw_log_url,
task_url,
branch_url,
branch_name,
commit_url,
additional_urls=None,
with_raw_logs=False,
statuscolors=None,
):
if additional_urls is None: if additional_urls is None:
additional_urls = [] additional_urls = []
@ -181,9 +194,9 @@ def create_test_html_report(header, test_result, raw_log_url, task_url, branch_u
has_test_logs = True has_test_logs = True
row = "<tr>" row = "<tr>"
is_fail = test_status in ('FAIL', 'FLAKY') is_fail = test_status in ("FAIL", "FLAKY")
if is_fail and with_raw_logs and test_logs is not None: if is_fail and with_raw_logs and test_logs is not None:
row = "<tr class=\"failed\">" row = '<tr class="failed">'
row += "<td>" + test_name + "</td>" row += "<td>" + test_name + "</td>"
style = _get_status_style(test_status, colortheme=statuscolors) style = _get_status_style(test_status, colortheme=statuscolors)
@ -193,7 +206,13 @@ def create_test_html_report(header, test_result, raw_log_url, task_url, branch_u
num_fails = num_fails + 1 num_fails = num_fails + 1
is_fail_id = 'id="fail' + str(num_fails) + '" ' is_fail_id = 'id="fail' + str(num_fails) + '" '
row += '<td ' + is_fail_id + 'style="{}">'.format(style) + test_status + "</td>" row += (
"<td "
+ is_fail_id
+ 'style="{}">'.format(style)
+ test_status
+ "</td>"
)
if test_time is not None: if test_time is not None:
row += "<td>" + test_time + "</td>" row += "<td>" + test_time + "</td>"
@ -205,24 +224,26 @@ def create_test_html_report(header, test_result, raw_log_url, task_url, branch_u
row += "</tr>" row += "</tr>"
rows_part += row rows_part += row
if test_logs is not None and with_raw_logs: if test_logs is not None and with_raw_logs:
row = "<tr class=\"failed-content\">" row = '<tr class="failed-content">'
# TODO: compute colspan too # TODO: compute colspan too
row += "<td colspan=\"3\"><pre>" + test_logs + "</pre></td>" row += '<td colspan="3"><pre>' + test_logs + "</pre></td>"
row += "</tr>" row += "</tr>"
rows_part += row rows_part += row
headers = BASE_HEADERS headers = BASE_HEADERS
if has_test_time: if has_test_time:
headers.append('Test time, sec.') headers.append("Test time, sec.")
if has_test_logs and not with_raw_logs: if has_test_logs and not with_raw_logs:
headers.append('Logs') headers.append("Logs")
headers = ''.join(['<th>' + h + '</th>' for h in headers]) headers = "".join(["<th>" + h + "</th>" for h in headers])
test_part = HTML_TEST_PART.format(headers=headers, rows=rows_part) test_part = HTML_TEST_PART.format(headers=headers, rows=rows_part)
else: else:
test_part = "" test_part = ""
additional_html_urls = ' '.join([_get_html_url(url) for url in sorted(additional_urls, key=_get_html_url_name)]) additional_html_urls = " ".join(
[_get_html_url(url) for url in sorted(additional_urls, key=_get_html_url_name)]
)
result = HTML_BASE_TEST_TEMPLATE.format( result = HTML_BASE_TEST_TEMPLATE.format(
title=_format_header(header, branch_name), title=_format_header(header, branch_name),
@ -233,7 +254,7 @@ def create_test_html_report(header, test_result, raw_log_url, task_url, branch_u
test_part=test_part, test_part=test_part,
branch_name=branch_name, branch_name=branch_name,
commit_url=commit_url, commit_url=commit_url,
additional_urls=additional_html_urls additional_urls=additional_html_urls,
) )
return result return result
@ -297,9 +318,20 @@ tr:hover td {{filter: brightness(95%);}}
LINK_TEMPLATE = '<a href="{url}">{text}</a>' LINK_TEMPLATE = '<a href="{url}">{text}</a>'
def create_build_html_report(header, build_results, build_logs_urls, artifact_urls_list, task_url, branch_url, branch_name, commit_url): def create_build_html_report(
header,
build_results,
build_logs_urls,
artifact_urls_list,
task_url,
branch_url,
branch_name,
commit_url,
):
rows = "" rows = ""
for (build_result, build_log_url, artifact_urls) in zip(build_results, build_logs_urls, artifact_urls_list): for (build_result, build_log_url, artifact_urls) in zip(
build_results, build_logs_urls, artifact_urls_list
):
row = "<tr>" row = "<tr>"
row += "<td>{}</td>".format(build_result.compiler) row += "<td>{}</td>".format(build_result.compiler)
if build_result.build_type: if build_result.build_type:
@ -326,18 +358,20 @@ def create_build_html_report(header, build_results, build_logs_urls, artifact_ur
if build_result.elapsed_seconds: if build_result.elapsed_seconds:
delta = datetime.timedelta(seconds=build_result.elapsed_seconds) delta = datetime.timedelta(seconds=build_result.elapsed_seconds)
else: else:
delta = 'unknown' delta = "unknown"
row += '<td>{}</td>'.format(str(delta)) row += "<td>{}</td>".format(str(delta))
links = "" links = ""
link_separator = "<br/>" link_separator = "<br/>"
if artifact_urls: if artifact_urls:
for artifact_url in artifact_urls: for artifact_url in artifact_urls:
links += LINK_TEMPLATE.format(text=_get_html_url_name(artifact_url), url=artifact_url) links += LINK_TEMPLATE.format(
text=_get_html_url_name(artifact_url), url=artifact_url
)
links += link_separator links += link_separator
if links: if links:
links = links[:-len(link_separator)] links = links[: -len(link_separator)]
row += "<td>{}</td>".format(links) row += "<td>{}</td>".format(links)
row += "</tr>" row += "</tr>"
@ -348,4 +382,5 @@ def create_build_html_report(header, build_results, build_logs_urls, artifact_ur
rows=rows, rows=rows,
task_url=task_url, task_url=task_url,
branch_name=branch_name, branch_name=branch_name,
commit_url=commit_url) commit_url=commit_url,
)
View File
@ -2,6 +2,7 @@
from commit_status_helper import get_commit from commit_status_helper import get_commit
def _filter_statuses(statuses): def _filter_statuses(statuses):
""" """
Squash statuses to latest state Squash statuses to latest state
@ -19,7 +20,6 @@ def _filter_statuses(statuses):
class RerunHelper: class RerunHelper:
def __init__(self, gh, pr_info, check_name): def __init__(self, gh, pr_info, check_name):
self.gh = gh self.gh = gh
self.pr_info = pr_info self.pr_info = pr_info
@ -30,6 +30,9 @@ class RerunHelper:
def is_already_finished_by_status(self): def is_already_finished_by_status(self):
# currently we agree even for failed statuses # currently we agree even for failed statuses
for status in self.statuses: for status in self.statuses:
if self.check_name in status.context and status.state in ('success', 'failure'): if self.check_name in status.context and status.state in (
"success",
"failure",
):
return True return True
return False return False
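The check above asks whether a given CI check has already reached a terminal state. A minimal sketch of the same logic, using a namedtuple in place of the GitHub status objects the real helper receives (the sample contexts are invented):

from collections import namedtuple

# Simplified stand-in for the commit status objects the real helper iterates over.
Status = namedtuple("Status", ["context", "state"])

def is_already_finished(statuses, check_name):
    # Same rule as above: a status whose context matches the check and whose
    # state is terminal means the check does not need to run again.
    for status in statuses:
        if check_name in status.context and status.state in ("success", "failure"):
            return True
    return False

statuses = [
    Status("Unit tests (asan, actions)", "success"),
    Status("Docs check (actions)", "pending"),
]
print(is_already_finished(statuses, "Unit tests"))  # True
print(is_already_finished(statuses, "Docs check"))  # False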
View File
@ -34,30 +34,59 @@ def _flatten_list(lst):
class S3Helper: class S3Helper:
def __init__(self, host): def __init__(self, host):
self.session = boto3.session.Session(region_name='us-east-1') self.session = boto3.session.Session(region_name="us-east-1")
self.client = self.session.client('s3', endpoint_url=host) self.client = self.session.client("s3", endpoint_url=host)
def _upload_file_to_s3(self, bucket_name, file_path, s3_path): def _upload_file_to_s3(self, bucket_name, file_path, s3_path):
logging.debug("Start uploading %s to bucket=%s path=%s", file_path, bucket_name, s3_path) logging.debug(
"Start uploading %s to bucket=%s path=%s", file_path, bucket_name, s3_path
)
metadata = {} metadata = {}
if os.path.getsize(file_path) < 64 * 1024 * 1024: if os.path.getsize(file_path) < 64 * 1024 * 1024:
if s3_path.endswith("txt") or s3_path.endswith("log") or s3_path.endswith("err") or s3_path.endswith("out"): if (
metadata['ContentType'] = "text/plain; charset=utf-8" s3_path.endswith("txt")
logging.info("Content type %s for file path %s", "text/plain; charset=utf-8", file_path) or s3_path.endswith("log")
or s3_path.endswith("err")
or s3_path.endswith("out")
):
metadata["ContentType"] = "text/plain; charset=utf-8"
logging.info(
"Content type %s for file path %s",
"text/plain; charset=utf-8",
file_path,
)
elif s3_path.endswith("html"): elif s3_path.endswith("html"):
metadata['ContentType'] = "text/html; charset=utf-8" metadata["ContentType"] = "text/html; charset=utf-8"
logging.info("Content type %s for file path %s", "text/html; charset=utf-8", file_path) logging.info(
"Content type %s for file path %s",
"text/html; charset=utf-8",
file_path,
)
elif s3_path.endswith("css"): elif s3_path.endswith("css"):
metadata['ContentType'] = "text/css; charset=utf-8" metadata["ContentType"] = "text/css; charset=utf-8"
logging.info("Content type %s for file path %s", "text/css; charset=utf-8", file_path) logging.info(
"Content type %s for file path %s",
"text/css; charset=utf-8",
file_path,
)
elif s3_path.endswith("js"): elif s3_path.endswith("js"):
metadata['ContentType'] = "text/javascript; charset=utf-8" metadata["ContentType"] = "text/javascript; charset=utf-8"
logging.info("Content type %s for file path %s", "text/css; charset=utf-8", file_path) logging.info(
"Content type %s for file path %s",
"text/css; charset=utf-8",
file_path,
)
else: else:
logging.info("No content type provied for %s", file_path) logging.info("No content type provied for %s", file_path)
else: else:
if re.search(r'\.(txt|log|err|out)$', s3_path) or re.search(r'\.log\..*(?<!\.gz)$', s3_path): if re.search(r"\.(txt|log|err|out)$", s3_path) or re.search(
logging.info("Going to compress file log file %s to %s", file_path, file_path + ".gz") r"\.log\..*(?<!\.gz)$", s3_path
):
logging.info(
"Going to compress file log file %s to %s",
file_path,
file_path + ".gz",
)
compress_file_fast(file_path, file_path + ".gz") compress_file_fast(file_path, file_path + ".gz")
file_path += ".gz" file_path += ".gz"
s3_path += ".gz" s3_path += ".gz"
@ -69,14 +98,21 @@ class S3Helper:
logging.info("Upload %s to %s. Meta: %s", file_path, s3_path, metadata) logging.info("Upload %s to %s. Meta: %s", file_path, s3_path, metadata)
# last two replacements are specifics of AWS urls: # last two replacements are specifics of AWS urls:
# https://jamesd3142.wordpress.com/2018/02/28/amazon-s3-and-the-plus-symbol/ # https://jamesd3142.wordpress.com/2018/02/28/amazon-s3-and-the-plus-symbol/
return "https://s3.amazonaws.com/{bucket}/{path}".format(bucket=bucket_name, path=s3_path) \ return (
.replace('+', '%2B').replace(' ', '%20') "https://s3.amazonaws.com/{bucket}/{path}".format(
bucket=bucket_name, path=s3_path
)
.replace("+", "%2B")
.replace(" ", "%20")
)
def upload_test_report_to_s3(self, file_path, s3_path): def upload_test_report_to_s3(self, file_path, s3_path):
if CI: if CI:
return self._upload_file_to_s3(S3_TEST_REPORTS_BUCKET, file_path, s3_path) return self._upload_file_to_s3(S3_TEST_REPORTS_BUCKET, file_path, s3_path)
else: else:
return S3Helper.copy_file_to_local(S3_TEST_REPORTS_BUCKET, file_path, s3_path) return S3Helper.copy_file_to_local(
S3_TEST_REPORTS_BUCKET, file_path, s3_path
)
def upload_build_file_to_s3(self, file_path, s3_path): def upload_build_file_to_s3(self, file_path, s3_path):
if CI: if CI:
@ -96,6 +132,7 @@ class S3Helper:
counter = 0 counter = 0
t = time.time() t = time.time()
sum_time = 0 sum_time = 0
def upload_task(file_path): def upload_task(file_path):
nonlocal counter nonlocal counter
nonlocal t nonlocal t
@ -104,16 +141,18 @@ class S3Helper:
s3_path = file_path.replace(dir_path, s3_dir_path) s3_path = file_path.replace(dir_path, s3_dir_path)
metadata = {} metadata = {}
if s3_path.endswith("html"): if s3_path.endswith("html"):
metadata['ContentType'] = "text/html; charset=utf-8" metadata["ContentType"] = "text/html; charset=utf-8"
elif s3_path.endswith("css"): elif s3_path.endswith("css"):
metadata['ContentType'] = "text/css; charset=utf-8" metadata["ContentType"] = "text/css; charset=utf-8"
elif s3_path.endswith("js"): elif s3_path.endswith("js"):
metadata['ContentType'] = "text/javascript; charset=utf-8" metadata["ContentType"] = "text/javascript; charset=utf-8"
# Retry # Retry
for i in range(5): for i in range(5):
try: try:
self.client.upload_file(file_path, bucket_name, s3_path, ExtraArgs=metadata) self.client.upload_file(
file_path, bucket_name, s3_path, ExtraArgs=metadata
)
break break
except Exception as ex: except Exception as ex:
if i == 4: if i == 4:
@ -123,11 +162,22 @@ class S3Helper:
counter += 1 counter += 1
if counter % 1000 == 0: if counter % 1000 == 0:
sum_time += int(time.time() - t) sum_time += int(time.time() - t)
print("Uploaded", counter, "-", int(time.time() - t), "s", "sum time", sum_time, "s") print(
"Uploaded",
counter,
"-",
int(time.time() - t),
"s",
"sum time",
sum_time,
"s",
)
t = time.time() t = time.time()
except Exception as ex: except Exception as ex:
logging.critical("Failed to upload file, expcetion %s", ex) logging.critical("Failed to upload file, expcetion %s", ex)
return "https://s3.amazonaws.com/{bucket}/{path}".format(bucket=bucket_name, path=s3_path) return "https://s3.amazonaws.com/{bucket}/{path}".format(
bucket=bucket_name, path=s3_path
)
p = Pool(256) p = Pool(256)
@ -136,8 +186,20 @@ class S3Helper:
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
return result return result
def _upload_folder_to_s3(self, folder_path, s3_folder_path, bucket_name, keep_dirs_in_s3_path, upload_symlinks): def _upload_folder_to_s3(
logging.info("Upload folder '%s' to bucket=%s of s3 folder '%s'", folder_path, bucket_name, s3_folder_path) self,
folder_path,
s3_folder_path,
bucket_name,
keep_dirs_in_s3_path,
upload_symlinks,
):
logging.info(
"Upload folder '%s' to bucket=%s of s3 folder '%s'",
folder_path,
bucket_name,
s3_folder_path,
)
if not os.path.exists(folder_path): if not os.path.exists(folder_path):
return [] return []
files = os.listdir(folder_path) files = os.listdir(folder_path)
@ -154,44 +216,81 @@ class S3Helper:
full_s3_path = s3_folder_path full_s3_path = s3_folder_path
if os.path.isdir(full_fs_path): if os.path.isdir(full_fs_path):
return self._upload_folder_to_s3(full_fs_path, full_s3_path, bucket_name, keep_dirs_in_s3_path, return self._upload_folder_to_s3(
upload_symlinks) full_fs_path,
full_s3_path,
bucket_name,
keep_dirs_in_s3_path,
upload_symlinks,
)
if os.path.islink(full_fs_path): if os.path.islink(full_fs_path):
if upload_symlinks: if upload_symlinks:
if CI: if CI:
return self._upload_file_to_s3(bucket_name, full_fs_path, full_s3_path + "/" + file_name) return self._upload_file_to_s3(
bucket_name, full_fs_path, full_s3_path + "/" + file_name
)
else: else:
return S3Helper.copy_file_to_local(bucket_name, full_fs_path, full_s3_path + "/" + file_name) return S3Helper.copy_file_to_local(
bucket_name, full_fs_path, full_s3_path + "/" + file_name
)
return [] return []
if CI: if CI:
return self._upload_file_to_s3(bucket_name, full_fs_path, full_s3_path + "/" + file_name) return self._upload_file_to_s3(
bucket_name, full_fs_path, full_s3_path + "/" + file_name
)
else: else:
return S3Helper.copy_file_to_local(bucket_name, full_fs_path, full_s3_path + "/" + file_name) return S3Helper.copy_file_to_local(
bucket_name, full_fs_path, full_s3_path + "/" + file_name
)
return sorted(_flatten_list(list(p.map(task, files)))) return sorted(_flatten_list(list(p.map(task, files))))
def upload_build_folder_to_s3(self, folder_path, s3_folder_path, keep_dirs_in_s3_path=True, upload_symlinks=True): def upload_build_folder_to_s3(
return self._upload_folder_to_s3(folder_path, s3_folder_path, S3_BUILDS_BUCKET, keep_dirs_in_s3_path, self,
upload_symlinks) folder_path,
s3_folder_path,
keep_dirs_in_s3_path=True,
upload_symlinks=True,
):
return self._upload_folder_to_s3(
folder_path,
s3_folder_path,
S3_BUILDS_BUCKET,
keep_dirs_in_s3_path,
upload_symlinks,
)
def upload_test_folder_to_s3(self, folder_path, s3_folder_path, keep_dirs_in_s3_path=True, upload_symlinks=True): def upload_test_folder_to_s3(
return self._upload_folder_to_s3(folder_path, s3_folder_path, S3_TEST_REPORTS_BUCKET, keep_dirs_in_s3_path, self,
upload_symlinks) folder_path,
s3_folder_path,
keep_dirs_in_s3_path=True,
upload_symlinks=True,
):
return self._upload_folder_to_s3(
folder_path,
s3_folder_path,
S3_TEST_REPORTS_BUCKET,
keep_dirs_in_s3_path,
upload_symlinks,
)
def list_prefix(self, s3_prefix_path, bucket=S3_BUILDS_BUCKET): def list_prefix(self, s3_prefix_path, bucket=S3_BUILDS_BUCKET):
objects = self.client.list_objects_v2(Bucket=bucket, Prefix=s3_prefix_path) objects = self.client.list_objects_v2(Bucket=bucket, Prefix=s3_prefix_path)
result = [] result = []
if 'Contents' in objects: if "Contents" in objects:
for obj in objects['Contents']: for obj in objects["Contents"]:
result.append(obj['Key']) result.append(obj["Key"])
return result return result
@staticmethod @staticmethod
def copy_file_to_local(bucket_name, file_path, s3_path): def copy_file_to_local(bucket_name, file_path, s3_path):
local_path = os.path.abspath(os.path.join(RUNNER_TEMP, 's3', bucket_name, s3_path)) local_path = os.path.abspath(
os.path.join(RUNNER_TEMP, "s3", bucket_name, s3_path)
)
local_dir = os.path.dirname(local_path) local_dir = os.path.dirname(local_path)
if not os.path.exists(local_dir): if not os.path.exists(local_dir):
os.makedirs(local_dir) os.makedirs(local_dir)
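Most of the _upload_file_to_s3 changes are line wrapping around one simple idea: derive the S3 ContentType from the file suffix and fall back to no metadata. A stand-alone sketch of just that mapping, expressed as a lookup table instead of the chain of endswith branches (not the repo's actual helper):

def guess_content_type(s3_path):
    # Same suffix rules as the branches above, expressed as a lookup table.
    suffix_types = {
        "txt": "text/plain; charset=utf-8",
        "log": "text/plain; charset=utf-8",
        "err": "text/plain; charset=utf-8",
        "out": "text/plain; charset=utf-8",
        "html": "text/html; charset=utf-8",
        "css": "text/css; charset=utf-8",
        "js": "text/javascript; charset=utf-8",
    }
    for suffix, content_type in suffix_types.items():
        if s3_path.endswith(suffix):
            return {"ContentType": content_type}
    return {}  # no content type, as in the original fallback

print(guess_content_type("report.html"))  # {'ContentType': 'text/html; charset=utf-8'}
print(guess_content_type("binary.deb"))   # {}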
View File
@ -23,19 +23,20 @@ from rerun_helper import RerunHelper
DOCKER_IMAGE = "clickhouse/split-build-smoke-test" DOCKER_IMAGE = "clickhouse/split-build-smoke-test"
DOWNLOAD_RETRIES_COUNT = 5 DOWNLOAD_RETRIES_COUNT = 5
RESULT_LOG_NAME = "run.log" RESULT_LOG_NAME = "run.log"
CHECK_NAME = 'Split build smoke test (actions)' CHECK_NAME = "Split build smoke test (actions)"
def process_result(result_folder, server_log_folder): def process_result(result_folder, server_log_folder):
status = "success" status = "success"
description = 'Server started and responded' description = "Server started and responded"
summary = [("Smoke test", "OK")] summary = [("Smoke test", "OK")]
with open(os.path.join(result_folder, RESULT_LOG_NAME), 'r') as run_log: with open(os.path.join(result_folder, RESULT_LOG_NAME), "r") as run_log:
lines = run_log.read().split('\n') lines = run_log.read().split("\n")
if not lines or lines[0].strip() != 'OK': if not lines or lines[0].strip() != "OK":
status = "failure" status = "failure"
logging.info("Lines is not ok: %s", str('\n'.join(lines))) logging.info("Lines is not ok: %s", str("\n".join(lines)))
summary = [("Smoke test", "FAIL")] summary = [("Smoke test", "FAIL")]
description = 'Server failed to respond, see result in logs' description = "Server failed to respond, see result in logs"
result_logs = [] result_logs = []
server_log_path = os.path.join(server_log_folder, "clickhouse-server.log") server_log_path = os.path.join(server_log_folder, "clickhouse-server.log")
@ -43,17 +44,25 @@ def process_result(result_folder, server_log_folder):
client_stderr_log_path = os.path.join(result_folder, "clientstderr.log") client_stderr_log_path = os.path.join(result_folder, "clientstderr.log")
run_log_path = os.path.join(result_folder, RESULT_LOG_NAME) run_log_path = os.path.join(result_folder, RESULT_LOG_NAME)
for path in [server_log_path, stderr_log_path, client_stderr_log_path, run_log_path]: for path in [
server_log_path,
stderr_log_path,
client_stderr_log_path,
run_log_path,
]:
if os.path.exists(path): if os.path.exists(path):
result_logs.append(path) result_logs.append(path)
return status, description, summary, result_logs return status, description, summary, result_logs
def get_run_command(build_path, result_folder, server_log_folder, docker_image): def get_run_command(build_path, result_folder, server_log_folder, docker_image):
return f"docker run --network=host --volume={build_path}:/package_folder" \ return (
f" --volume={server_log_folder}:/var/log/clickhouse-server" \ f"docker run --network=host --volume={build_path}:/package_folder"
f" --volume={result_folder}:/test_output" \ f" --volume={server_log_folder}:/var/log/clickhouse-server"
f" {docker_image} >{result_folder}/{RESULT_LOG_NAME}" f" --volume={result_folder}:/test_output"
f" {docker_image} >{result_folder}/{RESULT_LOG_NAME}"
)
if __name__ == "__main__": if __name__ == "__main__":
@ -76,8 +85,8 @@ if __name__ == "__main__":
for root, _, files in os.walk(reports_path): for root, _, files in os.walk(reports_path):
for f in files: for f in files:
if f == 'changed_images.json': if f == "changed_images.json":
images_path = os.path.join(root, 'changed_images.json') images_path = os.path.join(root, "changed_images.json")
break break
docker_image = get_image_with_version(reports_path, DOCKER_IMAGE) docker_image = get_image_with_version(reports_path, DOCKER_IMAGE)
@ -96,7 +105,9 @@ if __name__ == "__main__":
if not os.path.exists(result_path): if not os.path.exists(result_path):
os.makedirs(result_path) os.makedirs(result_path)
run_command = get_run_command(packages_path, result_path, server_log_path, docker_image) run_command = get_run_command(
packages_path, result_path, server_log_path, docker_image
)
logging.info("Going to run command %s", run_command) logging.info("Going to run command %s", run_command)
with subprocess.Popen(run_command, shell=True) as process: with subprocess.Popen(run_command, shell=True) as process:
@ -110,13 +121,30 @@ if __name__ == "__main__":
print("Result path", os.listdir(result_path)) print("Result path", os.listdir(result_path))
print("Server log path", os.listdir(server_log_path)) print("Server log path", os.listdir(server_log_path))
state, description, test_results, additional_logs = process_result(result_path, server_log_path) state, description, test_results, additional_logs = process_result(
result_path, server_log_path
)
ch_helper = ClickHouseHelper() ch_helper = ClickHouseHelper()
s3_helper = S3Helper('https://s3.amazonaws.com') s3_helper = S3Helper("https://s3.amazonaws.com")
report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_logs, CHECK_NAME) report_url = upload_results(
s3_helper,
pr_info.number,
pr_info.sha,
test_results,
additional_logs,
CHECK_NAME,
)
print(f"::notice ::Report url: {report_url}") print(f"::notice ::Report url: {report_url}")
post_commit_status(gh, pr_info.sha, CHECK_NAME, description, state, report_url) post_commit_status(gh, pr_info.sha, CHECK_NAME, description, state, report_url)
prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, CHECK_NAME) prepared_events = prepare_tests_results_for_clickhouse(
pr_info,
test_results,
state,
stopwatch.duration_seconds,
stopwatch.start_time_str,
report_url,
CHECK_NAME,
)
ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events) ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
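The pass/fail decision in process_result comes down to the first line of run.log. A minimal sketch of that decision, using a temporary file in place of the real test output:

import os
import tempfile

def process_run_log(run_log_path):
    # Mirrors process_result above: the run counts as a success only when the
    # first line of run.log is exactly "OK".
    with open(run_log_path, "r") as run_log:
        lines = run_log.read().split("\n")
    if not lines or lines[0].strip() != "OK":
        return "failure", [("Smoke test", "FAIL")]
    return "success", [("Smoke test", "OK")]

with tempfile.TemporaryDirectory() as tmp:
    path = os.path.join(tmp, "run.log")
    with open(path, "w") as f:
        f.write("OK\nserver started and responded\n")
    print(process_run_log(path))  # ('success', [('Smoke test', 'OK')])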
View File
@ -27,15 +27,19 @@ class SSHAgent:
self._env_backup["SSH_OPTIONS"] = os.environ.get("SSH_OPTIONS") self._env_backup["SSH_OPTIONS"] = os.environ.get("SSH_OPTIONS")
# set ENV from stdout of ssh-agent # set ENV from stdout of ssh-agent
for line in self._run(['ssh-agent']).splitlines(): for line in self._run(["ssh-agent"]).splitlines():
name, _, value = line.partition(b"=") name, _, value = line.partition(b"=")
if _ == b"=": if _ == b"=":
value = value.split(b";", 1)[0] value = value.split(b";", 1)[0]
self._env[name.decode()] = value.decode() self._env[name.decode()] = value.decode()
os.environ[name.decode()] = value.decode() os.environ[name.decode()] = value.decode()
ssh_options = "," + os.environ["SSH_OPTIONS"] if os.environ.get("SSH_OPTIONS") else "" ssh_options = (
os.environ["SSH_OPTIONS"] = f"{ssh_options}UserKnownHostsFile=/dev/null,StrictHostKeyChecking=no" "," + os.environ["SSH_OPTIONS"] if os.environ.get("SSH_OPTIONS") else ""
)
os.environ[
"SSH_OPTIONS"
] = f"{ssh_options}UserKnownHostsFile=/dev/null,StrictHostKeyChecking=no"
def add(self, key): def add(self, key):
key_pub = self._key_pub(key) key_pub = self._key_pub(key)
@ -89,7 +93,13 @@ class SSHAgent:
@staticmethod @staticmethod
def _run(cmd, stdin=None): def _run(cmd, stdin=None):
shell = isinstance(cmd, str) shell = isinstance(cmd, str)
with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE if stdin else None, shell=shell) as p: with subprocess.Popen(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
stdin=subprocess.PIPE if stdin else None,
shell=shell,
) as p:
stdout, stderr = p.communicate(stdin) stdout, stderr = p.communicate(stdin)
if stdout.strip().decode() == "The agent has no identities.": if stdout.strip().decode() == "The agent has no identities.":
@ -101,6 +111,7 @@ class SSHAgent:
return stdout return stdout
class SSHKey: class SSHKey:
def __init__(self, key_name=None, key_value=None): def __init__(self, key_name=None, key_value=None):
if key_name is None and key_value is None: if key_name is None and key_value is None:
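The SSHAgent constructor above parses the shell snippet that ssh-agent prints into environment variables. A small sketch of that parsing, assuming ssh-agent is on PATH (note that, like the helper, it starts a real agent process):

import subprocess

def ssh_agent_env():
    # ssh-agent prints lines such as
    #   SSH_AUTH_SOCK=/tmp/ssh-XXXXXX/agent.123; export SSH_AUTH_SOCK;
    # the same partition/split steps as above turn them into a dict.
    env = {}
    for line in subprocess.check_output(["ssh-agent"]).splitlines():
        name, sep, value = line.partition(b"=")
        if sep == b"=":
            env[name.decode()] = value.split(b";", 1)[0].decode()
    return env

print(ssh_agent_env())  # e.g. {'SSH_AUTH_SOCK': '/tmp/...', 'SSH_AGENT_PID': '...'}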
View File
@ -2,7 +2,8 @@
import datetime import datetime
class Stopwatch():
class Stopwatch:
def __init__(self): def __init__(self):
self.start_time = datetime.datetime.utcnow() self.start_time = datetime.datetime.utcnow()
self.start_time_str_value = self.start_time.strftime("%Y-%m-%d %H:%M:%S") self.start_time_str_value = self.start_time.strftime("%Y-%m-%d %H:%M:%S")
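Only the first lines of Stopwatch are shown here; the checks above consume it through stopwatch.duration_seconds and stopwatch.start_time_str. A minimal equivalent might look like the following (the attribute and property names are inferred from that usage, not from the file itself):

import datetime

class Stopwatch:
    def __init__(self):
        # UTC start time plus a pre-rendered string, as in the hunk above.
        self.start_time = datetime.datetime.utcnow()
        self.start_time_str = self.start_time.strftime("%Y-%m-%d %H:%M:%S")

    @property
    def duration_seconds(self):
        # Elapsed wall-clock time since construction.
        return (datetime.datetime.utcnow() - self.start_time).total_seconds()

sw = Stopwatch()
print(sw.start_time_str, sw.duration_seconds)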
View File
@ -8,18 +8,19 @@ import json
import time import time
from collections import namedtuple from collections import namedtuple
def get_key_and_app_from_aws(): def get_key_and_app_from_aws():
import boto3 import boto3
secret_name = "clickhouse_github_secret_key" secret_name = "clickhouse_github_secret_key"
session = boto3.session.Session() session = boto3.session.Session()
client = session.client( client = session.client(
service_name='secretsmanager', service_name="secretsmanager",
) )
get_secret_value_response = client.get_secret_value( get_secret_value_response = client.get_secret_value(SecretId=secret_name)
SecretId=secret_name data = json.loads(get_secret_value_response["SecretString"])
) return data["clickhouse-app-key"], int(data["clickhouse-app-id"])
data = json.loads(get_secret_value_response['SecretString'])
return data['clickhouse-app-key'], int(data['clickhouse-app-id'])
def get_installation_id(jwt_token): def get_installation_id(jwt_token):
headers = { headers = {
@ -29,117 +30,152 @@ def get_installation_id(jwt_token):
response = requests.get("https://api.github.com/app/installations", headers=headers) response = requests.get("https://api.github.com/app/installations", headers=headers)
response.raise_for_status() response.raise_for_status()
data = response.json() data = response.json()
return data[0]['id'] return data[0]["id"]
def get_access_token(jwt_token, installation_id): def get_access_token(jwt_token, installation_id):
headers = { headers = {
"Authorization": f"Bearer {jwt_token}", "Authorization": f"Bearer {jwt_token}",
"Accept": "application/vnd.github.v3+json", "Accept": "application/vnd.github.v3+json",
} }
response = requests.post(f"https://api.github.com/app/installations/{installation_id}/access_tokens", headers=headers) response = requests.post(
f"https://api.github.com/app/installations/{installation_id}/access_tokens",
headers=headers,
)
response.raise_for_status() response.raise_for_status()
data = response.json() data = response.json()
return data['token'] return data["token"]
RunnerDescription = namedtuple('RunnerDescription', ['id', 'name', 'tags', 'offline', 'busy']) RunnerDescription = namedtuple(
"RunnerDescription", ["id", "name", "tags", "offline", "busy"]
)
def list_runners(access_token): def list_runners(access_token):
headers = { headers = {
"Authorization": f"token {access_token}", "Authorization": f"token {access_token}",
"Accept": "application/vnd.github.v3+json", "Accept": "application/vnd.github.v3+json",
} }
response = requests.get("https://api.github.com/orgs/ClickHouse/actions/runners?per_page=100", headers=headers) response = requests.get(
"https://api.github.com/orgs/ClickHouse/actions/runners?per_page=100",
headers=headers,
)
response.raise_for_status() response.raise_for_status()
data = response.json() data = response.json()
total_runners = data['total_count'] total_runners = data["total_count"]
runners = data['runners'] runners = data["runners"]
total_pages = int(total_runners / 100 + 1) total_pages = int(total_runners / 100 + 1)
for i in range(2, total_pages + 1): for i in range(2, total_pages + 1):
response = requests.get(f"https://api.github.com/orgs/ClickHouse/actions/runners?page={i}&per_page=100", headers=headers) response = requests.get(
f"https://api.github.com/orgs/ClickHouse/actions/runners?page={i}&per_page=100",
headers=headers,
)
response.raise_for_status() response.raise_for_status()
data = response.json() data = response.json()
runners += data['runners'] runners += data["runners"]
print("Total runners", len(runners)) print("Total runners", len(runners))
result = [] result = []
for runner in runners: for runner in runners:
tags = [tag['name'] for tag in runner['labels']] tags = [tag["name"] for tag in runner["labels"]]
desc = RunnerDescription(id=runner['id'], name=runner['name'], tags=tags, desc = RunnerDescription(
offline=runner['status']=='offline', busy=runner['busy']) id=runner["id"],
name=runner["name"],
tags=tags,
offline=runner["status"] == "offline",
busy=runner["busy"],
)
result.append(desc) result.append(desc)
return result return result
def push_metrics_to_cloudwatch(listed_runners, namespace): def push_metrics_to_cloudwatch(listed_runners, namespace):
import boto3 import boto3
client = boto3.client('cloudwatch')
client = boto3.client("cloudwatch")
metrics_data = [] metrics_data = []
busy_runners = sum(1 for runner in listed_runners if runner.busy) busy_runners = sum(1 for runner in listed_runners if runner.busy)
metrics_data.append({ metrics_data.append(
'MetricName': 'BusyRunners', {
'Value': busy_runners, "MetricName": "BusyRunners",
'Unit': 'Count', "Value": busy_runners,
}) "Unit": "Count",
}
)
total_active_runners = sum(1 for runner in listed_runners if not runner.offline) total_active_runners = sum(1 for runner in listed_runners if not runner.offline)
metrics_data.append({ metrics_data.append(
'MetricName': 'ActiveRunners', {
'Value': total_active_runners, "MetricName": "ActiveRunners",
'Unit': 'Count', "Value": total_active_runners,
}) "Unit": "Count",
}
)
total_runners = len(listed_runners) total_runners = len(listed_runners)
metrics_data.append({ metrics_data.append(
'MetricName': 'TotalRunners', {
'Value': total_runners, "MetricName": "TotalRunners",
'Unit': 'Count', "Value": total_runners,
}) "Unit": "Count",
}
)
if total_active_runners == 0: if total_active_runners == 0:
busy_ratio = 100 busy_ratio = 100
else: else:
busy_ratio = busy_runners / total_active_runners * 100 busy_ratio = busy_runners / total_active_runners * 100
metrics_data.append({ metrics_data.append(
'MetricName': 'BusyRunnersRatio', {
'Value': busy_ratio, "MetricName": "BusyRunnersRatio",
'Unit': 'Percent', "Value": busy_ratio,
}) "Unit": "Percent",
}
)
client.put_metric_data(Namespace='RunnersMetrics', MetricData=metrics_data) client.put_metric_data(Namespace="RunnersMetrics", MetricData=metrics_data)
def how_many_instances_to_kill(event_data): def how_many_instances_to_kill(event_data):
data_array = event_data['CapacityToTerminate'] data_array = event_data["CapacityToTerminate"]
to_kill_by_zone = {} to_kill_by_zone = {}
for av_zone in data_array: for av_zone in data_array:
zone_name = av_zone['AvailabilityZone'] zone_name = av_zone["AvailabilityZone"]
to_kill = av_zone['Capacity'] to_kill = av_zone["Capacity"]
if zone_name not in to_kill_by_zone: if zone_name not in to_kill_by_zone:
to_kill_by_zone[zone_name] = 0 to_kill_by_zone[zone_name] = 0
to_kill_by_zone[zone_name] += to_kill to_kill_by_zone[zone_name] += to_kill
return to_kill_by_zone return to_kill_by_zone
def get_candidates_to_be_killed(event_data): def get_candidates_to_be_killed(event_data):
data_array = event_data['Instances'] data_array = event_data["Instances"]
instances_by_zone = {} instances_by_zone = {}
for instance in data_array: for instance in data_array:
zone_name = instance['AvailabilityZone'] zone_name = instance["AvailabilityZone"]
instance_id = instance['InstanceId'] instance_id = instance["InstanceId"]
if zone_name not in instances_by_zone: if zone_name not in instances_by_zone:
instances_by_zone[zone_name] = [] instances_by_zone[zone_name] = []
instances_by_zone[zone_name].append(instance_id) instances_by_zone[zone_name].append(instance_id)
return instances_by_zone return instances_by_zone
def delete_runner(access_token, runner): def delete_runner(access_token, runner):
headers = { headers = {
"Authorization": f"token {access_token}", "Authorization": f"token {access_token}",
"Accept": "application/vnd.github.v3+json", "Accept": "application/vnd.github.v3+json",
} }
response = requests.delete(f"https://api.github.com/orgs/ClickHouse/actions/runners/{runner.id}", headers=headers) response = requests.delete(
f"https://api.github.com/orgs/ClickHouse/actions/runners/{runner.id}",
headers=headers,
)
response.raise_for_status() response.raise_for_status()
print(f"Response code deleting {runner.name} with id {runner.id} is {response.status_code}") print(
f"Response code deleting {runner.name} with id {runner.id} is {response.status_code}"
)
return response.status_code == 204 return response.status_code == 204
@ -166,12 +202,16 @@ def main(github_secret_key, github_app_id, event):
num_to_kill = to_kill_by_zone[zone] num_to_kill = to_kill_by_zone[zone]
candidates = instances_by_zone[zone] candidates = instances_by_zone[zone]
if num_to_kill > len(candidates): if num_to_kill > len(candidates):
raise Exception(f"Required to kill {num_to_kill}, but have only {len(candidates)} candidates in AV {zone}") raise Exception(
f"Required to kill {num_to_kill}, but have only {len(candidates)} candidates in AV {zone}"
)
delete_for_av = [] delete_for_av = []
for candidate in candidates: for candidate in candidates:
if candidate not in set([runner.name for runner in runners]): if candidate not in set([runner.name for runner in runners]):
print(f"Candidate {candidate} was not in runners list, simply delete it") print(
f"Candidate {candidate} was not in runners list, simply delete it"
)
instances_to_kill.append(candidate) instances_to_kill.append(candidate)
for candidate in candidates: for candidate in candidates:
@ -183,57 +223,76 @@ def main(github_secret_key, github_app_id, event):
for runner in runners: for runner in runners:
if runner.name == candidate: if runner.name == candidate:
if not runner.busy: if not runner.busy:
print(f"Runner {runner.name} is not busy and can be deleted from AV {zone}") print(
f"Runner {runner.name} is not busy and can be deleted from AV {zone}"
)
delete_for_av.append(runner) delete_for_av.append(runner)
else: else:
print(f"Runner {runner.name} is busy, not going to delete it") print(f"Runner {runner.name} is busy, not going to delete it")
break break
if len(delete_for_av) < num_to_kill: if len(delete_for_av) < num_to_kill:
print(f"Checked all candidates for av {zone}, get to delete {len(delete_for_av)}, but still cannot get required {num_to_kill}") print(
f"Checked all candidates for av {zone}, get to delete {len(delete_for_av)}, but still cannot get required {num_to_kill}"
)
to_delete_runners += delete_for_av to_delete_runners += delete_for_av
print("Got instances to kill: ", ', '.join(instances_to_kill)) print("Got instances to kill: ", ", ".join(instances_to_kill))
print("Going to delete runners:", ', '.join([runner.name for runner in to_delete_runners])) print(
"Going to delete runners:",
", ".join([runner.name for runner in to_delete_runners]),
)
for runner in to_delete_runners: for runner in to_delete_runners:
if delete_runner(access_token, runner): if delete_runner(access_token, runner):
print(f"Runner with name {runner.name} and id {runner.id} successfuly deleted from github") print(
f"Runner with name {runner.name} and id {runner.id} successfuly deleted from github"
)
instances_to_kill.append(runner.name) instances_to_kill.append(runner.name)
else: else:
print(f"Cannot delete {runner.name} from github") print(f"Cannot delete {runner.name} from github")
## push metrics ## push metrics
#runners = list_runners(access_token) # runners = list_runners(access_token)
#push_metrics_to_cloudwatch(runners, 'RunnersMetrics') # push_metrics_to_cloudwatch(runners, 'RunnersMetrics')
response = { response = {"InstanceIDs": instances_to_kill}
"InstanceIDs": instances_to_kill
}
print(response) print(response)
return response return response
def handler(event, context): def handler(event, context):
private_key, app_id = get_key_and_app_from_aws() private_key, app_id = get_key_and_app_from_aws()
return main(private_key, app_id, event) return main(private_key, app_id, event)
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Get list of runners and their states') parser = argparse.ArgumentParser(description="Get list of runners and their states")
parser.add_argument('-p', '--private-key-path', help='Path to file with private key') parser.add_argument(
parser.add_argument('-k', '--private-key', help='Private key') "-p", "--private-key-path", help="Path to file with private key"
parser.add_argument('-a', '--app-id', type=int, help='GitHub application ID', required=True) )
parser.add_argument("-k", "--private-key", help="Private key")
parser.add_argument(
"-a", "--app-id", type=int, help="GitHub application ID", required=True
)
args = parser.parse_args() args = parser.parse_args()
if not args.private_key_path and not args.private_key: if not args.private_key_path and not args.private_key:
print("Either --private-key-path or --private-key must be specified", file=sys.stderr) print(
"Either --private-key-path or --private-key must be specified",
file=sys.stderr,
)
if args.private_key_path and args.private_key: if args.private_key_path and args.private_key:
print("Either --private-key-path or --private-key must be specified", file=sys.stderr) print(
"Either --private-key-path or --private-key must be specified",
file=sys.stderr,
)
if args.private_key: if args.private_key:
private_key = args.private_key private_key = args.private_key
else: else:
with open(args.private_key_path, 'r') as key_file: with open(args.private_key_path, "r") as key_file:
private_key = key_file.read() private_key = key_file.read()
sample_event = { sample_event = {
@ -243,41 +302,41 @@ if __name__ == "__main__":
{ {
"AvailabilityZone": "us-east-1b", "AvailabilityZone": "us-east-1b",
"Capacity": 1, "Capacity": 1,
"InstanceMarketOption": "OnDemand" "InstanceMarketOption": "OnDemand",
}, },
{ {
"AvailabilityZone": "us-east-1c", "AvailabilityZone": "us-east-1c",
"Capacity": 2, "Capacity": 2,
"InstanceMarketOption": "OnDemand" "InstanceMarketOption": "OnDemand",
} },
], ],
"Instances": [ "Instances": [
{ {
"AvailabilityZone": "us-east-1b", "AvailabilityZone": "us-east-1b",
"InstanceId": "i-08d0b3c1a137e02a5", "InstanceId": "i-08d0b3c1a137e02a5",
"InstanceType": "t2.nano", "InstanceType": "t2.nano",
"InstanceMarketOption": "OnDemand" "InstanceMarketOption": "OnDemand",
}, },
{ {
"AvailabilityZone": "us-east-1c", "AvailabilityZone": "us-east-1c",
"InstanceId": "ip-172-31-45-253.eu-west-1.compute.internal", "InstanceId": "ip-172-31-45-253.eu-west-1.compute.internal",
"InstanceType": "t2.nano", "InstanceType": "t2.nano",
"InstanceMarketOption": "OnDemand" "InstanceMarketOption": "OnDemand",
}, },
{ {
"AvailabilityZone": "us-east-1c", "AvailabilityZone": "us-east-1c",
"InstanceId": "ip-172-31-27-227.eu-west-1.compute.internal", "InstanceId": "ip-172-31-27-227.eu-west-1.compute.internal",
"InstanceType": "t2.nano", "InstanceType": "t2.nano",
"InstanceMarketOption": "OnDemand" "InstanceMarketOption": "OnDemand",
}, },
{ {
"AvailabilityZone": "us-east-1c", "AvailabilityZone": "us-east-1c",
"InstanceId": "ip-172-31-45-253.eu-west-1.compute.internal", "InstanceId": "ip-172-31-45-253.eu-west-1.compute.internal",
"InstanceType": "t2.nano", "InstanceType": "t2.nano",
"InstanceMarketOption": "OnDemand" "InstanceMarketOption": "OnDemand",
} },
], ],
"Cause": "SCALE_IN" "Cause": "SCALE_IN",
} }
main(private_key, args.app_id, sample_event) main(private_key, args.app_id, sample_event)
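list_runners above pages through the GitHub self-hosted runners API 100 entries at a time. A trimmed sketch of the same pagination that only collects runner names; it assumes a valid org-level access token, so the call at the bottom is left commented out:

import requests

def list_runner_names(access_token):
    # Same pagination as list_runners above: read total_count from the first
    # page, then fetch the remaining pages 100 runners at a time.
    headers = {
        "Authorization": f"token {access_token}",
        "Accept": "application/vnd.github.v3+json",
    }
    url = "https://api.github.com/orgs/ClickHouse/actions/runners"
    response = requests.get(f"{url}?per_page=100", headers=headers)
    response.raise_for_status()
    data = response.json()
    runners = data["runners"]
    total_pages = int(data["total_count"] / 100 + 1)
    for page in range(2, total_pages + 1):
        response = requests.get(f"{url}?page={page}&per_page=100", headers=headers)
        response.raise_for_status()
        runners += response.json()["runners"]
    return [runner["name"] for runner in runners]

# Needs a token with organization admin scope:
# print(list_runner_names(get_access_token(jwt_token, installation_id)))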
View File
@ -7,6 +7,7 @@ import sys
import json import json
import time import time
def get_installation_id(jwt_token): def get_installation_id(jwt_token):
headers = { headers = {
"Authorization": f"Bearer {jwt_token}", "Authorization": f"Bearer {jwt_token}",
@ -15,40 +16,48 @@ def get_installation_id(jwt_token):
response = requests.get("https://api.github.com/app/installations", headers=headers) response = requests.get("https://api.github.com/app/installations", headers=headers)
response.raise_for_status() response.raise_for_status()
data = response.json() data = response.json()
return data[0]['id'] return data[0]["id"]
def get_access_token(jwt_token, installation_id): def get_access_token(jwt_token, installation_id):
headers = { headers = {
"Authorization": f"Bearer {jwt_token}", "Authorization": f"Bearer {jwt_token}",
"Accept": "application/vnd.github.v3+json", "Accept": "application/vnd.github.v3+json",
} }
response = requests.post(f"https://api.github.com/app/installations/{installation_id}/access_tokens", headers=headers) response = requests.post(
f"https://api.github.com/app/installations/{installation_id}/access_tokens",
headers=headers,
)
response.raise_for_status() response.raise_for_status()
data = response.json() data = response.json()
return data['token'] return data["token"]
def get_runner_registration_token(access_token): def get_runner_registration_token(access_token):
headers = { headers = {
"Authorization": f"token {access_token}", "Authorization": f"token {access_token}",
"Accept": "application/vnd.github.v3+json", "Accept": "application/vnd.github.v3+json",
} }
response = requests.post("https://api.github.com/orgs/ClickHouse/actions/runners/registration-token", headers=headers) response = requests.post(
"https://api.github.com/orgs/ClickHouse/actions/runners/registration-token",
headers=headers,
)
response.raise_for_status() response.raise_for_status()
data = response.json() data = response.json()
return data['token'] return data["token"]
def get_key_and_app_from_aws(): def get_key_and_app_from_aws():
import boto3 import boto3
secret_name = "clickhouse_github_secret_key" secret_name = "clickhouse_github_secret_key"
session = boto3.session.Session() session = boto3.session.Session()
client = session.client( client = session.client(
service_name='secretsmanager', service_name="secretsmanager",
) )
get_secret_value_response = client.get_secret_value( get_secret_value_response = client.get_secret_value(SecretId=secret_name)
SecretId=secret_name data = json.loads(get_secret_value_response["SecretString"])
) return data["clickhouse-app-key"], int(data["clickhouse-app-id"])
data = json.loads(get_secret_value_response['SecretString'])
return data['clickhouse-app-key'], int(data['clickhouse-app-id'])
def main(github_secret_key, github_app_id, push_to_ssm, ssm_parameter_name): def main(github_secret_key, github_app_id, push_to_ssm, ssm_parameter_name):
@ -67,40 +76,65 @@ def main(github_secret_key, github_app_id, push_to_ssm, ssm_parameter_name):
import boto3 import boto3
print("Trying to put params into ssm manager") print("Trying to put params into ssm manager")
client = boto3.client('ssm') client = boto3.client("ssm")
client.put_parameter( client.put_parameter(
Name=ssm_parameter_name, Name=ssm_parameter_name,
Value=runner_registration_token, Value=runner_registration_token,
Type='SecureString', Type="SecureString",
Overwrite=True) Overwrite=True,
)
else: else:
print("Not push token to AWS Parameter Store, just print:", runner_registration_token) print(
"Not push token to AWS Parameter Store, just print:",
runner_registration_token,
)
def handler(event, context): def handler(event, context):
private_key, app_id = get_key_and_app_from_aws() private_key, app_id = get_key_and_app_from_aws()
main(private_key, app_id, True, 'github_runner_registration_token') main(private_key, app_id, True, "github_runner_registration_token")
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Get new token from github to add runners') parser = argparse.ArgumentParser(
parser.add_argument('-p', '--private-key-path', help='Path to file with private key') description="Get new token from github to add runners"
parser.add_argument('-k', '--private-key', help='Private key') )
parser.add_argument('-a', '--app-id', type=int, help='GitHub application ID', required=True) parser.add_argument(
parser.add_argument('--push-to-ssm', action='store_true', help='Store received token in parameter store') "-p", "--private-key-path", help="Path to file with private key"
parser.add_argument('--ssm-parameter-name', default='github_runner_registration_token', help='AWS paramater store parameter name') )
parser.add_argument("-k", "--private-key", help="Private key")
parser.add_argument(
"-a", "--app-id", type=int, help="GitHub application ID", required=True
)
parser.add_argument(
"--push-to-ssm",
action="store_true",
help="Store received token in parameter store",
)
parser.add_argument(
"--ssm-parameter-name",
default="github_runner_registration_token",
help="AWS paramater store parameter name",
)
args = parser.parse_args() args = parser.parse_args()
if not args.private_key_path and not args.private_key: if not args.private_key_path and not args.private_key:
print("Either --private-key-path or --private-key must be specified", file=sys.stderr) print(
"Either --private-key-path or --private-key must be specified",
file=sys.stderr,
)
if args.private_key_path and args.private_key: if args.private_key_path and args.private_key:
print("Either --private-key-path or --private-key must be specified", file=sys.stderr) print(
"Either --private-key-path or --private-key must be specified",
file=sys.stderr,
)
if args.private_key: if args.private_key:
private_key = args.private_key private_key = args.private_key
else: else:
with open(args.private_key_path, 'r') as key_file: with open(args.private_key_path, "r") as key_file:
private_key = key_file.read() private_key = key_file.read()
main(private_key, args.app_id, args.push_to_ssm, args.ssm_parameter_name) main(private_key, args.app_id, args.push_to_ssm, args.ssm_parameter_name)
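When --push-to-ssm is set, the lambda stores the short-lived registration token in AWS Parameter Store. A minimal sketch of that single call, assuming AWS credentials with ssm:PutParameter are available (the token value in the comment is made up):

import boto3

def store_registration_token(token, parameter_name="github_runner_registration_token"):
    # Same call the lambda makes: keep the short-lived registration token in
    # Parameter Store as an encrypted SecureString, overwriting the old value.
    client = boto3.client("ssm")
    client.put_parameter(
        Name=parameter_name,
        Value=token,
        Type="SecureString",
        Overwrite=True,
    )

# store_registration_token("AB3F...")  # needs AWS credentials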
View File
@ -15,32 +15,38 @@ from build_download_helper import download_unit_tests
from upload_result_helper import upload_results from upload_result_helper import upload_results
from docker_pull_helper import get_image_with_version from docker_pull_helper import get_image_with_version
from commit_status_helper import post_commit_status from commit_status_helper import post_commit_status
from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse from clickhouse_helper import (
ClickHouseHelper,
mark_flaky_tests,
prepare_tests_results_for_clickhouse,
)
from stopwatch import Stopwatch from stopwatch import Stopwatch
from rerun_helper import RerunHelper from rerun_helper import RerunHelper
from tee_popen import TeePopen from tee_popen import TeePopen
IMAGE_NAME = 'clickhouse/unit-test' IMAGE_NAME = "clickhouse/unit-test"
def get_test_name(line): def get_test_name(line):
elements = reversed(line.split(' ')) elements = reversed(line.split(" "))
for element in elements: for element in elements:
if '(' not in element and ')' not in element: if "(" not in element and ")" not in element:
return element return element
raise Exception(f"No test name in line '{line}'") raise Exception(f"No test name in line '{line}'")
def process_result(result_folder): def process_result(result_folder):
OK_SIGN = 'OK ]' OK_SIGN = "OK ]"
FAILED_SIGN = 'FAILED ]' FAILED_SIGN = "FAILED ]"
SEGFAULT = 'Segmentation fault' SEGFAULT = "Segmentation fault"
SIGNAL = 'received signal SIG' SIGNAL = "received signal SIG"
PASSED = 'PASSED' PASSED = "PASSED"
summary = [] summary = []
total_counter = 0 total_counter = 0
failed_counter = 0 failed_counter = 0
result_log_path = f'{result_folder}/test_result.txt' result_log_path = f"{result_folder}/test_result.txt"
if not os.path.exists(result_log_path): if not os.path.exists(result_log_path):
logging.info("No output log on path %s", result_log_path) logging.info("No output log on path %s", result_log_path)
return "error", "No output log", summary, [] return "error", "No output log", summary, []
@ -48,7 +54,7 @@ def process_result(result_folder):
status = "success" status = "success"
description = "" description = ""
passed = False passed = False
with open(result_log_path, 'r', encoding='utf-8') as test_result: with open(result_log_path, "r", encoding="utf-8") as test_result:
for line in test_result: for line in test_result:
if OK_SIGN in line: if OK_SIGN in line:
logging.info("Found ok line: '%s'", line) logging.info("Found ok line: '%s'", line)
@ -56,7 +62,7 @@ def process_result(result_folder):
logging.info("Test name: '%s'", test_name) logging.info("Test name: '%s'", test_name)
summary.append((test_name, "OK")) summary.append((test_name, "OK"))
total_counter += 1 total_counter += 1
elif FAILED_SIGN in line and 'listed below' not in line and 'ms)' in line: elif FAILED_SIGN in line and "listed below" not in line and "ms)" in line:
logging.info("Found fail line: '%s'", line) logging.info("Found fail line: '%s'", line)
test_name = get_test_name(line.strip()) test_name = get_test_name(line.strip())
logging.info("Test name: '%s'", test_name) logging.info("Test name: '%s'", test_name)
@ -85,7 +91,9 @@ def process_result(result_folder):
status = "failure" status = "failure"
if not description: if not description:
description += f"fail: {failed_counter}, passed: {total_counter - failed_counter}" description += (
f"fail: {failed_counter}, passed: {total_counter - failed_counter}"
)
return status, description, summary, [result_log_path] return status, description, summary, [result_log_path]
@ -139,15 +147,30 @@ if __name__ == "__main__":
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
s3_helper = S3Helper('https://s3.amazonaws.com') s3_helper = S3Helper("https://s3.amazonaws.com")
state, description, test_results, additional_logs = process_result(test_output) state, description, test_results, additional_logs = process_result(test_output)
ch_helper = ClickHouseHelper() ch_helper = ClickHouseHelper()
mark_flaky_tests(ch_helper, check_name, test_results) mark_flaky_tests(ch_helper, check_name, test_results)
report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [run_log_path] + additional_logs, check_name) report_url = upload_results(
s3_helper,
pr_info.number,
pr_info.sha,
test_results,
[run_log_path] + additional_logs,
check_name,
)
print(f"::notice ::Report url: {report_url}") print(f"::notice ::Report url: {report_url}")
post_commit_status(gh, pr_info.sha, check_name, description, state, report_url) post_commit_status(gh, pr_info.sha, check_name, description, state, report_url)
prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name) prepared_events = prepare_tests_results_for_clickhouse(
pr_info,
test_results,
state,
stopwatch.duration_seconds,
stopwatch.start_time_str,
report_url,
check_name,
)
ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events) ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
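get_test_name above recovers the test name from a googletest result line by scanning tokens from the right and skipping the "(N ms)" suffix. A self-contained sketch with an invented sample line:

def get_test_name(line):
    # googletest result lines end with "Suite.Test (N ms)", so walk the
    # whitespace-separated tokens from the right and skip anything containing
    # parentheses, exactly as the helper above does.
    for element in reversed(line.split(" ")):
        if "(" not in element and ")" not in element:
            return element
    raise Exception(f"No test name in line '{line}'")

print(get_test_name("[       OK ] StorageTest.ReadWrite (12 ms)"))  # StorageTest.ReadWrite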
View File
@ -6,7 +6,9 @@ from env_helper import GITHUB_SERVER_URL, GITHUB_REPOSITORY, GITHUB_RUN_ID
from report import ReportColorTheme, create_test_html_report from report import ReportColorTheme, create_test_html_report
def process_logs(s3_client, additional_logs, s3_path_prefix, test_results, with_raw_logs): def process_logs(
s3_client, additional_logs, s3_path_prefix, test_results, with_raw_logs
):
processed_logs = {} processed_logs = {}
# Firstly convert paths of logs from test_results to urls to s3. # Firstly convert paths of logs from test_results to urls to s3.
for test_result in test_results: for test_result in test_results:
@ -21,8 +23,8 @@ def process_logs(s3_client, additional_logs, s3_path_prefix, test_results, with_
test_log_urls.append(processed_logs[log_path]) test_log_urls.append(processed_logs[log_path])
elif log_path: elif log_path:
url = s3_client.upload_test_report_to_s3( url = s3_client.upload_test_report_to_s3(
log_path, log_path, s3_path_prefix + "/" + os.path.basename(log_path)
s3_path_prefix + "/" + os.path.basename(log_path)) )
test_log_urls.append(url) test_log_urls.append(url)
processed_logs[log_path] = url processed_logs[log_path] = url
@ -33,15 +35,29 @@ def process_logs(s3_client, additional_logs, s3_path_prefix, test_results, with_
if log_path: if log_path:
additional_urls.append( additional_urls.append(
s3_client.upload_test_report_to_s3( s3_client.upload_test_report_to_s3(
log_path, log_path, s3_path_prefix + "/" + os.path.basename(log_path)
s3_path_prefix + "/" + os.path.basename(log_path))) )
)
return additional_urls return additional_urls
def upload_results(s3_client, pr_number, commit_sha, test_results, additional_files, check_name, with_raw_logs=True, statuscolors=None): def upload_results(
s3_path_prefix = f"{pr_number}/{commit_sha}/" + check_name.lower().replace(' ', '_').replace('(', '_').replace(')', '_').replace(',', '_') s3_client,
additional_urls = process_logs(s3_client, additional_files, s3_path_prefix, test_results, with_raw_logs) pr_number,
commit_sha,
test_results,
additional_files,
check_name,
with_raw_logs=True,
statuscolors=None,
):
s3_path_prefix = f"{pr_number}/{commit_sha}/" + check_name.lower().replace(
" ", "_"
).replace("(", "_").replace(")", "_").replace(",", "_")
additional_urls = process_logs(
s3_client, additional_files, s3_path_prefix, test_results, with_raw_logs
)
branch_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commits/master" branch_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commits/master"
branch_name = "master" branch_name = "master"
@ -58,14 +74,25 @@ def upload_results(s3_client, pr_number, commit_sha, test_results, additional_fi
else: else:
raw_log_url = task_url raw_log_url = task_url
statuscolors = ReportColorTheme.bugfixcheck if 'bugfix validate check' in check_name else None statuscolors = (
ReportColorTheme.bugfixcheck if "bugfix validate check" in check_name else None
)
html_report = create_test_html_report(check_name, test_results, raw_log_url, html_report = create_test_html_report(
task_url, branch_url, branch_name, commit_url, check_name,
additional_urls, with_raw_logs, statuscolors=statuscolors) test_results,
with open('report.html', 'w', encoding='utf-8') as f: raw_log_url,
task_url,
branch_url,
branch_name,
commit_url,
additional_urls,
with_raw_logs,
statuscolors=statuscolors,
)
with open("report.html", "w", encoding="utf-8") as f:
f.write(html_report) f.write(html_report)
url = s3_client.upload_test_report_to_s3('report.html', s3_path_prefix + ".html") url = s3_client.upload_test_report_to_s3("report.html", s3_path_prefix + ".html")
logging.info("Search result in url %s", url) logging.info("Search result in url %s", url)
return url return url
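upload_results derives the S3 prefix by lower-casing the check name and replacing spaces, parentheses and commas with underscores. The same sanitisation as a small helper, with an invented check name as input:

def check_name_to_s3_prefix(pr_number, commit_sha, check_name):
    # Same sanitisation as upload_results above: lower-case the check name and
    # replace the characters that are awkward in S3 keys with underscores.
    sanitized = check_name.lower()
    for char in (" ", "(", ")", ","):
        sanitized = sanitized.replace(char, "_")
    return f"{pr_number}/{commit_sha}/{sanitized}"

print(check_name_to_s3_prefix(123, "deadbeef", "Unit tests (asan, actions)"))
# 123/deadbeef/unit_tests__asan__actions_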
View File
@ -27,7 +27,7 @@ MAX_TIME_SECONDS = 3600
MAX_TIME_IN_SANDBOX = 20 * 60 # 20 minutes MAX_TIME_IN_SANDBOX = 20 * 60 # 20 minutes
TASK_TIMEOUT = 8 * 60 * 60 # 8 hours TASK_TIMEOUT = 8 * 60 * 60 # 8 hours
NO_CHANGES_MSG = 'Nothing to run' NO_CHANGES_MSG = "Nothing to run"
def stringhash(s): def stringhash(s):
@ -209,7 +209,9 @@ class ClickhouseIntegrationTestsRunner:
self.image_versions = self.params["docker_images_with_versions"] self.image_versions = self.params["docker_images_with_versions"]
self.shuffle_groups = self.params["shuffle_test_groups"] self.shuffle_groups = self.params["shuffle_test_groups"]
self.flaky_check = "flaky check" in self.params["context_name"] self.flaky_check = "flaky check" in self.params["context_name"]
self.bugfix_validate_check = "bugfix validate check" in self.params["context_name"] self.bugfix_validate_check = (
"bugfix validate check" in self.params["context_name"]
)
# if use_tmpfs is not set we assume it to be true, otherwise check # if use_tmpfs is not set we assume it to be true, otherwise check
self.use_tmpfs = "use_tmpfs" not in self.params or self.params["use_tmpfs"] self.use_tmpfs = "use_tmpfs" not in self.params or self.params["use_tmpfs"]
self.disable_net_host = ( self.disable_net_host = (
@ -780,7 +782,9 @@ class ClickhouseIntegrationTestsRunner:
def run_impl(self, repo_path, build_path): def run_impl(self, repo_path, build_path):
if self.flaky_check or self.bugfix_validate_check: if self.flaky_check or self.bugfix_validate_check:
return self.run_flaky_check(repo_path, build_path, should_fail=self.bugfix_validate_check) return self.run_flaky_check(
repo_path, build_path, should_fail=self.bugfix_validate_check
)
self._install_clickhouse(build_path) self._install_clickhouse(build_path)
logging.info( logging.info(

View File
from helpers.test_tools import TSV from helpers.test_tools import TSV
from helpers.network import _NetworkManager from helpers.network import _NetworkManager
@pytest.fixture(autouse=True, scope="session") @pytest.fixture(autouse=True, scope="session")
def cleanup_environment(): def cleanup_environment():
try: try:
if int(os.environ.get("PYTEST_CLEANUP_CONTAINERS", 0)) == 1: if int(os.environ.get("PYTEST_CLEANUP_CONTAINERS", 0)) == 1:
logging.debug(f"Cleaning all iptables rules") logging.debug(f"Cleaning all iptables rules")
_NetworkManager.clean_all_user_iptables_rules() _NetworkManager.clean_all_user_iptables_rules()
result = run_and_check(['docker ps | wc -l'], shell=True) result = run_and_check(["docker ps | wc -l"], shell=True)
if int(result) > 1: if int(result) > 1:
if int(os.environ.get("PYTEST_CLEANUP_CONTAINERS", 0)) != 1: if int(os.environ.get("PYTEST_CLEANUP_CONTAINERS", 0)) != 1:
logging.warning(f"Docker containters({int(result)}) are running before tests run. They can be left from previous pytest run and cause test failures.\n"\ logging.warning(
"You can set env PYTEST_CLEANUP_CONTAINERS=1 or use runner with --cleanup-containers argument to enable automatic containers cleanup.") f"Docker containters({int(result)}) are running before tests run. They can be left from previous pytest run and cause test failures.\n"
"You can set env PYTEST_CLEANUP_CONTAINERS=1 or use runner with --cleanup-containers argument to enable automatic containers cleanup."
)
else: else:
logging.debug("Trying to kill unstopped containers...") logging.debug("Trying to kill unstopped containers...")
run_and_check([f'docker kill $(docker container list --all --quiet)'], shell=True, nothrow=True) run_and_check(
run_and_check([f'docker rm $docker container list --all --quiet)'], shell=True, nothrow=True) [f"docker kill $(docker container list --all --quiet)"],
shell=True,
nothrow=True,
)
run_and_check(
[f"docker rm $docker container list --all --quiet)"],
shell=True,
nothrow=True,
)
logging.debug("Unstopped containers killed") logging.debug("Unstopped containers killed")
r = run_and_check(['docker-compose', 'ps', '--services', '--all']) r = run_and_check(["docker-compose", "ps", "--services", "--all"])
logging.debug(f"Docker ps before start:{r.stdout}") logging.debug(f"Docker ps before start:{r.stdout}")
else: else:
logging.debug(f"No running containers") logging.debug(f"No running containers")
@ -31,8 +42,14 @@ def cleanup_environment():
yield yield
def pytest_addoption(parser): def pytest_addoption(parser):
parser.addoption("--run-id", default="", help="run-id is used as postfix in _instances_{} directory") parser.addoption(
"--run-id",
default="",
help="run-id is used as postfix in _instances_{} directory",
)
def pytest_configure(config): def pytest_configure(config):
os.environ['INTEGRATION_TESTS_RUN_ID'] = config.option.run_id os.environ["INTEGRATION_TESTS_RUN_ID"] = config.option.run_id
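
Condensed, the conftest.py machinery above comes down to three standard pytest hooks: a session-scoped autouse fixture for environment cleanup, an extra command-line option, and pytest_configure exporting that option through an environment variable so helper code outside pytest can read it. A stripped-down sketch with the container-cleanup logic omitted:

import os

import pytest


@pytest.fixture(autouse=True, scope="session")
def cleanup_environment():
    # Everything before `yield` runs once before the first test of the
    # session; everything after it runs once after the last test.
    yield


def pytest_addoption(parser):
    parser.addoption(
        "--run-id",
        default="",
        help="run-id is used as postfix in _instances_{} directory",
    )


def pytest_configure(config):
    os.environ["INTEGRATION_TESTS_RUN_ID"] = config.option.run_id
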

View File

@ -6,79 +6,117 @@ from threading import Timer
class Client: class Client:
def __init__(self, host, port=9000, command='/usr/bin/clickhouse-client'): def __init__(self, host, port=9000, command="/usr/bin/clickhouse-client"):
self.host = host self.host = host
self.port = port self.port = port
self.command = [command] self.command = [command]
if os.path.basename(command) == 'clickhouse': if os.path.basename(command) == "clickhouse":
self.command.append('client') self.command.append("client")
self.command += ['--host', self.host, '--port', str(self.port), '--stacktrace'] self.command += ["--host", self.host, "--port", str(self.port), "--stacktrace"]
def query(self, sql, def query(
stdin=None, self,
timeout=None, sql,
settings=None, stdin=None,
user=None, timeout=None,
password=None, settings=None,
database=None, user=None,
ignore_error=False, password=None,
query_id=None): database=None,
return self.get_query_request(sql, ignore_error=False,
stdin=stdin, query_id=None,
timeout=timeout, ):
settings=settings, return self.get_query_request(
user=user, sql,
password=password, stdin=stdin,
database=database, timeout=timeout,
ignore_error=ignore_error, settings=settings,
query_id=query_id).get_answer() user=user,
password=password,
database=database,
ignore_error=ignore_error,
query_id=query_id,
).get_answer()
def get_query_request(self, sql, def get_query_request(
stdin=None, self,
timeout=None, sql,
settings=None, stdin=None,
user=None, timeout=None,
password=None, settings=None,
database=None, user=None,
ignore_error=False, password=None,
query_id=None): database=None,
ignore_error=False,
query_id=None,
):
command = self.command[:] command = self.command[:]
if stdin is None: if stdin is None:
command += ['--multiquery', '--testmode'] command += ["--multiquery", "--testmode"]
stdin = sql stdin = sql
else: else:
command += ['--query', sql] command += ["--query", sql]
if settings is not None: if settings is not None:
for setting, value in settings.items(): for setting, value in settings.items():
command += ['--' + setting, str(value)] command += ["--" + setting, str(value)]
if user is not None: if user is not None:
command += ['--user', user] command += ["--user", user]
if password is not None: if password is not None:
command += ['--password', password] command += ["--password", password]
if database is not None: if database is not None:
command += ['--database', database] command += ["--database", database]
if query_id is not None: if query_id is not None:
command += ['--query_id', query_id] command += ["--query_id", query_id]
return CommandRequest(command, stdin, timeout, ignore_error) return CommandRequest(command, stdin, timeout, ignore_error)
def query_and_get_error(self, sql, stdin=None, timeout=None, settings=None, user=None, password=None, def query_and_get_error(
database=None): self,
return self.get_query_request(sql, stdin=stdin, timeout=timeout, settings=settings, user=user, sql,
password=password, database=database).get_error() stdin=None,
timeout=None,
settings=None,
user=None,
password=None,
database=None,
):
return self.get_query_request(
sql,
stdin=stdin,
timeout=timeout,
settings=settings,
user=user,
password=password,
database=database,
).get_error()
def query_and_get_answer_with_error(self, sql, stdin=None, timeout=None, settings=None, user=None, password=None, def query_and_get_answer_with_error(
database=None): self,
return self.get_query_request(sql, stdin=stdin, timeout=timeout, settings=settings, user=user, sql,
password=password, database=database).get_answer_and_error() stdin=None,
timeout=None,
settings=None,
user=None,
password=None,
database=None,
):
return self.get_query_request(
sql,
stdin=stdin,
timeout=timeout,
settings=settings,
user=user,
password=password,
database=database,
).get_answer_and_error()
class QueryTimeoutExceedException(Exception): class QueryTimeoutExceedException(Exception):
@ -95,7 +133,7 @@ class QueryRuntimeException(Exception):
class CommandRequest: class CommandRequest:
def __init__(self, command, stdin=None, timeout=None, ignore_error=False): def __init__(self, command, stdin=None, timeout=None, ignore_error=False):
# Write data to tmp file to avoid PIPEs and execution blocking # Write data to tmp file to avoid PIPEs and execution blocking
stdin_file = tempfile.TemporaryFile(mode='w+') stdin_file = tempfile.TemporaryFile(mode="w+")
stdin_file.write(stdin) stdin_file.write(stdin)
stdin_file.seek(0) stdin_file.seek(0)
self.stdout_file = tempfile.TemporaryFile() self.stdout_file = tempfile.TemporaryFile()
@ -108,11 +146,19 @@ class CommandRequest:
# can print some debug information there # can print some debug information there
env = {} env = {}
env["TSAN_OPTIONS"] = "verbosity=0" env["TSAN_OPTIONS"] = "verbosity=0"
self.process = sp.Popen(command, stdin=stdin_file, stdout=self.stdout_file, stderr=self.stderr_file, env=env, universal_newlines=True) self.process = sp.Popen(
command,
stdin=stdin_file,
stdout=self.stdout_file,
stderr=self.stderr_file,
env=env,
universal_newlines=True,
)
self.timer = None self.timer = None
self.process_finished_before_timeout = True self.process_finished_before_timeout = True
if timeout is not None: if timeout is not None:
def kill_process(): def kill_process():
if self.process.poll() is None: if self.process.poll() is None:
self.process_finished_before_timeout = False self.process_finished_before_timeout = False
@ -126,16 +172,25 @@ class CommandRequest:
self.stdout_file.seek(0) self.stdout_file.seek(0)
self.stderr_file.seek(0) self.stderr_file.seek(0)
stdout = self.stdout_file.read().decode('utf-8', errors='replace') stdout = self.stdout_file.read().decode("utf-8", errors="replace")
stderr = self.stderr_file.read().decode('utf-8', errors='replace') stderr = self.stderr_file.read().decode("utf-8", errors="replace")
if self.timer is not None and not self.process_finished_before_timeout and not self.ignore_error: if (
self.timer is not None
and not self.process_finished_before_timeout
and not self.ignore_error
):
logging.debug(f"Timed out. Last stdout:{stdout}, stderr:{stderr}") logging.debug(f"Timed out. Last stdout:{stdout}, stderr:{stderr}")
raise QueryTimeoutExceedException('Client timed out!') raise QueryTimeoutExceedException("Client timed out!")
if (self.process.returncode != 0 or stderr) and not self.ignore_error: if (self.process.returncode != 0 or stderr) and not self.ignore_error:
raise QueryRuntimeException( raise QueryRuntimeException(
'Client failed! Return code: {}, stderr: {}'.format(self.process.returncode, stderr), self.process.returncode, stderr) "Client failed! Return code: {}, stderr: {}".format(
self.process.returncode, stderr
),
self.process.returncode,
stderr,
)
return stdout return stdout
@ -144,14 +199,22 @@ class CommandRequest:
self.stdout_file.seek(0) self.stdout_file.seek(0)
self.stderr_file.seek(0) self.stderr_file.seek(0)
stdout = self.stdout_file.read().decode('utf-8', errors='replace') stdout = self.stdout_file.read().decode("utf-8", errors="replace")
stderr = self.stderr_file.read().decode('utf-8', errors='replace') stderr = self.stderr_file.read().decode("utf-8", errors="replace")
if self.timer is not None and not self.process_finished_before_timeout and not self.ignore_error: if (
raise QueryTimeoutExceedException('Client timed out!') self.timer is not None
and not self.process_finished_before_timeout
and not self.ignore_error
):
raise QueryTimeoutExceedException("Client timed out!")
if (self.process.returncode == 0): if self.process.returncode == 0:
raise QueryRuntimeException('Client expected to be failed but succeeded! stdout: {}'.format(stdout), self.process.returncode, stderr) raise QueryRuntimeException(
"Client expected to be failed but succeeded! stdout: {}".format(stdout),
self.process.returncode,
stderr,
)
return stderr return stderr
@ -160,10 +223,14 @@ class CommandRequest:
self.stdout_file.seek(0) self.stdout_file.seek(0)
self.stderr_file.seek(0) self.stderr_file.seek(0)
stdout = self.stdout_file.read().decode('utf-8', errors='replace') stdout = self.stdout_file.read().decode("utf-8", errors="replace")
stderr = self.stderr_file.read().decode('utf-8', errors='replace') stderr = self.stderr_file.read().decode("utf-8", errors="replace")
if self.timer is not None and not self.process_finished_before_timeout and not self.ignore_error: if (
raise QueryTimeoutExceedException('Client timed out!') self.timer is not None
and not self.process_finished_before_timeout
and not self.ignore_error
):
raise QueryTimeoutExceedException("Client timed out!")
return (stdout, stderr) return (stdout, stderr)
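
Client and CommandRequest above always follow the same shape: stdin is staged in a temporary file (to avoid blocking on pipes), stdout and stderr are captured in temporary files, and a threading.Timer kills the client process if it outlives the timeout. A self-contained sketch of that core, with the ClickHouse-specific argument handling and error classes left out (the demo call assumes a Unix `cat` on PATH):

import subprocess as sp
import tempfile
from threading import Timer


def run_with_timeout(command, stdin_data, timeout):
    stdin_file = tempfile.TemporaryFile(mode="w+")
    stdin_file.write(stdin_data)
    stdin_file.seek(0)
    stdout_file = tempfile.TemporaryFile()
    stderr_file = tempfile.TemporaryFile()

    process = sp.Popen(
        command, stdin=stdin_file, stdout=stdout_file, stderr=stderr_file
    )

    def kill_process():
        if process.poll() is None:
            process.kill()

    timer = Timer(timeout, kill_process)
    timer.start()
    process.wait()
    timer.cancel()

    stdout_file.seek(0)
    stderr_file.seek(0)
    return (
        stdout_file.read().decode("utf-8", errors="replace"),
        stderr_file.read().decode("utf-8", errors="replace"),
        process.returncode,
    )


print(run_with_timeout(["cat"], "hello\n", timeout=5))  # ('hello\n', '', 0)
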

File diff suppressed because it is too large

View File

@ -1,14 +1,29 @@
def corrupt_part_data_on_disk(node, table, part_name): def corrupt_part_data_on_disk(node, table, part_name):
part_path = node.query("SELECT path FROM system.parts WHERE table = '{}' and name = '{}'" part_path = node.query(
.format(table, part_name)).strip() "SELECT path FROM system.parts WHERE table = '{}' and name = '{}'".format(
table, part_name
)
).strip()
corrupt_part_data_by_path(node, part_path) corrupt_part_data_by_path(node, part_path)
def corrupt_part_data_by_path(node, part_path): def corrupt_part_data_by_path(node, part_path):
print("Corrupting part", part_path, "at", node.name) print("Corrupting part", part_path, "at", node.name)
print("Will corrupt: ", print(
node.exec_in_container(['bash', '-c', 'cd {p} && ls *.bin | head -n 1'.format(p=part_path)])) "Will corrupt: ",
node.exec_in_container(
["bash", "-c", "cd {p} && ls *.bin | head -n 1".format(p=part_path)]
),
)
node.exec_in_container(['bash', '-c', node.exec_in_container(
'cd {p} && ls *.bin | head -n 1 | xargs -I{{}} sh -c \'echo "1" >> $1\' -- {{}}'.format( [
p=part_path)], privileged=True) "bash",
"-c",
"cd {p} && ls *.bin | head -n 1 | xargs -I{{}} sh -c 'echo \"1\" >> $1' -- {{}}".format(
p=part_path
),
],
privileged=True,
)
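
The corruption helper boils down to one carefully quoted shell pipeline: pick the first *.bin file of the part and append a byte to it. Here is the same pipeline run against a local scratch directory instead of a container (the path is a stand-in; in the test it goes through exec_in_container with privileged=True):

import os
import subprocess

part_path = "/tmp/fake_part"  # stand-in for a real part directory
os.makedirs(part_path, exist_ok=True)
open(os.path.join(part_path, "data.bin"), "wb").close()

cmd = (
    "cd {p} && ls *.bin | head -n 1 "
    "| xargs -I{{}} sh -c 'echo \"1\" >> $1' -- {{}}".format(p=part_path)
)
subprocess.run(["bash", "-c", cmd], check=True)
print(open(os.path.join(part_path, "data.bin"), "rb").read())  # b'1\n'
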

View File

@ -4,18 +4,18 @@ import copy
class Layout(object): class Layout(object):
LAYOUTS_STR_DICT = { LAYOUTS_STR_DICT = {
'flat': '<flat/>', "flat": "<flat/>",
'hashed': '<hashed/>', "hashed": "<hashed/>",
'cache': '<cache><size_in_cells>128</size_in_cells></cache>', "cache": "<cache><size_in_cells>128</size_in_cells></cache>",
'ssd_cache': '<ssd_cache><path>/etc/clickhouse-server/dictionaries/all</path></ssd_cache>', "ssd_cache": "<ssd_cache><path>/etc/clickhouse-server/dictionaries/all</path></ssd_cache>",
'complex_key_hashed': '<complex_key_hashed/>', "complex_key_hashed": "<complex_key_hashed/>",
'complex_key_hashed_one_key': '<complex_key_hashed/>', "complex_key_hashed_one_key": "<complex_key_hashed/>",
'complex_key_hashed_two_keys': '<complex_key_hashed/>', "complex_key_hashed_two_keys": "<complex_key_hashed/>",
'complex_key_cache': '<complex_key_cache><size_in_cells>128</size_in_cells></complex_key_cache>', "complex_key_cache": "<complex_key_cache><size_in_cells>128</size_in_cells></complex_key_cache>",
'complex_key_ssd_cache': '<complex_key_ssd_cache><path>/etc/clickhouse-server/dictionaries/all</path></complex_key_ssd_cache>', "complex_key_ssd_cache": "<complex_key_ssd_cache><path>/etc/clickhouse-server/dictionaries/all</path></complex_key_ssd_cache>",
'range_hashed': '<range_hashed/>', "range_hashed": "<range_hashed/>",
'direct': '<direct/>', "direct": "<direct/>",
'complex_key_direct': '<complex_key_direct/>' "complex_key_direct": "<complex_key_direct/>",
} }
def __init__(self, name): def __init__(self, name):
@ -23,14 +23,14 @@ class Layout(object):
self.is_complex = False self.is_complex = False
self.is_simple = False self.is_simple = False
self.is_ranged = False self.is_ranged = False
if self.name.startswith('complex'): if self.name.startswith("complex"):
self.layout_type = 'complex' self.layout_type = "complex"
self.is_complex = True self.is_complex = True
elif name.startswith('range'): elif name.startswith("range"):
self.layout_type = 'ranged' self.layout_type = "ranged"
self.is_ranged = True self.is_ranged = True
else: else:
self.layout_type = 'simple' self.layout_type = "simple"
self.is_simple = True self.is_simple = True
def get_str(self): def get_str(self):
@ -38,8 +38,8 @@ class Layout(object):
def get_key_block_name(self): def get_key_block_name(self):
if self.is_complex: if self.is_complex:
return 'key' return "key"
return 'id' return "id"
class Row(object): class Row(object):
@ -59,8 +59,17 @@ class Row(object):
class Field(object): class Field(object):
def __init__(self, name, field_type, is_key=False, is_range_key=False, default=None, hierarchical=False, def __init__(
range_hash_type=None, default_value_for_get=None): self,
name,
field_type,
is_key=False,
is_range_key=False,
default=None,
hierarchical=False,
range_hash_type=None,
default_value_for_get=None,
):
self.name = name self.name = name
self.field_type = field_type self.field_type = field_type
self.is_key = is_key self.is_key = is_key
@ -72,30 +81,32 @@ class Field(object):
self.default_value_for_get = default_value_for_get self.default_value_for_get = default_value_for_get
def get_attribute_str(self): def get_attribute_str(self):
return ''' return """
<attribute> <attribute>
<name>{name}</name> <name>{name}</name>
<type>{field_type}</type> <type>{field_type}</type>
<null_value>{default}</null_value> <null_value>{default}</null_value>
<hierarchical>{hierarchical}</hierarchical> <hierarchical>{hierarchical}</hierarchical>
</attribute>'''.format( </attribute>""".format(
name=self.name, name=self.name,
field_type=self.field_type, field_type=self.field_type,
default=self.default if self.default else '', default=self.default if self.default else "",
hierarchical='true' if self.hierarchical else 'false', hierarchical="true" if self.hierarchical else "false",
) )
def get_simple_index_str(self): def get_simple_index_str(self):
return '<name>{name}</name>'.format(name=self.name) return "<name>{name}</name>".format(name=self.name)
def get_range_hash_str(self): def get_range_hash_str(self):
if not self.range_hash_type: if not self.range_hash_type:
raise Exception("Field {} is not range hashed".format(self.name)) raise Exception("Field {} is not range hashed".format(self.name))
return ''' return """
<range_{type}> <range_{type}>
<name>{name}</name> <name>{name}</name>
</range_{type}> </range_{type}>
'''.format(type=self.range_hash_type, name=self.name) """.format(
type=self.range_hash_type, name=self.name
)
class DictionaryStructure(object): class DictionaryStructure(object):
@ -125,9 +136,14 @@ class DictionaryStructure(object):
if not self.layout.is_complex and len(self.keys) > 1: if not self.layout.is_complex and len(self.keys) > 1:
raise Exception( raise Exception(
"More than one key {} field in non complex layout {}".format(len(self.keys), self.layout.name)) "More than one key {} field in non complex layout {}".format(
len(self.keys), self.layout.name
)
)
if self.layout.is_ranged and (not self.range_key or len(self.range_fields) != 2): if self.layout.is_ranged and (
not self.range_key or len(self.range_fields) != 2
):
raise Exception("Inconsistent configuration of ranged dictionary") raise Exception("Inconsistent configuration of ranged dictionary")
def get_structure_str(self): def get_structure_str(self):
@ -148,7 +164,7 @@ class DictionaryStructure(object):
for range_field in self.range_fields: for range_field in self.range_fields:
ranged_strs.append(range_field.get_range_hash_str()) ranged_strs.append(range_field.get_range_hash_str())
return ''' return """
<layout> <layout>
{layout_str} {layout_str}
</layout> </layout>
@ -158,12 +174,12 @@ class DictionaryStructure(object):
</{key_block_name}> </{key_block_name}>
{range_strs} {range_strs}
{attributes_str} {attributes_str}
</structure>'''.format( </structure>""".format(
layout_str=self.layout.get_str(), layout_str=self.layout.get_str(),
key_block_name=self.layout.get_key_block_name(), key_block_name=self.layout.get_key_block_name(),
key_str='\n'.join(key_strs), key_str="\n".join(key_strs),
attributes_str='\n'.join(fields_strs), attributes_str="\n".join(fields_strs),
range_strs='\n'.join(ranged_strs), range_strs="\n".join(ranged_strs),
) )
def get_ordered_names(self): def get_ordered_names(self):
@ -179,15 +195,19 @@ class DictionaryStructure(object):
def get_all_fields(self): def get_all_fields(self):
return self.keys + self.range_fields + self.ordinary_fields return self.keys + self.range_fields + self.ordinary_fields
def _get_dict_get_common_expression(self, dict_name, field, row, or_default, with_type, has): def _get_dict_get_common_expression(
self, dict_name, field, row, or_default, with_type, has
):
if field in self.keys: if field in self.keys:
raise Exception("Trying to receive key field {} from dictionary".format(field.name)) raise Exception(
"Trying to receive key field {} from dictionary".format(field.name)
)
if not self.layout.is_complex: if not self.layout.is_complex:
if not or_default: if not or_default:
key_expr = ', toUInt64({})'.format(row.data[self.keys[0].name]) key_expr = ", toUInt64({})".format(row.data[self.keys[0].name])
else: else:
key_expr = ', toUInt64({})'.format(self.keys[0].default_value_for_get) key_expr = ", toUInt64({})".format(self.keys[0].default_value_for_get)
else: else:
key_exprs_strs = [] key_exprs_strs = []
for key in self.keys: for key in self.keys:
@ -197,48 +217,57 @@ class DictionaryStructure(object):
val = key.default_value_for_get val = key.default_value_for_get
if isinstance(val, str): if isinstance(val, str):
val = "'" + val + "'" val = "'" + val + "'"
key_exprs_strs.append('to{type}({value})'.format(type=key.field_type, value=val)) key_exprs_strs.append(
key_expr = ', tuple(' + ','.join(key_exprs_strs) + ')' "to{type}({value})".format(type=key.field_type, value=val)
)
key_expr = ", tuple(" + ",".join(key_exprs_strs) + ")"
date_expr = '' date_expr = ""
if self.layout.is_ranged: if self.layout.is_ranged:
val = row.data[self.range_key.name] val = row.data[self.range_key.name]
if isinstance(val, str): if isinstance(val, str):
val = "'" + val + "'" val = "'" + val + "'"
val = "to{type}({val})".format(type=self.range_key.field_type, val=val) val = "to{type}({val})".format(type=self.range_key.field_type, val=val)
date_expr = ', ' + val date_expr = ", " + val
if or_default: if or_default:
raise Exception("Can create 'dictGetOrDefault' query for ranged dictionary") raise Exception(
"Can create 'dictGetOrDefault' query for ranged dictionary"
)
if or_default: if or_default:
or_default_expr = 'OrDefault' or_default_expr = "OrDefault"
if field.default_value_for_get is None: if field.default_value_for_get is None:
raise Exception( raise Exception(
"Can create 'dictGetOrDefault' query for field {} without default_value_for_get".format(field.name)) "Can create 'dictGetOrDefault' query for field {} without default_value_for_get".format(
field.name
)
)
val = field.default_value_for_get val = field.default_value_for_get
if isinstance(val, str): if isinstance(val, str):
val = "'" + val + "'" val = "'" + val + "'"
default_value_for_get = ', to{type}({value})'.format(type=field.field_type, value=val) default_value_for_get = ", to{type}({value})".format(
type=field.field_type, value=val
)
else: else:
or_default_expr = '' or_default_expr = ""
default_value_for_get = '' default_value_for_get = ""
if with_type: if with_type:
field_type = field.field_type field_type = field.field_type
else: else:
field_type = '' field_type = ""
field_name = ", '" + field.name + "'" field_name = ", '" + field.name + "'"
if has: if has:
what = "Has" what = "Has"
field_type = '' field_type = ""
or_default = '' or_default = ""
field_name = '' field_name = ""
date_expr = '' date_expr = ""
def_for_get = '' def_for_get = ""
else: else:
what = "Get" what = "Get"
@ -255,28 +284,38 @@ class DictionaryStructure(object):
def get_get_expressions(self, dict_name, field, row): def get_get_expressions(self, dict_name, field, row):
return [ return [
self._get_dict_get_common_expression(dict_name, field, row, or_default=False, with_type=False, has=False), self._get_dict_get_common_expression(
self._get_dict_get_common_expression(dict_name, field, row, or_default=False, with_type=True, has=False), dict_name, field, row, or_default=False, with_type=False, has=False
),
self._get_dict_get_common_expression(
dict_name, field, row, or_default=False, with_type=True, has=False
),
] ]
def get_get_or_default_expressions(self, dict_name, field, row): def get_get_or_default_expressions(self, dict_name, field, row):
if not self.layout.is_ranged: if not self.layout.is_ranged:
return [ return [
self._get_dict_get_common_expression(dict_name, field, row, or_default=True, with_type=False, self._get_dict_get_common_expression(
has=False), dict_name, field, row, or_default=True, with_type=False, has=False
self._get_dict_get_common_expression(dict_name, field, row, or_default=True, with_type=True, has=False), ),
self._get_dict_get_common_expression(
dict_name, field, row, or_default=True, with_type=True, has=False
),
] ]
return [] return []
def get_has_expressions(self, dict_name, field, row): def get_has_expressions(self, dict_name, field, row):
if not self.layout.is_ranged: if not self.layout.is_ranged:
return [self._get_dict_get_common_expression(dict_name, field, row, or_default=False, with_type=False, return [
has=True)] self._get_dict_get_common_expression(
dict_name, field, row, or_default=False, with_type=False, has=True
)
]
return [] return []
def get_hierarchical_expressions(self, dict_name, row): def get_hierarchical_expressions(self, dict_name, row):
if self.layout.is_simple: if self.layout.is_simple:
key_expr = 'toUInt64({})'.format(row.data[self.keys[0].name]) key_expr = "toUInt64({})".format(row.data[self.keys[0].name])
return [ return [
"dictGetHierarchy('{dict_name}', {key})".format( "dictGetHierarchy('{dict_name}', {key})".format(
dict_name=dict_name, dict_name=dict_name,
@ -288,21 +327,31 @@ class DictionaryStructure(object):
def get_is_in_expressions(self, dict_name, row, parent_row): def get_is_in_expressions(self, dict_name, row, parent_row):
if self.layout.is_simple: if self.layout.is_simple:
child_key_expr = 'toUInt64({})'.format(row.data[self.keys[0].name]) child_key_expr = "toUInt64({})".format(row.data[self.keys[0].name])
parent_key_expr = 'toUInt64({})'.format(parent_row.data[self.keys[0].name]) parent_key_expr = "toUInt64({})".format(parent_row.data[self.keys[0].name])
return [ return [
"dictIsIn('{dict_name}', {child_key}, {parent_key})".format( "dictIsIn('{dict_name}', {child_key}, {parent_key})".format(
dict_name=dict_name, dict_name=dict_name,
child_key=child_key_expr, child_key=child_key_expr,
parent_key=parent_key_expr, ) parent_key=parent_key_expr,
)
] ]
return [] return []
class Dictionary(object): class Dictionary(object):
def __init__(self, name, structure, source, config_path, def __init__(
table_name, fields, min_lifetime=3, max_lifetime=5): self,
name,
structure,
source,
config_path,
table_name,
fields,
min_lifetime=3,
max_lifetime=5,
):
self.name = name self.name = name
self.structure = copy.deepcopy(structure) self.structure = copy.deepcopy(structure)
self.source = copy.deepcopy(source) self.source = copy.deepcopy(source)
@ -313,9 +362,10 @@ class Dictionary(object):
self.max_lifetime = max_lifetime self.max_lifetime = max_lifetime
def generate_config(self): def generate_config(self):
with open(self.config_path, 'w') as result: with open(self.config_path, "w") as result:
if 'direct' not in self.structure.layout.get_str(): if "direct" not in self.structure.layout.get_str():
result.write(''' result.write(
"""
<clickhouse> <clickhouse>
<dictionary> <dictionary>
<lifetime> <lifetime>
@ -329,15 +379,17 @@ class Dictionary(object):
</source> </source>
</dictionary> </dictionary>
</clickhouse> </clickhouse>
'''.format( """.format(
min_lifetime=self.min_lifetime, min_lifetime=self.min_lifetime,
max_lifetime=self.max_lifetime, max_lifetime=self.max_lifetime,
name=self.name, name=self.name,
structure=self.structure.get_structure_str(), structure=self.structure.get_structure_str(),
source=self.source.get_source_str(self.table_name), source=self.source.get_source_str(self.table_name),
)) )
)
else: else:
result.write(''' result.write(
"""
<clickhouse> <clickhouse>
<dictionary> <dictionary>
<name>{name}</name> <name>{name}</name>
@ -347,38 +399,59 @@ class Dictionary(object):
</source> </source>
</dictionary> </dictionary>
</clickhouse> </clickhouse>
'''.format( """.format(
min_lifetime=self.min_lifetime, min_lifetime=self.min_lifetime,
max_lifetime=self.max_lifetime, max_lifetime=self.max_lifetime,
name=self.name, name=self.name,
structure=self.structure.get_structure_str(), structure=self.structure.get_structure_str(),
source=self.source.get_source_str(self.table_name), source=self.source.get_source_str(self.table_name),
)) )
)
def prepare_source(self, cluster): def prepare_source(self, cluster):
self.source.prepare(self.structure, self.table_name, cluster) self.source.prepare(self.structure, self.table_name, cluster)
def load_data(self, data): def load_data(self, data):
if not self.source.prepared: if not self.source.prepared:
raise Exception("Cannot load data for dictionary {}, source is not prepared".format(self.name)) raise Exception(
"Cannot load data for dictionary {}, source is not prepared".format(
self.name
)
)
self.source.load_data(data, self.table_name) self.source.load_data(data, self.table_name)
def get_select_get_queries(self, field, row): def get_select_get_queries(self, field, row):
return ['select {}'.format(expr) for expr in self.structure.get_get_expressions(self.name, field, row)] return [
"select {}".format(expr)
for expr in self.structure.get_get_expressions(self.name, field, row)
]
def get_select_get_or_default_queries(self, field, row): def get_select_get_or_default_queries(self, field, row):
return ['select {}'.format(expr) for expr in return [
self.structure.get_get_or_default_expressions(self.name, field, row)] "select {}".format(expr)
for expr in self.structure.get_get_or_default_expressions(
self.name, field, row
)
]
def get_select_has_queries(self, field, row): def get_select_has_queries(self, field, row):
return ['select {}'.format(expr) for expr in self.structure.get_has_expressions(self.name, field, row)] return [
"select {}".format(expr)
for expr in self.structure.get_has_expressions(self.name, field, row)
]
def get_hierarchical_queries(self, row): def get_hierarchical_queries(self, row):
return ['select {}'.format(expr) for expr in self.structure.get_hierarchical_expressions(self.name, row)] return [
"select {}".format(expr)
for expr in self.structure.get_hierarchical_expressions(self.name, row)
]
def get_is_in_queries(self, row, parent_row): def get_is_in_queries(self, row, parent_row):
return ['select {}'.format(expr) for expr in self.structure.get_is_in_expressions(self.name, row, parent_row)] return [
"select {}".format(expr)
for expr in self.structure.get_is_in_expressions(self.name, row, parent_row)
]
def is_complex(self): def is_complex(self):
return self.structure.layout.is_complex return self.structure.layout.is_complex
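
Both generate_config() and get_structure_str() above assemble the dictionary XML with nothing more than triple-quoted str.format templates, which is why black's only change here is re-indenting the .format(...) calls. A minimal sketch of that template approach (tag layout simplified, all values are placeholders):

config_template = """
<clickhouse>
    <dictionary>
        <name>{name}</name>
        <lifetime>
            <min>{min_lifetime}</min>
            <max>{max_lifetime}</max>
        </lifetime>
        {structure}
        <source>
            {source}
        </source>
    </dictionary>
</clickhouse>
"""

print(
    config_template.format(
        name="some_dict",
        min_lifetime=3,
        max_lifetime=5,
        structure="<structure><id><name>id</name></id></structure>",
        source="<file><path>/some_dict.tsv</path><format>TabSeparated</format></file>",
    )
)
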

View File

@ -11,9 +11,18 @@ import pymysql.cursors
import redis import redis
import logging import logging
class ExternalSource(object): class ExternalSource(object):
def __init__(self, name, internal_hostname, internal_port, def __init__(
docker_hostname, docker_port, user, password): self,
name,
internal_hostname,
internal_port,
docker_hostname,
docker_port,
user,
password,
):
self.name = name self.name = name
self.internal_hostname = internal_hostname self.internal_hostname = internal_hostname
self.internal_port = int(internal_port) self.internal_port = int(internal_port)
@ -23,17 +32,26 @@ class ExternalSource(object):
self.password = password self.password = password
def get_source_str(self, table_name): def get_source_str(self, table_name):
raise NotImplementedError("Method {} is not implemented for {}".format( raise NotImplementedError(
"get_source_config_part", self.__class__.__name__)) "Method {} is not implemented for {}".format(
"get_source_config_part", self.__class__.__name__
)
)
def prepare(self, structure, table_name, cluster): def prepare(self, structure, table_name, cluster):
raise NotImplementedError("Method {} is not implemented for {}".format( raise NotImplementedError(
"prepare_remote_source", self.__class__.__name__)) "Method {} is not implemented for {}".format(
"prepare_remote_source", self.__class__.__name__
)
)
# data is a bunch of Row objects # data is a bunch of Row objects
def load_data(self, data): def load_data(self, data):
raise NotImplementedError("Method {} is not implemented for {}".format( raise NotImplementedError(
"prepare_remote_source", self.__class__.__name__)) "Method {} is not implemented for {}".format(
"prepare_remote_source", self.__class__.__name__
)
)
def compatible_with_layout(self, layout): def compatible_with_layout(self, layout):
return True return True
@ -41,29 +59,32 @@ class ExternalSource(object):
class SourceMySQL(ExternalSource): class SourceMySQL(ExternalSource):
TYPE_MAPPING = { TYPE_MAPPING = {
'UInt8': 'tinyint unsigned', "UInt8": "tinyint unsigned",
'UInt16': 'smallint unsigned', "UInt16": "smallint unsigned",
'UInt32': 'int unsigned', "UInt32": "int unsigned",
'UInt64': 'bigint unsigned', "UInt64": "bigint unsigned",
'Int8': 'tinyint', "Int8": "tinyint",
'Int16': 'smallint', "Int16": "smallint",
'Int32': 'int', "Int32": "int",
'Int64': 'bigint', "Int64": "bigint",
'UUID': 'varchar(36)', "UUID": "varchar(36)",
'Date': 'date', "Date": "date",
'DateTime': 'datetime', "DateTime": "datetime",
'String': 'text', "String": "text",
'Float32': 'float', "Float32": "float",
'Float64': 'double' "Float64": "double",
} }
def create_mysql_conn(self): def create_mysql_conn(self):
logging.debug(f"pymysql connect {self.user}, {self.password}, {self.internal_hostname}, {self.internal_port}") logging.debug(
f"pymysql connect {self.user}, {self.password}, {self.internal_hostname}, {self.internal_port}"
)
self.connection = pymysql.connect( self.connection = pymysql.connect(
user=self.user, user=self.user,
password=self.password, password=self.password,
host=self.internal_hostname, host=self.internal_hostname,
port=self.internal_port) port=self.internal_port,
)
def execute_mysql_query(self, query): def execute_mysql_query(self, query):
with warnings.catch_warnings(): with warnings.catch_warnings():
@ -73,7 +94,7 @@ class SourceMySQL(ExternalSource):
self.connection.commit() self.connection.commit()
def get_source_str(self, table_name): def get_source_str(self, table_name):
return ''' return """
<mysql> <mysql>
<replica> <replica>
<priority>1</priority> <priority>1</priority>
@ -89,7 +110,7 @@ class SourceMySQL(ExternalSource):
<password>{password}</password> <password>{password}</password>
<db>test</db> <db>test</db>
<table>{tbl}</table> <table>{tbl}</table>
</mysql>'''.format( </mysql>""".format(
hostname=self.docker_hostname, hostname=self.docker_hostname,
port=self.docker_port, port=self.docker_port,
user=self.user, user=self.user,
@ -101,14 +122,20 @@ class SourceMySQL(ExternalSource):
if self.internal_hostname is None: if self.internal_hostname is None:
self.internal_hostname = cluster.mysql_ip self.internal_hostname = cluster.mysql_ip
self.create_mysql_conn() self.create_mysql_conn()
self.execute_mysql_query("create database if not exists test default character set 'utf8'") self.execute_mysql_query(
"create database if not exists test default character set 'utf8'"
)
self.execute_mysql_query("drop table if exists test.{}".format(table_name)) self.execute_mysql_query("drop table if exists test.{}".format(table_name))
fields_strs = [] fields_strs = []
for field in structure.keys + structure.ordinary_fields + structure.range_fields: for field in (
fields_strs.append(field.name + ' ' + self.TYPE_MAPPING[field.field_type]) structure.keys + structure.ordinary_fields + structure.range_fields
create_query = '''create table test.{table_name} ( ):
fields_strs.append(field.name + " " + self.TYPE_MAPPING[field.field_type])
create_query = """create table test.{table_name} (
{fields_str}); {fields_str});
'''.format(table_name=table_name, fields_str=','.join(fields_strs)) """.format(
table_name=table_name, fields_str=",".join(fields_strs)
)
self.execute_mysql_query(create_query) self.execute_mysql_query(create_query)
self.ordered_names = structure.get_ordered_names() self.ordered_names = structure.get_ordered_names()
self.prepared = True self.prepared = True
@ -126,18 +153,16 @@ class SourceMySQL(ExternalSource):
else: else:
data = str(data) data = str(data)
sorted_row.append(data) sorted_row.append(data)
values_strs.append('(' + ','.join(sorted_row) + ')') values_strs.append("(" + ",".join(sorted_row) + ")")
query = 'insert into test.{} ({}) values {}'.format( query = "insert into test.{} ({}) values {}".format(
table_name, table_name, ",".join(self.ordered_names), ",".join(values_strs)
','.join(self.ordered_names), )
','.join(values_strs))
self.execute_mysql_query(query) self.execute_mysql_query(query)
class SourceMongo(ExternalSource): class SourceMongo(ExternalSource):
def get_source_str(self, table_name): def get_source_str(self, table_name):
return ''' return """
<mongodb> <mongodb>
<host>{host}</host> <host>{host}</host>
<port>{port}</port> <port>{port}</port>
@ -146,7 +171,7 @@ class SourceMongo(ExternalSource):
<db>test</db> <db>test</db>
<collection>{tbl}</collection> <collection>{tbl}</collection>
</mongodb> </mongodb>
'''.format( """.format(
host=self.docker_hostname, host=self.docker_hostname,
port=self.docker_port, port=self.docker_port,
user=self.user, user=self.user,
@ -155,22 +180,29 @@ class SourceMongo(ExternalSource):
) )
def prepare(self, structure, table_name, cluster): def prepare(self, structure, table_name, cluster):
connection_str = 'mongodb://{user}:{password}@{host}:{port}'.format( connection_str = "mongodb://{user}:{password}@{host}:{port}".format(
host=self.internal_hostname, port=self.internal_port, host=self.internal_hostname,
user=self.user, password=self.password) port=self.internal_port,
user=self.user,
password=self.password,
)
self.connection = pymongo.MongoClient(connection_str) self.connection = pymongo.MongoClient(connection_str)
self.converters = {} self.converters = {}
for field in structure.get_all_fields(): for field in structure.get_all_fields():
if field.field_type == "Date": if field.field_type == "Date":
self.converters[field.name] = lambda x: datetime.datetime.strptime(x, "%Y-%m-%d") self.converters[field.name] = lambda x: datetime.datetime.strptime(
x, "%Y-%m-%d"
)
elif field.field_type == "DateTime": elif field.field_type == "DateTime":
def converter(x): def converter(x):
return datetime.datetime.strptime(x, '%Y-%m-%d %H:%M:%S') return datetime.datetime.strptime(x, "%Y-%m-%d %H:%M:%S")
self.converters[field.name] = converter self.converters[field.name] = converter
else: else:
self.converters[field.name] = lambda x: x self.converters[field.name] = lambda x: x
self.db = self.connection['test'] self.db = self.connection["test"]
self.db.add_user(self.user, self.password) self.db.add_user(self.user, self.password)
self.prepared = True self.prepared = True
@ -191,15 +223,15 @@ class SourceMongoURI(SourceMongo):
def compatible_with_layout(self, layout): def compatible_with_layout(self, layout):
# It is enough to test one layout for this dictionary, since we're # It is enough to test one layout for this dictionary, since we're
# only testing that the connection with URI works. # only testing that the connection with URI works.
return layout.name == 'flat' return layout.name == "flat"
def get_source_str(self, table_name): def get_source_str(self, table_name):
return ''' return """
<mongodb> <mongodb>
<uri>mongodb://{user}:{password}@{host}:{port}/test</uri> <uri>mongodb://{user}:{password}@{host}:{port}/test</uri>
<collection>{tbl}</collection> <collection>{tbl}</collection>
</mongodb> </mongodb>
'''.format( """.format(
host=self.docker_hostname, host=self.docker_hostname,
port=self.docker_port, port=self.docker_port,
user=self.user, user=self.user,
@ -209,9 +241,8 @@ class SourceMongoURI(SourceMongo):
class SourceClickHouse(ExternalSource): class SourceClickHouse(ExternalSource):
def get_source_str(self, table_name): def get_source_str(self, table_name):
return ''' return """
<clickhouse> <clickhouse>
<host>{host}</host> <host>{host}</host>
<port>{port}</port> <port>{port}</port>
@ -220,7 +251,7 @@ class SourceClickHouse(ExternalSource):
<db>test</db> <db>test</db>
<table>{tbl}</table> <table>{tbl}</table>
</clickhouse> </clickhouse>
'''.format( """.format(
host=self.docker_hostname, host=self.docker_hostname,
port=self.docker_port, port=self.docker_port,
user=self.user, user=self.user,
@ -232,11 +263,15 @@ class SourceClickHouse(ExternalSource):
self.node = cluster.instances[self.docker_hostname] self.node = cluster.instances[self.docker_hostname]
self.node.query("CREATE DATABASE IF NOT EXISTS test") self.node.query("CREATE DATABASE IF NOT EXISTS test")
fields_strs = [] fields_strs = []
for field in structure.keys + structure.ordinary_fields + structure.range_fields: for field in (
fields_strs.append(field.name + ' ' + field.field_type) structure.keys + structure.ordinary_fields + structure.range_fields
create_query = '''CREATE TABLE test.{table_name} ( ):
fields_strs.append(field.name + " " + field.field_type)
create_query = """CREATE TABLE test.{table_name} (
{fields_str}) ENGINE MergeTree ORDER BY tuple(); {fields_str}) ENGINE MergeTree ORDER BY tuple();
'''.format(table_name=table_name, fields_str=','.join(fields_strs)) """.format(
table_name=table_name, fields_str=",".join(fields_strs)
)
self.node.query(create_query) self.node.query(create_query)
self.ordered_names = structure.get_ordered_names() self.ordered_names = structure.get_ordered_names()
self.prepared = True self.prepared = True
@ -254,31 +289,31 @@ class SourceClickHouse(ExternalSource):
else: else:
row_data = str(row_data) row_data = str(row_data)
sorted_row.append(row_data) sorted_row.append(row_data)
values_strs.append('(' + ','.join(sorted_row) + ')') values_strs.append("(" + ",".join(sorted_row) + ")")
query = 'INSERT INTO test.{} ({}) values {}'.format( query = "INSERT INTO test.{} ({}) values {}".format(
table_name, table_name, ",".join(self.ordered_names), ",".join(values_strs)
','.join(self.ordered_names), )
','.join(values_strs))
self.node.query(query) self.node.query(query)
class SourceFile(ExternalSource): class SourceFile(ExternalSource):
def get_source_str(self, table_name): def get_source_str(self, table_name):
table_path = "/" + table_name + ".tsv" table_path = "/" + table_name + ".tsv"
return ''' return """
<file> <file>
<path>{path}</path> <path>{path}</path>
<format>TabSeparated</format> <format>TabSeparated</format>
</file> </file>
'''.format( """.format(
path=table_path, path=table_path,
) )
def prepare(self, structure, table_name, cluster): def prepare(self, structure, table_name, cluster):
self.node = cluster.instances[self.docker_hostname] self.node = cluster.instances[self.docker_hostname]
path = "/" + table_name + ".tsv" path = "/" + table_name + ".tsv"
self.node.exec_in_container(["bash", "-c", "touch {}".format(path)], user="root") self.node.exec_in_container(
["bash", "-c", "touch {}".format(path)], user="root"
)
self.ordered_names = structure.get_ordered_names() self.ordered_names = structure.get_ordered_names()
self.prepared = True self.prepared = True
@ -291,35 +326,45 @@ class SourceFile(ExternalSource):
for name in self.ordered_names: for name in self.ordered_names:
sorted_row.append(str(row.data[name])) sorted_row.append(str(row.data[name]))
str_data = '\t'.join(sorted_row) str_data = "\t".join(sorted_row)
self.node.exec_in_container(["bash", "-c", "echo \"{row}\" >> {fname}".format(row=str_data, fname=path)], self.node.exec_in_container(
user="root") [
"bash",
"-c",
'echo "{row}" >> {fname}'.format(row=str_data, fname=path),
],
user="root",
)
def compatible_with_layout(self, layout): def compatible_with_layout(self, layout):
return 'cache' not in layout.name and 'direct' not in layout.name return "cache" not in layout.name and "direct" not in layout.name
class _SourceExecutableBase(ExternalSource): class _SourceExecutableBase(ExternalSource):
def _get_cmd(self, path): def _get_cmd(self, path):
raise NotImplementedError("Method {} is not implemented for {}".format( raise NotImplementedError(
"_get_cmd", self.__class__.__name__)) "Method {} is not implemented for {}".format(
"_get_cmd", self.__class__.__name__
)
)
def get_source_str(self, table_name): def get_source_str(self, table_name):
table_path = "/" + table_name + ".tsv" table_path = "/" + table_name + ".tsv"
return ''' return """
<executable> <executable>
<command>{cmd}</command> <command>{cmd}</command>
<format>TabSeparated</format> <format>TabSeparated</format>
</executable> </executable>
'''.format( """.format(
cmd=self._get_cmd(table_path), cmd=self._get_cmd(table_path),
) )
def prepare(self, structure, table_name, cluster): def prepare(self, structure, table_name, cluster):
self.node = cluster.instances[self.docker_hostname] self.node = cluster.instances[self.docker_hostname]
path = "/" + table_name + ".tsv" path = "/" + table_name + ".tsv"
self.node.exec_in_container(["bash", "-c", "touch {}".format(path)], user="root") self.node.exec_in_container(
["bash", "-c", "touch {}".format(path)], user="root"
)
self.ordered_names = structure.get_ordered_names() self.ordered_names = structure.get_ordered_names()
self.prepared = True self.prepared = True
@ -332,27 +377,31 @@ class _SourceExecutableBase(ExternalSource):
for name in self.ordered_names: for name in self.ordered_names:
sorted_row.append(str(row.data[name])) sorted_row.append(str(row.data[name]))
str_data = '\t'.join(sorted_row) str_data = "\t".join(sorted_row)
self.node.exec_in_container(["bash", "-c", "echo \"{row}\" >> {fname}".format(row=str_data, fname=path)], self.node.exec_in_container(
user='root') [
"bash",
"-c",
'echo "{row}" >> {fname}'.format(row=str_data, fname=path),
],
user="root",
)
class SourceExecutableHashed(_SourceExecutableBase): class SourceExecutableHashed(_SourceExecutableBase):
def _get_cmd(self, path): def _get_cmd(self, path):
return "cat {}".format(path) return "cat {}".format(path)
def compatible_with_layout(self, layout): def compatible_with_layout(self, layout):
return 'hashed' in layout.name return "hashed" in layout.name
class SourceExecutableCache(_SourceExecutableBase): class SourceExecutableCache(_SourceExecutableBase):
def _get_cmd(self, path): def _get_cmd(self, path):
return "cat - >/dev/null;cat {}".format(path) return "cat - >/dev/null;cat {}".format(path)
def compatible_with_layout(self, layout): def compatible_with_layout(self, layout):
return 'cache' in layout.name return "cache" in layout.name
class SourceHTTPBase(ExternalSource): class SourceHTTPBase(ExternalSource):
@ -360,10 +409,11 @@ class SourceHTTPBase(ExternalSource):
def get_source_str(self, table_name): def get_source_str(self, table_name):
self.http_port = SourceHTTPBase.PORT_COUNTER self.http_port = SourceHTTPBase.PORT_COUNTER
url = "{schema}://{host}:{port}/".format(schema=self._get_schema(), host=self.docker_hostname, url = "{schema}://{host}:{port}/".format(
port=self.http_port) schema=self._get_schema(), host=self.docker_hostname, port=self.http_port
)
SourceHTTPBase.PORT_COUNTER += 1 SourceHTTPBase.PORT_COUNTER += 1
return ''' return """
<http> <http>
<url>{url}</url> <url>{url}</url>
<format>TabSeparated</format> <format>TabSeparated</format>
@ -378,22 +428,37 @@ class SourceHTTPBase(ExternalSource):
</header> </header>
</headers> </headers>
</http> </http>
'''.format(url=url) """.format(
url=url
)
def prepare(self, structure, table_name, cluster): def prepare(self, structure, table_name, cluster):
self.node = cluster.instances[self.docker_hostname] self.node = cluster.instances[self.docker_hostname]
path = "/" + table_name + ".tsv" path = "/" + table_name + ".tsv"
self.node.exec_in_container(["bash", "-c", "touch {}".format(path)], user='root') self.node.exec_in_container(
["bash", "-c", "touch {}".format(path)], user="root"
)
script_dir = os.path.dirname(os.path.realpath(__file__)) script_dir = os.path.dirname(os.path.realpath(__file__))
self.node.copy_file_to_container(os.path.join(script_dir, './http_server.py'), '/http_server.py') self.node.copy_file_to_container(
self.node.copy_file_to_container(os.path.join(script_dir, './fake_cert.pem'), '/fake_cert.pem') os.path.join(script_dir, "./http_server.py"), "/http_server.py"
self.node.exec_in_container([ )
"bash", self.node.copy_file_to_container(
"-c", os.path.join(script_dir, "./fake_cert.pem"), "/fake_cert.pem"
"python3 /http_server.py --data-path={tbl} --schema={schema} --host={host} --port={port} --cert-path=/fake_cert.pem".format( )
tbl=path, schema=self._get_schema(), host=self.docker_hostname, port=self.http_port) self.node.exec_in_container(
], detach=True) [
"bash",
"-c",
"python3 /http_server.py --data-path={tbl} --schema={schema} --host={host} --port={port} --cert-path=/fake_cert.pem".format(
tbl=path,
schema=self._get_schema(),
host=self.docker_hostname,
port=self.http_port,
),
],
detach=True,
)
self.ordered_names = structure.get_ordered_names() self.ordered_names = structure.get_ordered_names()
self.prepared = True self.prepared = True
@ -406,9 +471,15 @@ class SourceHTTPBase(ExternalSource):
for name in self.ordered_names: for name in self.ordered_names:
sorted_row.append(str(row.data[name])) sorted_row.append(str(row.data[name]))
str_data = '\t'.join(sorted_row) str_data = "\t".join(sorted_row)
self.node.exec_in_container(["bash", "-c", "echo \"{row}\" >> {fname}".format(row=str_data, fname=path)], self.node.exec_in_container(
user='root') [
"bash",
"-c",
'echo "{row}" >> {fname}'.format(row=str_data, fname=path),
],
user="root",
)
class SourceHTTP(SourceHTTPBase): class SourceHTTP(SourceHTTPBase):
@ -423,29 +494,46 @@ class SourceHTTPS(SourceHTTPBase):
class SourceCassandra(ExternalSource): class SourceCassandra(ExternalSource):
TYPE_MAPPING = { TYPE_MAPPING = {
'UInt8': 'tinyint', "UInt8": "tinyint",
'UInt16': 'smallint', "UInt16": "smallint",
'UInt32': 'int', "UInt32": "int",
'UInt64': 'bigint', "UInt64": "bigint",
'Int8': 'tinyint', "Int8": "tinyint",
'Int16': 'smallint', "Int16": "smallint",
'Int32': 'int', "Int32": "int",
'Int64': 'bigint', "Int64": "bigint",
'UUID': 'uuid', "UUID": "uuid",
'Date': 'date', "Date": "date",
'DateTime': 'timestamp', "DateTime": "timestamp",
'String': 'text', "String": "text",
'Float32': 'float', "Float32": "float",
'Float64': 'double' "Float64": "double",
} }
def __init__(self, name, internal_hostname, internal_port, docker_hostname, docker_port, user, password): def __init__(
ExternalSource.__init__(self, name, internal_hostname, internal_port, docker_hostname, docker_port, user, self,
password) name,
internal_hostname,
internal_port,
docker_hostname,
docker_port,
user,
password,
):
ExternalSource.__init__(
self,
name,
internal_hostname,
internal_port,
docker_hostname,
docker_port,
user,
password,
)
self.structure = dict() self.structure = dict()
def get_source_str(self, table_name): def get_source_str(self, table_name):
return ''' return """
<cassandra> <cassandra>
<host>{host}</host> <host>{host}</host>
<port>{port}</port> <port>{port}</port>
@ -454,7 +542,7 @@ class SourceCassandra(ExternalSource):
<allow_filtering>1</allow_filtering> <allow_filtering>1</allow_filtering>
<where>"Int64_" &lt; 1000000000000000000</where> <where>"Int64_" &lt; 1000000000000000000</where>
</cassandra> </cassandra>
'''.format( """.format(
host=self.docker_hostname, host=self.docker_hostname,
port=self.docker_port, port=self.docker_port,
table=table_name, table=table_name,
@ -464,49 +552,79 @@ class SourceCassandra(ExternalSource):
if self.internal_hostname is None: if self.internal_hostname is None:
self.internal_hostname = cluster.cassandra_ip self.internal_hostname = cluster.cassandra_ip
self.client = cassandra.cluster.Cluster([self.internal_hostname], port=self.internal_port) self.client = cassandra.cluster.Cluster(
[self.internal_hostname], port=self.internal_port
)
self.session = self.client.connect() self.session = self.client.connect()
self.session.execute( self.session.execute(
"create keyspace if not exists test with replication = {'class': 'SimpleStrategy', 'replication_factor' : 1};") "create keyspace if not exists test with replication = {'class': 'SimpleStrategy', 'replication_factor' : 1};"
)
self.session.execute('drop table if exists test."{}"'.format(table_name)) self.session.execute('drop table if exists test."{}"'.format(table_name))
self.structure[table_name] = structure self.structure[table_name] = structure
columns = ['"' + col.name + '" ' + self.TYPE_MAPPING[col.field_type] for col in structure.get_all_fields()] columns = [
'"' + col.name + '" ' + self.TYPE_MAPPING[col.field_type]
for col in structure.get_all_fields()
]
keys = ['"' + col.name + '"' for col in structure.keys] keys = ['"' + col.name + '"' for col in structure.keys]
query = 'create table test."{name}" ({columns}, primary key ({pk}));'.format( query = 'create table test."{name}" ({columns}, primary key ({pk}));'.format(
name=table_name, columns=', '.join(columns), pk=', '.join(keys)) name=table_name, columns=", ".join(columns), pk=", ".join(keys)
)
self.session.execute(query) self.session.execute(query)
self.prepared = True self.prepared = True
def get_value_to_insert(self, value, type): def get_value_to_insert(self, value, type):
if type == 'UUID': if type == "UUID":
return uuid.UUID(value) return uuid.UUID(value)
elif type == 'DateTime': elif type == "DateTime":
return datetime.datetime.strptime(value, '%Y-%m-%d %H:%M:%S') return datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
return value return value
def load_data(self, data, table_name): def load_data(self, data, table_name):
names_and_types = [(field.name, field.field_type) for field in self.structure[table_name].get_all_fields()] names_and_types = [
(field.name, field.field_type)
for field in self.structure[table_name].get_all_fields()
]
columns = ['"' + col[0] + '"' for col in names_and_types] columns = ['"' + col[0] + '"' for col in names_and_types]
insert = 'insert into test."{table}" ({columns}) values ({args})'.format( insert = 'insert into test."{table}" ({columns}) values ({args})'.format(
table=table_name, columns=','.join(columns), args=','.join(['%s'] * len(columns))) table=table_name,
columns=",".join(columns),
args=",".join(["%s"] * len(columns)),
)
for row in data: for row in data:
values = [self.get_value_to_insert(row.get_value_by_name(col[0]), col[1]) for col in names_and_types] values = [
self.get_value_to_insert(row.get_value_by_name(col[0]), col[1])
for col in names_and_types
]
self.session.execute(insert, values) self.session.execute(insert, values)
class SourceRedis(ExternalSource): class SourceRedis(ExternalSource):
def __init__( def __init__(
self, name, internal_hostname, internal_port, docker_hostname, docker_port, user, password, db_index, self,
storage_type name,
internal_hostname,
internal_port,
docker_hostname,
docker_port,
user,
password,
db_index,
storage_type,
): ):
super(SourceRedis, self).__init__( super(SourceRedis, self).__init__(
name, internal_hostname, internal_port, docker_hostname, docker_port, user, password name,
internal_hostname,
internal_port,
docker_hostname,
docker_port,
user,
password,
) )
self.storage_type = storage_type self.storage_type = storage_type
self.db_index = db_index self.db_index = db_index
def get_source_str(self, table_name): def get_source_str(self, table_name):
return ''' return """
<redis> <redis>
<host>{host}</host> <host>{host}</host>
<port>{port}</port> <port>{port}</port>
@ -514,7 +632,7 @@ class SourceRedis(ExternalSource):
<db_index>{db_index}</db_index> <db_index>{db_index}</db_index>
<storage_type>{storage_type}</storage_type> <storage_type>{storage_type}</storage_type>
</redis> </redis>
'''.format( """.format(
host=self.docker_hostname, host=self.docker_hostname,
port=self.docker_port, port=self.docker_port,
password=self.password, password=self.password,
@ -523,8 +641,12 @@ class SourceRedis(ExternalSource):
) )
def prepare(self, structure, table_name, cluster): def prepare(self, structure, table_name, cluster):
self.client = redis.StrictRedis(host=self.internal_hostname, port=self.internal_port, db=self.db_index, self.client = redis.StrictRedis(
password=self.password or None) host=self.internal_hostname,
port=self.internal_port,
db=self.db_index,
password=self.password or None,
)
self.prepared = True self.prepared = True
self.ordered_names = structure.get_ordered_names() self.ordered_names = structure.get_ordered_names()
@ -540,33 +662,52 @@ class SourceRedis(ExternalSource):
self.client.hset(*values) self.client.hset(*values)
def compatible_with_layout(self, layout): def compatible_with_layout(self, layout):
return layout.is_simple and self.storage_type == "simple" or layout.is_complex and self.storage_type == "hash_map" return (
layout.is_simple
and self.storage_type == "simple"
or layout.is_complex
and self.storage_type == "hash_map"
)
class SourceAerospike(ExternalSource): class SourceAerospike(ExternalSource):
def __init__(self, name, internal_hostname, internal_port, def __init__(
docker_hostname, docker_port, user, password): self,
ExternalSource.__init__(self, name, internal_hostname, internal_port, name,
docker_hostname, docker_port, user, password) internal_hostname,
internal_port,
docker_hostname,
docker_port,
user,
password,
):
ExternalSource.__init__(
self,
name,
internal_hostname,
internal_port,
docker_hostname,
docker_port,
user,
password,
)
self.namespace = "test" self.namespace = "test"
self.set = "test_set" self.set = "test_set"
def get_source_str(self, table_name): def get_source_str(self, table_name):
print("AEROSPIKE get source str") print("AEROSPIKE get source str")
return ''' return """
<aerospike> <aerospike>
<host>{host}</host> <host>{host}</host>
<port>{port}</port> <port>{port}</port>
</aerospike> </aerospike>
'''.format( """.format(
host=self.docker_hostname, host=self.docker_hostname,
port=self.docker_port, port=self.docker_port,
) )
def prepare(self, structure, table_name, cluster): def prepare(self, structure, table_name, cluster):
config = { config = {"hosts": [(self.internal_hostname, self.internal_port)]}
'hosts': [(self.internal_hostname, self.internal_port)]
}
self.client = aerospike.client(config).connect() self.client = aerospike.client(config).connect()
self.prepared = True self.prepared = True
print("PREPARED AEROSPIKE") print("PREPARED AEROSPIKE")
@ -601,10 +742,14 @@ class SourceAerospike(ExternalSource):
for value in values: for value in values:
key = (self.namespace, self.set, value[0]) key = (self.namespace, self.set, value[0])
print(key) print(key)
self.client.put(key, {"bin_value": value[1]}, policy={"key": aerospike.POLICY_KEY_SEND}) self.client.put(
key,
{"bin_value": value[1]},
policy={"key": aerospike.POLICY_KEY_SEND},
)
assert self.client.exists(key) assert self.client.exists(key)
else: else:
assert ("VALUES SIZE != 2") assert "VALUES SIZE != 2"
# print(values) # print(values)
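
Every source in this file implements the same small contract declared by ExternalSource: get_source_str() returns the XML fragment that goes inside <source>, prepare() sets up the backing store and ordered_names, and load_data() inserts rows. A hypothetical minimal source showing just that shape (not one of the repository's real sources):

class SourceInMemory:
    def __init__(self, name):
        self.name = name
        self.rows = []
        self.prepared = False

    def get_source_str(self, table_name):
        # Fragment placed inside <source>...</source> of the dictionary config.
        return """
        <file>
            <path>/{tbl}.tsv</path>
            <format>TabSeparated</format>
        </file>
        """.format(
            tbl=table_name
        )

    def prepare(self, structure, table_name, cluster):
        self.ordered_names = ["id", "value"]  # placeholder column order
        self.prepared = True

    def load_data(self, data, table_name):
        if not self.prepared:
            raise Exception(
                "Cannot load data for source {}, it is not prepared".format(self.name)
            )
        self.rows.extend(data)


src = SourceInMemory("demo")
src.prepare(structure=None, table_name="demo", cluster=None)
src.load_data([{"id": 1, "value": "a"}], "demo")
print(src.get_source_str("demo"))
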

View File

@ -10,27 +10,44 @@ import socket
import tempfile import tempfile
import logging import logging
import os import os
class mk_krb_conf(object): class mk_krb_conf(object):
def __init__(self, krb_conf, kdc_ip): def __init__(self, krb_conf, kdc_ip):
self.krb_conf = krb_conf self.krb_conf = krb_conf
self.kdc_ip = kdc_ip self.kdc_ip = kdc_ip
self.amended_krb_conf = None self.amended_krb_conf = None
def __enter__(self): def __enter__(self):
with open(self.krb_conf) as f: with open(self.krb_conf) as f:
content = f.read() content = f.read()
amended_content = content.replace('hdfskerberos', self.kdc_ip) amended_content = content.replace("hdfskerberos", self.kdc_ip)
self.amended_krb_conf = tempfile.NamedTemporaryFile(delete=False, mode="w+") self.amended_krb_conf = tempfile.NamedTemporaryFile(delete=False, mode="w+")
self.amended_krb_conf.write(amended_content) self.amended_krb_conf.write(amended_content)
self.amended_krb_conf.close() self.amended_krb_conf.close()
return self.amended_krb_conf.name return self.amended_krb_conf.name
def __exit__(self, type, value, traceback): def __exit__(self, type, value, traceback):
if self.amended_krb_conf is not None: if self.amended_krb_conf is not None:
self.amended_krb_conf.close() self.amended_krb_conf.close()
class HDFSApi(object): class HDFSApi(object):
def __init__(self, user, host, proxy_port, data_port, timeout=100, kerberized=False, principal=None, def __init__(
keytab=None, krb_conf=None, self,
protocol = "http", hdfs_ip = None, kdc_ip = None): user,
host,
proxy_port,
data_port,
timeout=100,
kerberized=False,
principal=None,
keytab=None,
krb_conf=None,
protocol="http",
hdfs_ip=None,
kdc_ip=None,
):
self.host = host self.host = host
self.protocol = protocol self.protocol = protocol
self.proxy_port = proxy_port self.proxy_port = proxy_port
@ -55,7 +72,11 @@ class HDFSApi(object):
if kerberized: if kerberized:
self._run_kinit() self._run_kinit()
self.kerberos_auth = reqkerb.HTTPKerberosAuth(mutual_authentication=reqkerb.DISABLED, hostname_override=self.host, principal=self.principal) self.kerberos_auth = reqkerb.HTTPKerberosAuth(
mutual_authentication=reqkerb.DISABLED,
hostname_override=self.host,
principal=self.principal,
)
if self.kerberos_auth is None: if self.kerberos_auth is None:
print("failed to obtain kerberos_auth") print("failed to obtain kerberos_auth")
else: else:
@ -70,7 +91,11 @@ class HDFSApi(object):
os.environ["KRB5_CONFIG"] = instantiated_krb_conf os.environ["KRB5_CONFIG"] = instantiated_krb_conf
cmd = "(kinit -R -t {keytab} -k {principal} || (sleep 5 && kinit -R -t {keytab} -k {principal})) ; klist".format(instantiated_krb_conf=instantiated_krb_conf, keytab=self.keytab, principal=self.principal) cmd = "(kinit -R -t {keytab} -k {principal} || (sleep 5 && kinit -R -t {keytab} -k {principal})) ; klist".format(
instantiated_krb_conf=instantiated_krb_conf,
keytab=self.keytab,
principal=self.principal,
)
start = time.time() start = time.time()
@ -79,10 +104,18 @@ class HDFSApi(object):
res = subprocess.run(cmd, shell=True) res = subprocess.run(cmd, shell=True)
if res.returncode != 0: if res.returncode != 0:
# check_call(...) from subprocess does not print stderr, so we do it manually # check_call(...) from subprocess does not print stderr, so we do it manually
logging.debug('Stderr:\n{}\n'.format(res.stderr.decode('utf-8'))) logging.debug(
logging.debug('Stdout:\n{}\n'.format(res.stdout.decode('utf-8'))) "Stderr:\n{}\n".format(res.stderr.decode("utf-8"))
logging.debug('Env:\n{}\n'.format(env)) )
raise Exception('Command {} return non-zero code {}: {}'.format(args, res.returncode, res.stderr.decode('utf-8'))) logging.debug(
"Stdout:\n{}\n".format(res.stdout.decode("utf-8"))
)
logging.debug("Env:\n{}\n".format(env))
raise Exception(
"Command {} return non-zero code {}: {}".format(
args, res.returncode, res.stderr.decode("utf-8")
)
)
logging.debug("KDC started, kinit successfully run") logging.debug("KDC started, kinit successfully run")
return return
@ -97,28 +130,60 @@ class HDFSApi(object):
for i in range(0, cnt): for i in range(0, cnt):
logging.debug(f"CALL: {str(kwargs)}") logging.debug(f"CALL: {str(kwargs)}")
response_data = func(**kwargs) response_data = func(**kwargs)
logging.debug(f"response_data:{response_data.content} headers:{response_data.headers}") logging.debug(
f"response_data:{response_data.content} headers:{response_data.headers}"
)
if response_data.status_code == expected_code: if response_data.status_code == expected_code:
return response_data return response_data
else: else:
logging.error(f"unexpected response_data.status_code {response_data.status_code} != {expected_code}") logging.error(
f"unexpected response_data.status_code {response_data.status_code} != {expected_code}"
)
time.sleep(1) time.sleep(1)
response_data.raise_for_status() response_data.raise_for_status()
def read_data(self, path, universal_newlines=True): def read_data(self, path, universal_newlines=True):
logging.debug("read_data protocol:{} host:{} ip:{} proxy port:{} data port:{} path: {}".format(self.protocol, self.host, self.hdfs_ip, self.proxy_port, self.data_port, path)) logging.debug(
response = self.req_wrapper(requests.get, 307, url="{protocol}://{ip}:{port}/webhdfs/v1{path}?op=OPEN".format(protocol=self.protocol, ip=self.hdfs_ip, port=self.proxy_port, path=path), headers={'host': str(self.hdfs_ip)}, allow_redirects=False, verify=False, auth=self.kerberos_auth) "read_data protocol:{} host:{} ip:{} proxy port:{} data port:{} path: {}".format(
self.protocol,
self.host,
self.hdfs_ip,
self.proxy_port,
self.data_port,
path,
)
)
response = self.req_wrapper(
requests.get,
307,
url="{protocol}://{ip}:{port}/webhdfs/v1{path}?op=OPEN".format(
protocol=self.protocol, ip=self.hdfs_ip, port=self.proxy_port, path=path
),
headers={"host": str(self.hdfs_ip)},
allow_redirects=False,
verify=False,
auth=self.kerberos_auth,
)
# additional_params = '&'.join(response.headers['Location'].split('&')[1:2]) # additional_params = '&'.join(response.headers['Location'].split('&')[1:2])
location = None location = None
if self.kerberized: if self.kerberized:
location = response.headers['Location'].replace("kerberizedhdfs1:1006", "{}:{}".format(self.hdfs_ip, self.data_port)) location = response.headers["Location"].replace(
"kerberizedhdfs1:1006", "{}:{}".format(self.hdfs_ip, self.data_port)
)
else: else:
location = response.headers['Location'].replace("hdfs1:50075", "{}:{}".format(self.hdfs_ip, self.data_port)) location = response.headers["Location"].replace(
"hdfs1:50075", "{}:{}".format(self.hdfs_ip, self.data_port)
)
logging.debug("redirected to {}".format(location)) logging.debug("redirected to {}".format(location))
response_data = self.req_wrapper(requests.get, 200, url=location, headers={'host': self.hdfs_ip}, response_data = self.req_wrapper(
verify=False, auth=self.kerberos_auth) requests.get,
200,
url=location,
headers={"host": self.hdfs_ip},
verify=False,
auth=self.kerberos_auth,
)
if universal_newlines: if universal_newlines:
return response_data.text return response_data.text
@ -126,23 +191,38 @@ class HDFSApi(object):
return response_data.content return response_data.content
def write_data(self, path, content): def write_data(self, path, content):
logging.debug("write_data protocol:{} host:{} port:{} path: {} user:{}, principal:{}".format( logging.debug(
self.protocol, self.host, self.proxy_port, path, self.user, self.principal)) "write_data protocol:{} host:{} port:{} path: {} user:{}, principal:{}".format(
named_file = NamedTemporaryFile(mode='wb+') self.protocol,
self.host,
self.proxy_port,
path,
self.user,
self.principal,
)
)
named_file = NamedTemporaryFile(mode="wb+")
fpath = named_file.name fpath = named_file.name
if isinstance(content, str): if isinstance(content, str):
content = content.encode() content = content.encode()
named_file.write(content) named_file.write(content)
named_file.flush() named_file.flush()
response = self.req_wrapper(requests.put, 307, response = self.req_wrapper(
url="{protocol}://{ip}:{port}/webhdfs/v1{path}?op=CREATE".format(protocol=self.protocol, ip=self.hdfs_ip, requests.put,
port=self.proxy_port, 307,
path=path, user=self.user), url="{protocol}://{ip}:{port}/webhdfs/v1{path}?op=CREATE".format(
protocol=self.protocol,
ip=self.hdfs_ip,
port=self.proxy_port,
path=path,
user=self.user,
),
allow_redirects=False, allow_redirects=False,
headers={'host': str(self.hdfs_ip)}, headers={"host": str(self.hdfs_ip)},
params={'overwrite' : 'true'}, params={"overwrite": "true"},
verify=False, auth=self.kerberos_auth verify=False,
auth=self.kerberos_auth,
) )
logging.debug("HDFS api response:{}".format(response.headers)) logging.debug("HDFS api response:{}".format(response.headers))
@ -150,23 +230,30 @@ class HDFSApi(object):
# additional_params = '&'.join( # additional_params = '&'.join(
# response.headers['Location'].split('&')[1:2] + ["user.name={}".format(self.user), "overwrite=true"]) # response.headers['Location'].split('&')[1:2] + ["user.name={}".format(self.user), "overwrite=true"])
if self.kerberized: if self.kerberized:
location = response.headers['Location'].replace("kerberizedhdfs1:1006", "{}:{}".format(self.hdfs_ip, self.data_port)) location = response.headers["Location"].replace(
"kerberizedhdfs1:1006", "{}:{}".format(self.hdfs_ip, self.data_port)
)
else: else:
location = response.headers['Location'].replace("hdfs1:50075", "{}:{}".format(self.hdfs_ip, self.data_port)) location = response.headers["Location"].replace(
"hdfs1:50075", "{}:{}".format(self.hdfs_ip, self.data_port)
)
with open(fpath, mode="rb") as fh: with open(fpath, mode="rb") as fh:
file_data = fh.read() file_data = fh.read()
protocol = "http" # self.protocol protocol = "http" # self.protocol
response = self.req_wrapper(requests.put, 201, response = self.req_wrapper(
requests.put,
201,
url="{location}".format(location=location), url="{location}".format(location=location),
data=file_data, data=file_data,
headers={'content-type':'text/plain', 'host': str(self.hdfs_ip)}, headers={"content-type": "text/plain", "host": str(self.hdfs_ip)},
params={'file': path, 'user.name' : self.user}, params={"file": path, "user.name": self.user},
allow_redirects=False, verify=False, auth=self.kerberos_auth allow_redirects=False,
verify=False,
auth=self.kerberos_auth,
) )
logging.debug(f"{response.content} {response.headers}") logging.debug(f"{response.content} {response.headers}")
def write_gzip_data(self, path, content): def write_gzip_data(self, path, content):
if isinstance(content, str): if isinstance(content, str):
content = content.encode() content = content.encode()
@ -176,4 +263,10 @@ class HDFSApi(object):
self.write_data(path, out.getvalue()) self.write_data(path, out.getvalue())
def read_gzip_data(self, path): def read_gzip_data(self, path):
return gzip.GzipFile(fileobj=io.BytesIO(self.read_data(path, universal_newlines=False))).read().decode() return (
gzip.GzipFile(
fileobj=io.BytesIO(self.read_data(path, universal_newlines=False))
)
.read()
.decode()
)
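The same rule applies at call sites: black first tries to keep all arguments of a call on one line and otherwise breaks them out one per line with a trailing comma, which is what happened to the req_wrapper invocations above. A self-contained sketch; the URL shape mirrors the WebHDFS OPEN request shown above, but the function name and values are placeholders:

import requests


def open_via_webhdfs(ip, port, path):
    # Placeholder helper: illustrates the exploded call layout black emits once
    # the argument list overflows 88 columns (the trailing comma keeps it exploded).
    return requests.get(
        f"http://{ip}:{port}/webhdfs/v1{path}?op=OPEN",
        headers={"host": str(ip)},
        allow_redirects=False,
        verify=False,
    )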
@ -9,9 +9,14 @@ from http.server import BaseHTTPRequestHandler, HTTPServer
# Decorator used to see if authentication works for external dictionaries that use an HTTP source. # Decorator used to see if authentication works for external dictionaries that use an HTTP source.
def check_auth(fn): def check_auth(fn):
def wrapper(req): def wrapper(req):
auth_header = req.headers.get('authorization', None) auth_header = req.headers.get("authorization", None)
api_key = req.headers.get('api-key', None) api_key = req.headers.get("api-key", None)
if not auth_header or auth_header != 'Basic Zm9vOmJhcg==' or not api_key or api_key != 'secret': if (
not auth_header
or auth_header != "Basic Zm9vOmJhcg=="
or not api_key
or api_key != "secret"
):
req.send_response(401) req.send_response(401)
else: else:
fn(req) fn(req)
@ -35,15 +40,15 @@ def start_server(server_address, data_path, schema, cert_path, address_family):
def __send_headers(self): def __send_headers(self):
self.send_response(200) self.send_response(200)
self.send_header('Content-type', 'text/tsv') self.send_header("Content-type", "text/tsv")
self.end_headers() self.end_headers()
def __send_data(self, only_ids=None): def __send_data(self, only_ids=None):
with open(data_path, 'r') as fl: with open(data_path, "r") as fl:
reader = csv.reader(fl, delimiter='\t') reader = csv.reader(fl, delimiter="\t")
for row in reader: for row in reader:
if not only_ids or (row[0] in only_ids): if not only_ids or (row[0] in only_ids):
self.wfile.write(('\t'.join(row) + '\n').encode()) self.wfile.write(("\t".join(row) + "\n").encode())
def __read_and_decode_post_ids(self): def __read_and_decode_post_ids(self):
data = self.__read_and_decode_post_data() data = self.__read_and_decode_post_data()
@ -51,7 +56,7 @@ def start_server(server_address, data_path, schema, cert_path, address_family):
def __read_and_decode_post_data(self): def __read_and_decode_post_data(self):
transfer_encoding = self.headers.get("Transfer-encoding") transfer_encoding = self.headers.get("Transfer-encoding")
decoded = ""; decoded = ""
if transfer_encoding == "chunked": if transfer_encoding == "chunked":
while True: while True:
s = self.rfile.readline().decode() s = self.rfile.readline().decode()
@ -69,19 +74,29 @@ def start_server(server_address, data_path, schema, cert_path, address_family):
HTTPServer.address_family = socket.AF_INET6 HTTPServer.address_family = socket.AF_INET6
httpd = HTTPServer(server_address, TSVHTTPHandler) httpd = HTTPServer(server_address, TSVHTTPHandler)
if schema == "https": if schema == "https":
httpd.socket = ssl.wrap_socket(httpd.socket, certfile=cert_path, server_side=True) httpd.socket = ssl.wrap_socket(
httpd.socket, certfile=cert_path, server_side=True
)
httpd.serve_forever() httpd.serve_forever()
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Simple HTTP server returns data from file") parser = argparse.ArgumentParser(
description="Simple HTTP server returns data from file"
)
parser.add_argument("--host", default="localhost") parser.add_argument("--host", default="localhost")
parser.add_argument("--port", default=5555, type=int) parser.add_argument("--port", default=5555, type=int)
parser.add_argument("--data-path", required=True) parser.add_argument("--data-path", required=True)
parser.add_argument("--schema", choices=("http", "https"), required=True) parser.add_argument("--schema", choices=("http", "https"), required=True)
parser.add_argument("--cert-path", default="./fake_cert.pem") parser.add_argument("--cert-path", default="./fake_cert.pem")
parser.add_argument('--address-family', choices=("ipv4", "ipv6"), default="ipv4") parser.add_argument("--address-family", choices=("ipv4", "ipv6"), default="ipv4")
args = parser.parse_args() args = parser.parse_args()
start_server((args.host, args.port), args.data_path, args.schema, args.cert_path, args.address_family) start_server(
(args.host, args.port),
args.data_path,
args.schema,
args.cert_path,
args.address_family,
)
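Most of the remaining churn in this file is black's string quote normalization: single-quoted literals become double-quoted unless the switch would add backslash escapes. A two-line illustration (the values are placeholders, already in the form black produces):

content_type = "text/tsv"  # was 'text/tsv' before normalization
quoted_inside = 'he said "ok"'  # kept single-quoted, switching would require escapes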
@ -22,26 +22,38 @@ class PartitionManager:
self._netem_delayed_instances = [] self._netem_delayed_instances = []
_NetworkManager.get() _NetworkManager.get()
def drop_instance_zk_connections(self, instance, action='DROP'): def drop_instance_zk_connections(self, instance, action="DROP"):
self._check_instance(instance) self._check_instance(instance)
self._add_rule({'source': instance.ip_address, 'destination_port': 2181, 'action': action}) self._add_rule(
self._add_rule({'destination': instance.ip_address, 'source_port': 2181, 'action': action}) {"source": instance.ip_address, "destination_port": 2181, "action": action}
)
self._add_rule(
{"destination": instance.ip_address, "source_port": 2181, "action": action}
)
def restore_instance_zk_connections(self, instance, action='DROP'): def restore_instance_zk_connections(self, instance, action="DROP"):
self._check_instance(instance) self._check_instance(instance)
self._delete_rule({'source': instance.ip_address, 'destination_port': 2181, 'action': action}) self._delete_rule(
self._delete_rule({'destination': instance.ip_address, 'source_port': 2181, 'action': action}) {"source": instance.ip_address, "destination_port": 2181, "action": action}
)
self._delete_rule(
{"destination": instance.ip_address, "source_port": 2181, "action": action}
)
def partition_instances(self, left, right, port=None, action='DROP'): def partition_instances(self, left, right, port=None, action="DROP"):
self._check_instance(left) self._check_instance(left)
self._check_instance(right) self._check_instance(right)
def create_rule(src, dst): def create_rule(src, dst):
rule = {'source': src.ip_address, 'destination': dst.ip_address, 'action': action} rule = {
"source": src.ip_address,
"destination": dst.ip_address,
"action": action,
}
if port is not None: if port is not None:
rule['destination_port'] = port rule["destination_port"] = port
return rule return rule
self._add_rule(create_rule(left, right)) self._add_rule(create_rule(left, right))
@ -57,7 +69,9 @@ class PartitionManager:
while self._netem_delayed_instances: while self._netem_delayed_instances:
instance = self._netem_delayed_instances.pop() instance = self._netem_delayed_instances.pop()
instance.exec_in_container(["bash", "-c", "tc qdisc del dev eth0 root netem"], user="root") instance.exec_in_container(
["bash", "-c", "tc qdisc del dev eth0 root netem"], user="root"
)
def pop_rules(self): def pop_rules(self):
res = self._iptables_rules[:] res = self._iptables_rules[:]
@ -71,7 +85,7 @@ class PartitionManager:
@staticmethod @staticmethod
def _check_instance(instance): def _check_instance(instance):
if instance.ip_address is None: if instance.ip_address is None:
raise Exception('Instance + ' + instance.name + ' is not launched!') raise Exception("Instance + " + instance.name + " is not launched!")
def _add_rule(self, rule): def _add_rule(self, rule):
_NetworkManager.get().add_iptables_rule(**rule) _NetworkManager.get().add_iptables_rule(**rule)
@ -82,7 +96,14 @@ class PartitionManager:
self._iptables_rules.remove(rule) self._iptables_rules.remove(rule)
def _add_tc_netem_delay(self, instance, delay_ms): def _add_tc_netem_delay(self, instance, delay_ms):
instance.exec_in_container(["bash", "-c", "tc qdisc add dev eth0 root netem delay {}ms".format(delay_ms)], user="root") instance.exec_in_container(
[
"bash",
"-c",
"tc qdisc add dev eth0 root netem delay {}ms".format(delay_ms),
],
user="root",
)
self._netem_delayed_instances.append(instance) self._netem_delayed_instances.append(instance)
def __enter__(self): def __enter__(self):
@ -127,12 +148,12 @@ class _NetworkManager:
return cls._instance return cls._instance
def add_iptables_rule(self, **kwargs): def add_iptables_rule(self, **kwargs):
cmd = ['iptables', '--wait', '-I', 'DOCKER-USER', '1'] cmd = ["iptables", "--wait", "-I", "DOCKER-USER", "1"]
cmd.extend(self._iptables_cmd_suffix(**kwargs)) cmd.extend(self._iptables_cmd_suffix(**kwargs))
self._exec_run(cmd, privileged=True) self._exec_run(cmd, privileged=True)
def delete_iptables_rule(self, **kwargs): def delete_iptables_rule(self, **kwargs):
cmd = ['iptables', '--wait', '-D', 'DOCKER-USER'] cmd = ["iptables", "--wait", "-D", "DOCKER-USER"]
cmd.extend(self._iptables_cmd_suffix(**kwargs)) cmd.extend(self._iptables_cmd_suffix(**kwargs))
self._exec_run(cmd, privileged=True) self._exec_run(cmd, privileged=True)
@ -144,40 +165,66 @@ class _NetworkManager:
res = subprocess.run("iptables --wait -D DOCKER-USER 1", shell=True) res = subprocess.run("iptables --wait -D DOCKER-USER 1", shell=True)
if res.returncode != 0: if res.returncode != 0:
logging.info("All iptables rules cleared, " + str(iptables_iter) + " iterations, last error: " + str(res.stderr)) logging.info(
"All iptables rules cleared, "
+ str(iptables_iter)
+ " iterations, last error: "
+ str(res.stderr)
)
return return
@staticmethod @staticmethod
def _iptables_cmd_suffix( def _iptables_cmd_suffix(
source=None, destination=None, source=None,
source_port=None, destination_port=None, destination=None,
action=None, probability=None, custom_args=None): source_port=None,
destination_port=None,
action=None,
probability=None,
custom_args=None,
):
ret = [] ret = []
if probability is not None: if probability is not None:
ret.extend(['-m', 'statistic', '--mode', 'random', '--probability', str(probability)]) ret.extend(
ret.extend(['-p', 'tcp']) [
"-m",
"statistic",
"--mode",
"random",
"--probability",
str(probability),
]
)
ret.extend(["-p", "tcp"])
if source is not None: if source is not None:
ret.extend(['-s', source]) ret.extend(["-s", source])
if destination is not None: if destination is not None:
ret.extend(['-d', destination]) ret.extend(["-d", destination])
if source_port is not None: if source_port is not None:
ret.extend(['--sport', str(source_port)]) ret.extend(["--sport", str(source_port)])
if destination_port is not None: if destination_port is not None:
ret.extend(['--dport', str(destination_port)]) ret.extend(["--dport", str(destination_port)])
if action is not None: if action is not None:
ret.extend(['-j'] + action.split()) ret.extend(["-j"] + action.split())
if custom_args is not None: if custom_args is not None:
ret.extend(custom_args) ret.extend(custom_args)
return ret return ret
def __init__( def __init__(
self, self,
container_expire_timeout=50, container_exit_timeout=60, docker_api_version=os.environ.get("DOCKER_API_VERSION")): container_expire_timeout=50,
container_exit_timeout=60,
docker_api_version=os.environ.get("DOCKER_API_VERSION"),
):
self.container_expire_timeout = container_expire_timeout self.container_expire_timeout = container_expire_timeout
self.container_exit_timeout = container_exit_timeout self.container_exit_timeout = container_exit_timeout
self._docker_client = docker.DockerClient(base_url='unix:///var/run/docker.sock', version=docker_api_version, timeout=600) self._docker_client = docker.DockerClient(
base_url="unix:///var/run/docker.sock",
version=docker_api_version,
timeout=600,
)
self._container = None self._container = None
@ -194,29 +241,41 @@ class _NetworkManager:
except docker.errors.NotFound: except docker.errors.NotFound:
break break
except Exception as ex: except Exception as ex:
print("Error removing network blocade container, will try again", str(ex)) print(
"Error removing network blocade container, will try again",
str(ex),
)
time.sleep(i) time.sleep(i)
image = subprocess.check_output("docker images -q clickhouse/integration-helper 2>/dev/null", shell=True) image = subprocess.check_output(
"docker images -q clickhouse/integration-helper 2>/dev/null", shell=True
)
if not image.strip(): if not image.strip():
print("No network image helper, will try download") print("No network image helper, will try download")
# for some reason docker api may hang if image doesn't exist, so we download it # for some reason docker api may hang if image doesn't exist, so we download it
# before running # before running
for i in range(5): for i in range(5):
try: try:
subprocess.check_call("docker pull clickhouse/integration-helper", shell=True) # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL subprocess.check_call( # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL
"docker pull clickhouse/integration-helper", shell=True
)
break break
except: except:
time.sleep(i) time.sleep(i)
else: else:
raise Exception("Cannot pull clickhouse/integration-helper image") raise Exception("Cannot pull clickhouse/integration-helper image")
self._container = self._docker_client.containers.run('clickhouse/integration-helper', self._container = self._docker_client.containers.run(
auto_remove=True, "clickhouse/integration-helper",
command=('sleep %s' % self.container_exit_timeout), auto_remove=True,
# /run/xtables.lock passed inside for correct iptables --wait command=("sleep %s" % self.container_exit_timeout),
volumes={'/run/xtables.lock': {'bind': '/run/xtables.lock', 'mode': 'ro' }}, # /run/xtables.lock passed inside for correct iptables --wait
detach=True, network_mode='host') volumes={
"/run/xtables.lock": {"bind": "/run/xtables.lock", "mode": "ro"}
},
detach=True,
network_mode="host",
)
container_id = self._container.id container_id = self._container.id
self._container_expire_time = time.time() + self.container_expire_timeout self._container_expire_time = time.time() + self.container_expire_timeout
@ -233,8 +292,8 @@ class _NetworkManager:
container = self._ensure_container() container = self._ensure_container()
handle = self._docker_client.api.exec_create(container.id, cmd, **kwargs) handle = self._docker_client.api.exec_create(container.id, cmd, **kwargs)
output = self._docker_client.api.exec_start(handle).decode('utf8') output = self._docker_client.api.exec_start(handle).decode("utf8")
exit_code = self._docker_client.api.exec_inspect(handle)['ExitCode'] exit_code = self._docker_client.api.exec_inspect(handle)["ExitCode"]
if exit_code != 0: if exit_code != 0:
print(output) print(output)
@ -242,30 +301,56 @@ class _NetworkManager:
return output return output
# Approximately measure network I/O speed for an interface # Approximately measure network I/O speed for an interface
class NetThroughput(object): class NetThroughput(object):
def __init__(self, node): def __init__(self, node):
self.node = node self.node = node
# trying to get default interface and check it in /proc/net/dev # trying to get default interface and check it in /proc/net/dev
self.interface = self.node.exec_in_container(["bash", "-c", "awk '{print $1 \" \" $2}' /proc/net/route | grep 00000000 | awk '{print $1}'"]).strip() self.interface = self.node.exec_in_container(
check = self.node.exec_in_container(["bash", "-c", f'grep "^ *{self.interface}:" /proc/net/dev']).strip() [
if not check: # if check is not successful just try eth{1-10} "bash",
"-c",
"awk '{print $1 \" \" $2}' /proc/net/route | grep 00000000 | awk '{print $1}'",
]
).strip()
check = self.node.exec_in_container(
["bash", "-c", f'grep "^ *{self.interface}:" /proc/net/dev']
).strip()
if not check: # if check is not successful just try eth{1-10}
for i in range(10): for i in range(10):
try: try:
self.interface = self.node.exec_in_container(["bash", "-c", f"awk '{{print $1}}' /proc/net/route | grep 'eth{i}'"]).strip() self.interface = self.node.exec_in_container(
[
"bash",
"-c",
f"awk '{{print $1}}' /proc/net/route | grep 'eth{i}'",
]
).strip()
break break
except Exception as ex: except Exception as ex:
print(f"No interface eth{i}") print(f"No interface eth{i}")
else: else:
raise Exception("No interface eth{1-10} and default interface not specified in /proc/net/route, maybe some special network configuration") raise Exception(
"No interface eth{1-10} and default interface not specified in /proc/net/route, maybe some special network configuration"
)
try: try:
check = self.node.exec_in_container(["bash", "-c", f'grep "^ *{self.interface}:" /proc/net/dev']).strip() check = self.node.exec_in_container(
["bash", "-c", f'grep "^ *{self.interface}:" /proc/net/dev']
).strip()
if not check: if not check:
raise Exception(f"No such interface {self.interface} found in /proc/net/dev") raise Exception(
f"No such interface {self.interface} found in /proc/net/dev"
)
except: except:
logging.error("All available interfaces %s", self.node.exec_in_container(["bash", "-c", "cat /proc/net/dev"])) logging.error(
raise Exception(f"No such interface {self.interface} found in /proc/net/dev") "All available interfaces %s",
self.node.exec_in_container(["bash", "-c", "cat /proc/net/dev"]),
)
raise Exception(
f"No such interface {self.interface} found in /proc/net/dev"
)
self.current_in = self._get_in_bytes() self.current_in = self._get_in_bytes()
self.current_out = self._get_out_bytes() self.current_out = self._get_out_bytes()
@ -273,27 +358,47 @@ class NetThroughput(object):
def _get_in_bytes(self): def _get_in_bytes(self):
try: try:
result = self.node.exec_in_container(['bash', '-c', f'awk "/^ *{self.interface}:/"\' {{ if ($1 ~ /.*:[0-9][0-9]*/) {{ sub(/^.*:/, "") ; print $1 }} else {{ print $2 }} }}\' /proc/net/dev']) result = self.node.exec_in_container(
[
"bash",
"-c",
f'awk "/^ *{self.interface}:/"\' {{ if ($1 ~ /.*:[0-9][0-9]*/) {{ sub(/^.*:/, "") ; print $1 }} else {{ print $2 }} }}\' /proc/net/dev',
]
)
except: except:
raise Exception(f"Cannot receive in bytes from /proc/net/dev for interface {self.interface}") raise Exception(
f"Cannot receive in bytes from /proc/net/dev for interface {self.interface}"
)
try: try:
return int(result) return int(result)
except: except:
raise Exception(f"Got non-numeric in bytes '{result}' from /proc/net/dev for interface {self.interface}") raise Exception(
f"Got non-numeric in bytes '{result}' from /proc/net/dev for interface {self.interface}"
)
def _get_out_bytes(self): def _get_out_bytes(self):
try: try:
result = self.node.exec_in_container(['bash', '-c', f'awk "/^ *{self.interface}:/"\' {{ if ($1 ~ /.*:[0-9][0-9]*/) {{ print $9 }} else {{ print $10 }} }}\' /proc/net/dev']) result = self.node.exec_in_container(
[
"bash",
"-c",
f"awk \"/^ *{self.interface}:/\"' {{ if ($1 ~ /.*:[0-9][0-9]*/) {{ print $9 }} else {{ print $10 }} }}' /proc/net/dev",
]
)
except: except:
raise Exception(f"Cannot receive out bytes from /proc/net/dev for interface {self.interface}") raise Exception(
f"Cannot receive out bytes from /proc/net/dev for interface {self.interface}"
)
try: try:
return int(result) return int(result)
except: except:
raise Exception(f"Got non-numeric out bytes '{result}' from /proc/net/dev for interface {self.interface}") raise Exception(
f"Got non-numeric out bytes '{result}' from /proc/net/dev for interface {self.interface}"
)
def measure_speed(self, measure='bytes'): def measure_speed(self, measure="bytes"):
new_in = self._get_in_bytes() new_in = self._get_in_bytes()
new_out = self._get_out_bytes() new_out = self._get_out_bytes()
current_time = time.time() current_time = time.time()
@ -304,11 +409,11 @@ class NetThroughput(object):
self.current_in = new_in self.current_in = new_in
self.measure_time = current_time self.measure_time = current_time
if measure == 'bytes': if measure == "bytes":
return in_speed, out_speed return in_speed, out_speed
elif measure == 'kilobytes': elif measure == "kilobytes":
return in_speed / 1024., out_speed / 1024. return in_speed / 1024.0, out_speed / 1024.0
elif measure == 'megabytes': elif measure == "megabytes":
return in_speed / (1024 * 1024), out_speed / (1024 * 1024) return in_speed / (1024 * 1024), out_speed / (1024 * 1024)
else: else:
raise Exception(f"Unknown measure {measure}") raise Exception(f"Unknown measure {measure}")
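The iptables rule dictionaries above also show how black treats collection literals: a literal that still fits in 88 columns stays on one line, while a longer one is split one element per line and gains a trailing comma. A sketch with placeholder addresses (not taken from the test network):

short_rule = {"source": "10.5.0.2", "destination_port": 2181, "action": "DROP"}
long_rule = {
    "source": "10.5.0.2",
    "destination": "10.5.0.3",
    "source_port": 2181,
    "destination_port": 2181,
    "action": "DROP",
}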
@ -23,11 +23,21 @@ postgres_table_template_5 = """
key Integer NOT NULL, value UUID, PRIMARY KEY(key)) key Integer NOT NULL, value UUID, PRIMARY KEY(key))
""" """
def get_postgres_conn(ip, port, database=False, auto_commit=True, database_name='postgres_database', replication=False):
def get_postgres_conn(
ip,
port,
database=False,
auto_commit=True,
database_name="postgres_database",
replication=False,
):
if database == True: if database == True:
conn_string = f"host={ip} port={port} dbname='{database_name}' user='postgres' password='mysecretpassword'" conn_string = f"host={ip} port={port} dbname='{database_name}' user='postgres' password='mysecretpassword'"
else: else:
conn_string = f"host={ip} port={port} user='postgres' password='mysecretpassword'" conn_string = (
f"host={ip} port={port} user='postgres' password='mysecretpassword'"
)
if replication: if replication:
conn_string += " replication='database'" conn_string += " replication='database'"
@ -38,33 +48,41 @@ def get_postgres_conn(ip, port, database=False, auto_commit=True, database_name=
conn.autocommit = True conn.autocommit = True
return conn return conn
def create_replication_slot(conn, slot_name='user_slot'):
def create_replication_slot(conn, slot_name="user_slot"):
cursor = conn.cursor() cursor = conn.cursor()
cursor.execute(f'CREATE_REPLICATION_SLOT {slot_name} LOGICAL pgoutput EXPORT_SNAPSHOT') cursor.execute(
f"CREATE_REPLICATION_SLOT {slot_name} LOGICAL pgoutput EXPORT_SNAPSHOT"
)
result = cursor.fetchall() result = cursor.fetchall()
print(result[0][0]) # slot name print(result[0][0]) # slot name
print(result[0][1]) # start lsn print(result[0][1]) # start lsn
print(result[0][2]) # snapshot print(result[0][2]) # snapshot
return result[0][2] return result[0][2]
def drop_replication_slot(conn, slot_name='user_slot'):
def drop_replication_slot(conn, slot_name="user_slot"):
cursor = conn.cursor() cursor = conn.cursor()
cursor.execute(f"select pg_drop_replication_slot('{slot_name}')") cursor.execute(f"select pg_drop_replication_slot('{slot_name}')")
def create_postgres_schema(cursor, schema_name): def create_postgres_schema(cursor, schema_name):
drop_postgres_schema(cursor, schema_name) drop_postgres_schema(cursor, schema_name)
cursor.execute(f'CREATE SCHEMA {schema_name}') cursor.execute(f"CREATE SCHEMA {schema_name}")
def drop_postgres_schema(cursor, schema_name): def drop_postgres_schema(cursor, schema_name):
cursor.execute(f'DROP SCHEMA IF EXISTS {schema_name} CASCADE') cursor.execute(f"DROP SCHEMA IF EXISTS {schema_name} CASCADE")
def create_postgres_table(cursor, table_name, replica_identity_full=False, template=postgres_table_template): def create_postgres_table(
cursor, table_name, replica_identity_full=False, template=postgres_table_template
):
drop_postgres_table(cursor, table_name) drop_postgres_table(cursor, table_name)
cursor.execute(template.format(table_name)) cursor.execute(template.format(table_name))
if replica_identity_full: if replica_identity_full:
cursor.execute(f'ALTER TABLE {table_name} REPLICA IDENTITY FULL;') cursor.execute(f"ALTER TABLE {table_name} REPLICA IDENTITY FULL;")
def drop_postgres_table(cursor, table_name): def drop_postgres_table(cursor, table_name):
cursor.execute(f"""DROP TABLE IF EXISTS "{table_name}" """) cursor.execute(f"""DROP TABLE IF EXISTS "{table_name}" """)
@ -74,6 +92,7 @@ def create_postgres_table_with_schema(cursor, schema_name, table_name):
drop_postgres_table_with_schema(cursor, schema_name, table_name) drop_postgres_table_with_schema(cursor, schema_name, table_name)
cursor.execute(postgres_table_template_4.format(schema_name, table_name)) cursor.execute(postgres_table_template_4.format(schema_name, table_name))
def drop_postgres_table_with_schema(cursor, schema_name, table_name): def drop_postgres_table_with_schema(cursor, schema_name, table_name):
cursor.execute(f"""DROP TABLE IF EXISTS "{schema_name}"."{table_name}" """) cursor.execute(f"""DROP TABLE IF EXISTS "{schema_name}"."{table_name}" """)
@ -102,14 +121,14 @@ class PostgresManager:
def prepare(self): def prepare(self):
conn = get_postgres_conn(ip=self.ip, port=self.port) conn = get_postgres_conn(ip=self.ip, port=self.port)
cursor = conn.cursor() cursor = conn.cursor()
self.create_postgres_db(cursor, 'postgres_database') self.create_postgres_db(cursor, "postgres_database")
self.create_clickhouse_postgres_db(ip=self.ip, port=self.port) self.create_clickhouse_postgres_db(ip=self.ip, port=self.port)
def clear(self): def clear(self):
if self.conn.closed == 0: if self.conn.closed == 0:
self.conn.close() self.conn.close()
for db in self.created_materialized_postgres_db_list.copy(): for db in self.created_materialized_postgres_db_list.copy():
self.drop_materialized_db(db); self.drop_materialized_db(db)
for db in self.created_ch_postgres_db_list.copy(): for db in self.created_ch_postgres_db_list.copy():
self.drop_clickhouse_postgres_db(db) self.drop_clickhouse_postgres_db(db)
if len(self.created_postgres_db_list) > 0: if len(self.created_postgres_db_list) > 0:
@ -122,38 +141,54 @@ class PostgresManager:
self.conn = get_postgres_conn(ip=self.ip, port=self.port, database=True) self.conn = get_postgres_conn(ip=self.ip, port=self.port, database=True)
return self.conn.cursor() return self.conn.cursor()
def create_postgres_db(self, cursor, name='postgres_database'): def create_postgres_db(self, cursor, name="postgres_database"):
self.drop_postgres_db(cursor, name) self.drop_postgres_db(cursor, name)
self.created_postgres_db_list.add(name) self.created_postgres_db_list.add(name)
cursor.execute(f"CREATE DATABASE {name}") cursor.execute(f"CREATE DATABASE {name}")
def drop_postgres_db(self, cursor, name='postgres_database'): def drop_postgres_db(self, cursor, name="postgres_database"):
cursor.execute(f"DROP DATABASE IF EXISTS {name}") cursor.execute(f"DROP DATABASE IF EXISTS {name}")
if name in self.created_postgres_db_list: if name in self.created_postgres_db_list:
self.created_postgres_db_list.remove(name) self.created_postgres_db_list.remove(name)
def create_clickhouse_postgres_db(self, ip, port, name='postgres_database', database_name='postgres_database', schema_name=''): def create_clickhouse_postgres_db(
self,
ip,
port,
name="postgres_database",
database_name="postgres_database",
schema_name="",
):
self.drop_clickhouse_postgres_db(name) self.drop_clickhouse_postgres_db(name)
self.created_ch_postgres_db_list.add(name) self.created_ch_postgres_db_list.add(name)
if len(schema_name) == 0: if len(schema_name) == 0:
self.instance.query(f''' self.instance.query(
f"""
CREATE DATABASE {name} CREATE DATABASE {name}
ENGINE = PostgreSQL('{ip}:{port}', '{database_name}', 'postgres', 'mysecretpassword')''') ENGINE = PostgreSQL('{ip}:{port}', '{database_name}', 'postgres', 'mysecretpassword')"""
)
else: else:
self.instance.query(f''' self.instance.query(
f"""
CREATE DATABASE {name} CREATE DATABASE {name}
ENGINE = PostgreSQL('{ip}:{port}', '{database_name}', 'postgres', 'mysecretpassword', '{schema_name}')''') ENGINE = PostgreSQL('{ip}:{port}', '{database_name}', 'postgres', 'mysecretpassword', '{schema_name}')"""
)
def drop_clickhouse_postgres_db(self, name='postgres_database'): def drop_clickhouse_postgres_db(self, name="postgres_database"):
self.instance.query(f'DROP DATABASE IF EXISTS {name}') self.instance.query(f"DROP DATABASE IF EXISTS {name}")
if name in self.created_ch_postgres_db_list: if name in self.created_ch_postgres_db_list:
self.created_ch_postgres_db_list.remove(name) self.created_ch_postgres_db_list.remove(name)
def create_materialized_db(
def create_materialized_db(self, ip, port, self,
materialized_database='test_database', postgres_database='postgres_database', ip,
settings=[], table_overrides=''): port,
materialized_database="test_database",
postgres_database="postgres_database",
settings=[],
table_overrides="",
):
self.created_materialized_postgres_db_list.add(materialized_database) self.created_materialized_postgres_db_list.add(materialized_database)
self.instance.query(f"DROP DATABASE IF EXISTS {materialized_database}") self.instance.query(f"DROP DATABASE IF EXISTS {materialized_database}")
@ -162,17 +197,17 @@ class PostgresManager:
create_query += " SETTINGS " create_query += " SETTINGS "
for i in range(len(settings)): for i in range(len(settings)):
if i != 0: if i != 0:
create_query += ', ' create_query += ", "
create_query += settings[i] create_query += settings[i]
create_query += table_overrides create_query += table_overrides
self.instance.query(create_query) self.instance.query(create_query)
assert materialized_database in self.instance.query('SHOW DATABASES') assert materialized_database in self.instance.query("SHOW DATABASES")
def drop_materialized_db(self, materialized_database='test_database'): def drop_materialized_db(self, materialized_database="test_database"):
self.instance.query(f'DROP DATABASE IF EXISTS {materialized_database} NO DELAY') self.instance.query(f"DROP DATABASE IF EXISTS {materialized_database} NO DELAY")
if materialized_database in self.created_materialized_postgres_db_list: if materialized_database in self.created_materialized_postgres_db_list:
self.created_materialized_postgres_db_list.remove(materialized_database) self.created_materialized_postgres_db_list.remove(materialized_database)
assert materialized_database not in self.instance.query('SHOW DATABASES') assert materialized_database not in self.instance.query("SHOW DATABASES")
def create_and_fill_postgres_table(self, table_name): def create_and_fill_postgres_table(self, table_name):
conn = get_postgres_conn(ip=self.ip, port=self.port, database=True) conn = get_postgres_conn(ip=self.ip, port=self.port, database=True)
@ -180,82 +215,109 @@ class PostgresManager:
self.create_and_fill_postgres_table_from_cursor(cursor, table_name) self.create_and_fill_postgres_table_from_cursor(cursor, table_name)
def create_and_fill_postgres_table_from_cursor(self, cursor, table_name): def create_and_fill_postgres_table_from_cursor(self, cursor, table_name):
create_postgres_table(cursor, table_name); create_postgres_table(cursor, table_name)
self.instance.query(f"INSERT INTO postgres_database.{table_name} SELECT number, number from numbers(50)") self.instance.query(
f"INSERT INTO postgres_database.{table_name} SELECT number, number from numbers(50)"
)
def create_and_fill_postgres_tables(self, tables_num, numbers=50): def create_and_fill_postgres_tables(self, tables_num, numbers=50):
conn = get_postgres_conn(ip=self.ip, port=self.port, database=True) conn = get_postgres_conn(ip=self.ip, port=self.port, database=True)
cursor = conn.cursor() cursor = conn.cursor()
self.create_and_fill_postgres_tables_from_cursor(cursor, tables_num, numbers=numbers) self.create_and_fill_postgres_tables_from_cursor(
cursor, tables_num, numbers=numbers
)
def create_and_fill_postgres_tables_from_cursor(self, cursor, tables_num, numbers=50): def create_and_fill_postgres_tables_from_cursor(
self, cursor, tables_num, numbers=50
):
for i in range(tables_num): for i in range(tables_num):
table_name = f'postgresql_replica_{i}' table_name = f"postgresql_replica_{i}"
create_postgres_table(cursor, table_name); create_postgres_table(cursor, table_name)
if numbers > 0: if numbers > 0:
self.instance.query(f"INSERT INTO postgres_database.{table_name} SELECT number, number from numbers({numbers})") self.instance.query(
f"INSERT INTO postgres_database.{table_name} SELECT number, number from numbers({numbers})"
)
queries = [ queries = [
'INSERT INTO postgresql_replica_{} select i, i from generate_series(0, 10000) as t(i);', "INSERT INTO postgresql_replica_{} select i, i from generate_series(0, 10000) as t(i);",
'DELETE FROM postgresql_replica_{} WHERE (value*value) % 3 = 0;', "DELETE FROM postgresql_replica_{} WHERE (value*value) % 3 = 0;",
'UPDATE postgresql_replica_{} SET value = value - 125 WHERE key % 2 = 0;', "UPDATE postgresql_replica_{} SET value = value - 125 WHERE key % 2 = 0;",
"UPDATE postgresql_replica_{} SET key=key+20000 WHERE key%2=0", "UPDATE postgresql_replica_{} SET key=key+20000 WHERE key%2=0",
'INSERT INTO postgresql_replica_{} select i, i from generate_series(40000, 50000) as t(i);', "INSERT INTO postgresql_replica_{} select i, i from generate_series(40000, 50000) as t(i);",
'DELETE FROM postgresql_replica_{} WHERE key % 10 = 0;', "DELETE FROM postgresql_replica_{} WHERE key % 10 = 0;",
'UPDATE postgresql_replica_{} SET value = value + 101 WHERE key % 2 = 1;', "UPDATE postgresql_replica_{} SET value = value + 101 WHERE key % 2 = 1;",
"UPDATE postgresql_replica_{} SET key=key+80000 WHERE key%2=1", "UPDATE postgresql_replica_{} SET key=key+80000 WHERE key%2=1",
'DELETE FROM postgresql_replica_{} WHERE value % 2 = 0;', "DELETE FROM postgresql_replica_{} WHERE value % 2 = 0;",
'UPDATE postgresql_replica_{} SET value = value + 2000 WHERE key % 5 = 0;', "UPDATE postgresql_replica_{} SET value = value + 2000 WHERE key % 5 = 0;",
'INSERT INTO postgresql_replica_{} select i, i from generate_series(200000, 250000) as t(i);', "INSERT INTO postgresql_replica_{} select i, i from generate_series(200000, 250000) as t(i);",
'DELETE FROM postgresql_replica_{} WHERE value % 3 = 0;', "DELETE FROM postgresql_replica_{} WHERE value % 3 = 0;",
'UPDATE postgresql_replica_{} SET value = value * 2 WHERE key % 3 = 0;', "UPDATE postgresql_replica_{} SET value = value * 2 WHERE key % 3 = 0;",
"UPDATE postgresql_replica_{} SET key=key+500000 WHERE key%2=1", "UPDATE postgresql_replica_{} SET key=key+500000 WHERE key%2=1",
'INSERT INTO postgresql_replica_{} select i, i from generate_series(1000000, 1050000) as t(i);', "INSERT INTO postgresql_replica_{} select i, i from generate_series(1000000, 1050000) as t(i);",
'DELETE FROM postgresql_replica_{} WHERE value % 9 = 2;', "DELETE FROM postgresql_replica_{} WHERE value % 9 = 2;",
"UPDATE postgresql_replica_{} SET key=key+10000000", "UPDATE postgresql_replica_{} SET key=key+10000000",
'UPDATE postgresql_replica_{} SET value = value + 2 WHERE key % 3 = 1;', "UPDATE postgresql_replica_{} SET value = value + 2 WHERE key % 3 = 1;",
'DELETE FROM postgresql_replica_{} WHERE value%5 = 0;' "DELETE FROM postgresql_replica_{} WHERE value%5 = 0;",
] ]
def assert_nested_table_is_created(instance, table_name, materialized_database='test_database', schema_name=''): def assert_nested_table_is_created(
instance, table_name, materialized_database="test_database", schema_name=""
):
if len(schema_name) == 0: if len(schema_name) == 0:
table = table_name table = table_name
else: else:
table = schema_name + "." + table_name table = schema_name + "." + table_name
print(f'Checking table {table} exists in {materialized_database}') print(f"Checking table {table} exists in {materialized_database}")
database_tables = instance.query(f'SHOW TABLES FROM {materialized_database}') database_tables = instance.query(f"SHOW TABLES FROM {materialized_database}")
while table not in database_tables: while table not in database_tables:
time.sleep(0.2) time.sleep(0.2)
database_tables = instance.query(f'SHOW TABLES FROM {materialized_database}') database_tables = instance.query(f"SHOW TABLES FROM {materialized_database}")
assert(table in database_tables) assert table in database_tables
def assert_number_of_columns(instance, expected, table_name, database_name='test_database'): def assert_number_of_columns(
result = instance.query(f"select count() from system.columns where table = '{table_name}' and database = '{database_name}' and not startsWith(name, '_')") instance, expected, table_name, database_name="test_database"
while (int(result) != expected): ):
result = instance.query(
f"select count() from system.columns where table = '{table_name}' and database = '{database_name}' and not startsWith(name, '_')"
)
while int(result) != expected:
time.sleep(1) time.sleep(1)
result = instance.query(f"select count() from system.columns where table = '{table_name}' and database = '{database_name}' and not startsWith(name, '_')") result = instance.query(
print('Number of columns ok') f"select count() from system.columns where table = '{table_name}' and database = '{database_name}' and not startsWith(name, '_')"
)
print("Number of columns ok")
def check_tables_are_synchronized(instance, table_name, order_by='key', postgres_database='postgres_database', materialized_database='test_database', schema_name=''): def check_tables_are_synchronized(
assert_nested_table_is_created(instance, table_name, materialized_database, schema_name) instance,
table_name,
order_by="key",
postgres_database="postgres_database",
materialized_database="test_database",
schema_name="",
):
assert_nested_table_is_created(
instance, table_name, materialized_database, schema_name
)
table_path = '' table_path = ""
if len(schema_name) == 0: if len(schema_name) == 0:
table_path = f'{materialized_database}.{table_name}' table_path = f"{materialized_database}.{table_name}"
else: else:
table_path = f'{materialized_database}.`{schema_name}.{table_name}`' table_path = f"{materialized_database}.`{schema_name}.{table_name}`"
print(f"Checking table is synchronized: {table_path}") print(f"Checking table is synchronized: {table_path}")
result_query = f'select * from {table_path} order by {order_by};' result_query = f"select * from {table_path} order by {order_by};"
expected = instance.query(f'select * from {postgres_database}.{table_name} order by {order_by};') expected = instance.query(
f"select * from {postgres_database}.{table_name} order by {order_by};"
)
result = instance.query(result_query) result = instance.query(result_query)
for _ in range(30): for _ in range(30):
@ -265,9 +327,16 @@ def check_tables_are_synchronized(instance, table_name, order_by='key', postgres
time.sleep(0.5) time.sleep(0.5)
result = instance.query(result_query) result = instance.query(result_query)
assert(result == expected) assert result == expected
def check_several_tables_are_synchronized(instance, tables_num, order_by='key', postgres_database='postgres_database', materialized_database='test_database', schema_name=''): def check_several_tables_are_synchronized(
instance,
tables_num,
order_by="key",
postgres_database="postgres_database",
materialized_database="test_database",
schema_name="",
):
for i in range(tables_num): for i in range(tables_num):
check_tables_are_synchronized(instance, f'postgresql_replica_{i}'); check_tables_are_synchronized(instance, f"postgresql_replica_{i}")
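get_postgres_conn above demonstrates two related behaviours: black wraps an over-long assignment in parentheses when that is enough to bring the line under 88 columns, but leaves a line alone when the string literal itself exceeds the limit, since black does not split string contents. A hedged sketch with placeholder DSNs and credentials:

def example_conn_strings(ip, port, user):
    # Placeholder values only. The first line stays long because no split would
    # make it fit; the second is wrapped in parentheses because wrapping helps.
    dsn_with_db = f"host={ip} port={port} dbname='postgres_database' user='{user}' password='mysecretpassword'"
    connection_string = (
        f"host={ip} port={port} user='{user}' password='mysecretpassword'"
    )
    return dsn_with_db, connection_string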
@ -5,17 +5,17 @@ import os.path
# Without this function all workers will log to the same log file # Without this function all workers will log to the same log file
# and mix everything together making it much more difficult for troubleshooting. # and mix everything together making it much more difficult for troubleshooting.
def setup(): def setup():
worker_name = os.environ.get('PYTEST_XDIST_WORKER', 'master') worker_name = os.environ.get("PYTEST_XDIST_WORKER", "master")
if worker_name == 'master': if worker_name == "master":
return return
logger = logging.getLogger('') logger = logging.getLogger("")
new_handlers = [] new_handlers = []
handlers_to_remove = [] handlers_to_remove = []
for handler in logger.handlers: for handler in logger.handlers:
if isinstance(handler, logging.FileHandler): if isinstance(handler, logging.FileHandler):
filename, ext = os.path.splitext(handler.baseFilename) filename, ext = os.path.splitext(handler.baseFilename)
if not filename.endswith('-' + worker_name): if not filename.endswith("-" + worker_name):
new_filename = filename + '-' + worker_name new_filename = filename + "-" + worker_name
new_handler = logging.FileHandler(new_filename + ext) new_handler = logging.FileHandler(new_filename + ext)
new_handler.setFormatter(handler.formatter) new_handler.setFormatter(handler.formatter)
new_handler.setLevel(handler.level) new_handler.setLevel(handler.level)
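The helper above gives every pytest-xdist worker its own log file by renaming each FileHandler target. The name derivation it applies boils down to the following standalone sketch (this is not the repository function, just the same suffix logic):

import os


def per_worker_filename(base_filename):
    # "master" means pytest-xdist is not active, so the original name is kept.
    worker_name = os.environ.get("PYTEST_XDIST_WORKER", "master")
    if worker_name == "master":
        return base_filename
    filename, ext = os.path.splitext(base_filename)
    if filename.endswith("-" + worker_name):
        return base_filename
    return filename + "-" + worker_name + ext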
@ -13,12 +13,18 @@ class TSV:
elif isinstance(contents, str) or isinstance(contents, str): elif isinstance(contents, str) or isinstance(contents, str):
raw_lines = contents.splitlines(True) raw_lines = contents.splitlines(True)
elif isinstance(contents, list): elif isinstance(contents, list):
raw_lines = ['\t'.join(map(str, l)) if isinstance(l, list) else str(l) for l in contents] raw_lines = [
"\t".join(map(str, l)) if isinstance(l, list) else str(l)
for l in contents
]
elif isinstance(contents, TSV): elif isinstance(contents, TSV):
self.lines = contents.lines self.lines = contents.lines
return return
else: else:
raise TypeError("contents must be either file or string or list, actual type: " + type(contents).__name__) raise TypeError(
"contents must be either file or string or list, actual type: "
+ type(contents).__name__
)
self.lines = [l.strip() for l in raw_lines if l.strip()] self.lines = [l.strip() for l in raw_lines if l.strip()]
def __eq__(self, other): def __eq__(self, other):
@ -31,13 +37,18 @@ class TSV:
return self != TSV(other) return self != TSV(other)
return self.lines != other.lines return self.lines != other.lines
def diff(self, other, n1='', n2=''): def diff(self, other, n1="", n2=""):
if not isinstance(other, TSV): if not isinstance(other, TSV):
return self.diff(TSV(other), n1=n1, n2=n2) return self.diff(TSV(other), n1=n1, n2=n2)
return list(line.rstrip() for line in difflib.unified_diff(self.lines, other.lines, fromfile=n1, tofile=n2))[2:] return list(
line.rstrip()
for line in difflib.unified_diff(
self.lines, other.lines, fromfile=n1, tofile=n2
)
)[2:]
def __str__(self): def __str__(self):
return '\n'.join(self.lines) return "\n".join(self.lines)
def __repr__(self): def __repr__(self):
return self.__str__() return self.__str__()
@ -50,29 +61,70 @@ class TSV:
return [line.split("\t") for line in contents.split("\n") if line.strip()] return [line.split("\t") for line in contents.split("\n") if line.strip()]
def assert_eq_with_retry(instance, query, expectation, retry_count=20, sleep_time=0.5, stdin=None, timeout=None, def assert_eq_with_retry(
settings=None, user=None, ignore_error=False, get_result=lambda x: x): instance,
query,
expectation,
retry_count=20,
sleep_time=0.5,
stdin=None,
timeout=None,
settings=None,
user=None,
ignore_error=False,
get_result=lambda x: x,
):
expectation_tsv = TSV(expectation) expectation_tsv = TSV(expectation)
for i in range(retry_count): for i in range(retry_count):
try: try:
if TSV(get_result(instance.query(query, user=user, stdin=stdin, timeout=timeout, settings=settings, if (
ignore_error=ignore_error))) == expectation_tsv: TSV(
get_result(
instance.query(
query,
user=user,
stdin=stdin,
timeout=timeout,
settings=settings,
ignore_error=ignore_error,
)
)
)
== expectation_tsv
):
break break
time.sleep(sleep_time) time.sleep(sleep_time)
except Exception as ex: except Exception as ex:
logging.exception(f"assert_eq_with_retry retry {i+1} exception {ex}") logging.exception(f"assert_eq_with_retry retry {i+1} exception {ex}")
time.sleep(sleep_time) time.sleep(sleep_time)
else: else:
val = TSV(get_result(instance.query(query, user=user, stdin=stdin, timeout=timeout, settings=settings, val = TSV(
ignore_error=ignore_error))) get_result(
instance.query(
query,
user=user,
stdin=stdin,
timeout=timeout,
settings=settings,
ignore_error=ignore_error,
)
)
)
if expectation_tsv != val: if expectation_tsv != val:
raise AssertionError("'{}' != '{}'\n{}".format(expectation_tsv, val, '\n'.join( raise AssertionError(
expectation_tsv.diff(val, n1="expectation", n2="query")))) "'{}' != '{}'\n{}".format(
expectation_tsv,
val,
"\n".join(expectation_tsv.diff(val, n1="expectation", n2="query")),
)
)
def assert_logs_contain(instance, substring): def assert_logs_contain(instance, substring):
if not instance.contains_in_log(substring): if not instance.contains_in_log(substring):
raise AssertionError("'{}' not found in logs".format(substring)) raise AssertionError("'{}' not found in logs".format(substring))
def assert_logs_contain_with_retry(instance, substring, retry_count=20, sleep_time=0.5): def assert_logs_contain_with_retry(instance, substring, retry_count=20, sleep_time=0.5):
for i in range(retry_count): for i in range(retry_count):
try: try:
@ -85,7 +137,10 @@ def assert_logs_contain_with_retry(instance, substring, retry_count=20, sleep_ti
else: else:
raise AssertionError("'{}' not found in logs".format(substring)) raise AssertionError("'{}' not found in logs".format(substring))
def exec_query_with_retry(instance, query, retry_count=40, sleep_time=0.5, silent=False, settings={}):
def exec_query_with_retry(
instance, query, retry_count=40, sleep_time=0.5, silent=False, settings={}
):
exception = None exception = None
for cnt in range(retry_count): for cnt in range(retry_count):
try: try:
@ -96,16 +151,21 @@ def exec_query_with_retry(instance, query, retry_count=40, sleep_time=0.5, silen
except Exception as ex: except Exception as ex:
exception = ex exception = ex
if not silent: if not silent:
logging.exception(f"Failed to execute query '{query}' on {cnt} try on instance '{instance.name}' will retry") logging.exception(
f"Failed to execute query '{query}' on {cnt} try on instance '{instance.name}' will retry"
)
time.sleep(sleep_time) time.sleep(sleep_time)
else: else:
raise exception raise exception
def csv_compare(result, expected): def csv_compare(result, expected):
csv_result = TSV(result) csv_result = TSV(result)
csv_expected = TSV(expected) csv_expected = TSV(expected)
mismatch = [] mismatch = []
max_len = len(csv_result) if len(csv_result) > len(csv_expected) else len(csv_expected) max_len = (
len(csv_result) if len(csv_result) > len(csv_expected) else len(csv_expected)
)
for i in range(max_len): for i in range(max_len):
if i >= len(csv_result): if i >= len(csv_result):
mismatch.append("-[%d]=%s" % (i, csv_expected.lines[i])) mismatch.append("-[%d]=%s" % (i, csv_expected.lines[i]))
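assert_eq_with_retry above also shows black's treatment of a comparison that no longer fits on one line: the whole expression is wrapped in parentheses and broken before the operators, so each operand starts its own line. A compact sketch with a hypothetical helper:

import math


def values_match(lhs, rhs, tolerance):
    # Hypothetical helper, shown only for the layout: black breaks an over-long
    # boolean expression before each operator, inside added parentheses.
    if (
        lhs is not None
        and rhs is not None
        and math.isclose(float(lhs), float(rhs), abs_tol=float(tolerance))
    ):
        return True
    return False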
@ -8,30 +8,30 @@ sys.path.insert(0, os.path.join(CURDIR))
from . import uexpect from . import uexpect
prompt = ':\) ' prompt = ":\) "
end_of_block = r'.*\r\n.*\r\n' end_of_block = r".*\r\n.*\r\n"
class client(object): class client(object):
def __init__(self, command=None, name='', log=None): def __init__(self, command=None, name="", log=None):
self.client = uexpect.spawn(['/bin/bash', '--noediting']) self.client = uexpect.spawn(["/bin/bash", "--noediting"])
if command is None: if command is None:
command = '/usr/bin/clickhouse-client' command = "/usr/bin/clickhouse-client"
self.client.command = command self.client.command = command
self.client.eol('\r') self.client.eol("\r")
self.client.logger(log, prefix=name) self.client.logger(log, prefix=name)
self.client.timeout(20) self.client.timeout(20)
self.client.expect('[#\$] ', timeout=2) self.client.expect("[#\$] ", timeout=2)
self.client.send(command) self.client.send(command)
def __enter__(self): def __enter__(self):
return self.client.__enter__() return self.client.__enter__()
def __exit__(self, type, value, traceback): def __exit__(self, type, value, traceback):
self.client.reader['kill_event'].set() self.client.reader["kill_event"].set()
# send Ctrl-C # send Ctrl-C
self.client.send('\x03', eol='') self.client.send("\x03", eol="")
time.sleep(0.3) time.sleep(0.3)
self.client.send('quit', eol='\r') self.client.send("quit", eol="\r")
self.client.send('\x03', eol='') self.client.send("\x03", eol="")
return self.client.__exit__(type, value, traceback) return self.client.__exit__(type, value, traceback)
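The client wrapper above is typical of the files in this commit where the only change is quote normalization and line wrapping. Assuming the black package is installed, its programmatic API can confirm that a snippet is already in the form black produces; the snippet below is an inline example, not a real repo file:

import black

# A line already in the style black produces: double quotes, nothing to wrap.
# format_str is a no-op on code that is already black-formatted.
SNIPPET = 'command = "/usr/bin/clickhouse-client"\n'

formatted = black.format_str(SNIPPET, mode=black.FileMode())
assert formatted == SNIPPET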
@ -25,7 +25,7 @@ class TimeoutError(Exception):
self.timeout = timeout self.timeout = timeout
def __str__(self): def __str__(self):
return 'Timeout %.3fs' % float(self.timeout) return "Timeout %.3fs" % float(self.timeout)
class ExpectTimeoutError(Exception): class ExpectTimeoutError(Exception):
@ -35,12 +35,12 @@ class ExpectTimeoutError(Exception):
self.buffer = buffer self.buffer = buffer
def __str__(self): def __str__(self):
s = 'Timeout %.3fs ' % float(self.timeout) s = "Timeout %.3fs " % float(self.timeout)
if self.pattern: if self.pattern:
s += 'for %s ' % repr(self.pattern.pattern) s += "for %s " % repr(self.pattern.pattern)
if self.buffer: if self.buffer:
s += 'buffer %s ' % repr(self.buffer[:]) s += "buffer %s " % repr(self.buffer[:])
s += 'or \'%s\'' % ','.join(['%x' % ord(c) for c in self.buffer[:]]) s += "or '%s'" % ",".join(["%x" % ord(c) for c in self.buffer[:]])
return s return s
@ -55,12 +55,12 @@ class IO(object):
TIMEOUT = Timeout TIMEOUT = Timeout
class Logger(object): class Logger(object):
def __init__(self, logger, prefix=''): def __init__(self, logger, prefix=""):
self._logger = logger self._logger = logger
self._prefix = prefix self._prefix = prefix
def write(self, data): def write(self, data):
self._logger.write(('\n' + data).replace('\n', '\n' + self._prefix)) self._logger.write(("\n" + data).replace("\n", "\n" + self._prefix))
def flush(self): def flush(self):
self._logger.flush() self._logger.flush()
@ -77,7 +77,7 @@ class IO(object):
self.reader = reader self.reader = reader
self._timeout = None self._timeout = None
self._logger = None self._logger = None
self._eol = '' self._eol = ""
def __enter__(self): def __enter__(self):
return self return self
@ -85,7 +85,7 @@ class IO(object):
def __exit__(self, type, value, traceback): def __exit__(self, type, value, traceback):
self.close() self.close()
def logger(self, logger=None, prefix=''): def logger(self, logger=None, prefix=""):
if logger: if logger:
self._logger = self.Logger(logger, prefix=prefix) self._logger = self.Logger(logger, prefix=prefix)
return self._logger return self._logger
@ -101,15 +101,15 @@ class IO(object):
return self._eol return self._eol
def close(self, force=True): def close(self, force=True):
self.reader['kill_event'].set() self.reader["kill_event"].set()
os.system('pkill -TERM -P %d' % self.process.pid) os.system("pkill -TERM -P %d" % self.process.pid)
if force: if force:
self.process.kill() self.process.kill()
else: else:
self.process.terminate() self.process.terminate()
os.close(self.master) os.close(self.master)
if self._logger: if self._logger:
self._logger.write('\n') self._logger.write("\n")
self._logger.flush() self._logger.flush()
def send(self, data, eol=None): def send(self, data, eol=None):
@ -135,9 +135,9 @@ class IO(object):
if self.buffer is not None: if self.buffer is not None:
self.match = pattern.search(self.buffer, 0) self.match = pattern.search(self.buffer, 0)
if self.match is not None: if self.match is not None:
self.after = self.buffer[self.match.start():self.match.end()] self.after = self.buffer[self.match.start() : self.match.end()]
self.before = self.buffer[:self.match.start()] self.before = self.buffer[: self.match.start()]
self.buffer = self.buffer[self.match.end():] self.buffer = self.buffer[self.match.end() :]
break break
if timeleft < 0: if timeleft < 0:
break break
@ -145,16 +145,16 @@ class IO(object):
data = self.read(timeout=timeleft, raise_exception=True) data = self.read(timeout=timeleft, raise_exception=True)
except TimeoutError: except TimeoutError:
if self._logger: if self._logger:
self._logger.write((self.buffer or '') + '\n') self._logger.write((self.buffer or "") + "\n")
self._logger.flush() self._logger.flush()
exception = ExpectTimeoutError(pattern, timeout, self.buffer) exception = ExpectTimeoutError(pattern, timeout, self.buffer)
self.buffer = None self.buffer = None
raise exception raise exception
timeleft -= (time.time() - start_time) timeleft -= time.time() - start_time
if data: if data:
self.buffer = (self.buffer + data) if self.buffer else data self.buffer = (self.buffer + data) if self.buffer else data
if self._logger: if self._logger:
self._logger.write((self.before or '') + (self.after or '')) self._logger.write((self.before or "") + (self.after or ""))
self._logger.flush() self._logger.flush()
if self.match is None: if self.match is None:
exception = ExpectTimeoutError(pattern, timeout, self.buffer) exception = ExpectTimeoutError(pattern, timeout, self.buffer)
@ -163,7 +163,7 @@ class IO(object):
return self.match return self.match
def read(self, timeout=0, raise_exception=False): def read(self, timeout=0, raise_exception=False):
data = '' data = ""
timeleft = timeout timeleft = timeout
try: try:
while timeleft >= 0: while timeleft >= 0:
@ -171,7 +171,7 @@ class IO(object):
data += self.queue.get(timeout=timeleft) data += self.queue.get(timeout=timeleft)
if data: if data:
break break
timeleft -= (time.time() - start_time) timeleft -= time.time() - start_time
except Empty: except Empty:
if data: if data:
return data return data
@ -186,7 +186,14 @@ class IO(object):
def spawn(command): def spawn(command):
master, slave = pty.openpty() master, slave = pty.openpty()
process = Popen(command, preexec_fn=os.setsid, stdout=slave, stdin=slave, stderr=slave, bufsize=1) process = Popen(
command,
preexec_fn=os.setsid,
stdout=slave,
stdin=slave,
stderr=slave,
bufsize=1,
)
os.close(slave) os.close(slave)
queue = Queue() queue = Queue()
@ -195,14 +202,19 @@ def spawn(command):
thread.daemon = True thread.daemon = True
thread.start() thread.start()
return IO(process, master, queue, reader={'thread': thread, 'kill_event': reader_kill_event}) return IO(
process,
master,
queue,
reader={"thread": thread, "kill_event": reader_kill_event},
)
def reader(process, out, queue, kill_event): def reader(process, out, queue, kill_event):
while True: while True:
try: try:
# TODO: there are some issues with 1<<16 buffer size # TODO: there are some issues with 1<<16 buffer size
data = os.read(out, 1<<17).decode(errors='replace') data = os.read(out, 1 << 17).decode(errors="replace")
queue.put(data) queue.put(data)
except: except:
if kill_event.is_set(): if kill_event.is_set():
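The spawn()/reader() pair above runs the child process on a pseudo-terminal and drains its output into a queue from a daemon thread. A self-contained sketch of the same pattern, assuming a POSIX system (the pty module is unavailable on Windows) and using echo in place of clickhouse-client; all names are illustrative:

import os
import pty
import threading
from queue import Empty, Queue
from subprocess import Popen


def spawn_with_reader(command):
    # Run the child on the slave side of a pty and drain the master side
    # from a daemon thread into a queue, mirroring spawn()/reader() above.
    master, slave = pty.openpty()
    process = Popen(command, stdout=slave, stdin=slave, stderr=slave)
    os.close(slave)
    queue = Queue()

    def reader():
        while True:
            try:
                data = os.read(master, 1 << 12)
            except OSError:
                break  # the child closed its side of the pty
            if not data:
                break
            queue.put(data.decode(errors="replace"))

    thread = threading.Thread(target=reader, daemon=True)
    thread.start()
    return process, master, queue


if __name__ == "__main__":
    process, master_fd, output = spawn_with_reader(["echo", "hello from the pty"])
    process.wait()
    try:
        print(output.get(timeout=1).strip())  # "hello from the pty"
    except Empty:
        print("no output captured")
    finally:
        os.close(master_fd)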
@ -11,11 +11,13 @@ class SafeThread(threading.Thread):
super().__init__() super().__init__()
self.target = target self.target = target
self.exception = None self.exception = None
def run(self): def run(self):
try: try:
self.target() self.target()
except Exception as e: # pylint: disable=broad-except except Exception as e: # pylint: disable=broad-except
self.exception = e self.exception = e
def join(self, timeout=None): def join(self, timeout=None):
super().join(timeout) super().join(timeout)
if self.exception: if self.exception:
@ -24,7 +26,7 @@ class SafeThread(threading.Thread):
def random_string(length): def random_string(length):
letters = string.ascii_letters letters = string.ascii_letters
return ''.join(random.choice(letters) for i in range(length)) return "".join(random.choice(letters) for i in range(length))
def generate_values(date_str, count, sign=1): def generate_values(date_str, count, sign=1):
@ -34,10 +36,10 @@ def generate_values(date_str, count, sign=1):
def replace_config(config_path, old, new): def replace_config(config_path, old, new):
config = open(config_path, 'r') config = open(config_path, "r")
config_lines = config.readlines() config_lines = config.readlines()
config.close() config.close()
config_lines = [line.replace(old, new) for line in config_lines] config_lines = [line.replace(old, new) for line in config_lines]
config = open(config_path, 'w') config = open(config_path, "w")
config.writelines(config_lines) config.writelines(config_lines)
config.close() config.close()
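SafeThread above records any exception raised by its target and re-raises it from join(), so a failing background worker cannot pass unnoticed. A small standalone usage sketch of the same shape (boom is an illustrative worker, not part of the helpers):

import threading


class SafeThread(threading.Thread):
    # Same idea as the helper above: remember the worker's exception
    # and surface it to the caller in join().
    def __init__(self, target):
        super().__init__()
        self.target = target
        self.exception = None

    def run(self):
        try:
            self.target()
        except Exception as e:  # pylint: disable=broad-except
            self.exception = e

    def join(self, timeout=None):
        super().join(timeout)
        if self.exception:
            raise self.exception


def boom():
    raise ValueError("worker failed")


if __name__ == "__main__":
    t = SafeThread(target=boom)
    t.start()
    try:
        t.join()
    except ValueError as err:
        print("caught from worker:", err)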
@ -19,14 +19,19 @@ from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__) cluster = ClickHouseCluster(__file__)
node = cluster.add_instance('node', main_configs=[ node = cluster.add_instance(
'configs/no_system_log.xml', "node",
'configs/asynchronous_metrics_update_period_s.xml', main_configs=[
], user_configs=[ "configs/no_system_log.xml",
'configs/users.d/overrides.xml', "configs/asynchronous_metrics_update_period_s.xml",
]) ],
user_configs=[
"configs/users.d/overrides.xml",
],
)
@pytest.fixture(scope='module', autouse=True)
@pytest.fixture(scope="module", autouse=True)
def start_cluster(): def start_cluster():
try: try:
cluster.start() cluster.start()
@ -34,31 +39,39 @@ def start_cluster():
finally: finally:
cluster.shutdown() cluster.shutdown()
query_settings = { query_settings = {
'max_threads': 1, "max_threads": 1,
'log_queries': 0, "log_queries": 0,
} }
sample_query = "SELECT groupArray(repeat('a', 1000)) FROM numbers(10000) GROUP BY number%10 FORMAT JSON" sample_query = "SELECT groupArray(repeat('a', 1000)) FROM numbers(10000) GROUP BY number%10 FORMAT JSON"
def query(*args, **kwargs): def query(*args, **kwargs):
if 'settings' not in kwargs: if "settings" not in kwargs:
kwargs['settings'] = query_settings kwargs["settings"] = query_settings
else: else:
kwargs['settings'].update(query_settings) kwargs["settings"].update(query_settings)
return node.query(*args, **kwargs) return node.query(*args, **kwargs)
def http_query(*args, **kwargs): def http_query(*args, **kwargs):
if 'params' not in kwargs: if "params" not in kwargs:
kwargs['params'] = query_settings kwargs["params"] = query_settings
else: else:
kwargs['params'].update(query_settings) kwargs["params"].update(query_settings)
return node.http_query(*args, **kwargs) return node.http_query(*args, **kwargs)
def get_MemoryTracking(): def get_MemoryTracking():
return int(http_query("SELECT value FROM system.metrics WHERE metric = 'MemoryTracking'")) return int(
http_query("SELECT value FROM system.metrics WHERE metric = 'MemoryTracking'")
)
def check_memory(memory): def check_memory(memory):
# bytes -> megabytes # bytes -> megabytes
memory = [*map(lambda x: int(int(x)/1024/1024), memory)] memory = [*map(lambda x: int(int(x) / 1024 / 1024), memory)]
# 3 changes to MemoryTracking is minimum, since: # 3 changes to MemoryTracking is minimum, since:
# - this is not that high to not detect inacuracy # - this is not that high to not detect inacuracy
# - memory can go like X/X+N due to some background allocations # - memory can go like X/X+N due to some background allocations
@ -66,14 +79,19 @@ def check_memory(memory):
changes_allowed = 3 changes_allowed = 3
# if number of samples is large enough, use 10% from them # if number of samples is large enough, use 10% from them
# (actually most of the time there will be only few changes, it was made 10% to avoid flackiness) # (actually most of the time there will be only few changes, it was made 10% to avoid flackiness)
changes_allowed_auto=int(len(memory) * 0.1) changes_allowed_auto = int(len(memory) * 0.1)
changes_allowed = max(changes_allowed_auto, changes_allowed) changes_allowed = max(changes_allowed_auto, changes_allowed)
changed=len(set(memory)) changed = len(set(memory))
logging.info('Changes: allowed=%s, actual=%s, sample=%s', logging.info(
changes_allowed, changed, len(memory)) "Changes: allowed=%s, actual=%s, sample=%s",
changes_allowed,
changed,
len(memory),
)
assert changed < changes_allowed assert changed < changes_allowed
def test_http(): def test_http():
memory = [] memory = []
memory.append(get_MemoryTracking()) memory.append(get_MemoryTracking())
@ -82,6 +100,7 @@ def test_http():
memory.append(get_MemoryTracking()) memory.append(get_MemoryTracking())
check_memory(memory) check_memory(memory)
def test_tcp_multiple_sessions(): def test_tcp_multiple_sessions():
memory = [] memory = []
memory.append(get_MemoryTracking()) memory.append(get_MemoryTracking())
@ -90,6 +109,7 @@ def test_tcp_multiple_sessions():
memory.append(get_MemoryTracking()) memory.append(get_MemoryTracking())
check_memory(memory) check_memory(memory)
def test_tcp_single_session(): def test_tcp_single_session():
memory = [] memory = []
memory.append(get_MemoryTracking()) memory.append(get_MemoryTracking())
@ -97,9 +117,9 @@ def test_tcp_single_session():
sample_query, sample_query,
"SELECT metric, value FROM system.metrics WHERE metric = 'MemoryTracking'", "SELECT metric, value FROM system.metrics WHERE metric = 'MemoryTracking'",
] * 100 ] * 100
rows = query(';'.join(sample_queries)) rows = query(";".join(sample_queries))
memory = rows.split('\n') memory = rows.split("\n")
memory = filter(lambda x: x.startswith('MemoryTracking'), memory) memory = filter(lambda x: x.startswith("MemoryTracking"), memory)
memory = map(lambda x: x.split('\t')[1], memory) memory = map(lambda x: x.split("\t")[1], memory)
memory = [*memory] memory = [*memory]
check_memory(memory) check_memory(memory)
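check_memory above rounds each MemoryTracking sample down to whole megabytes and asserts that the number of distinct values stays below max(3, 10% of the sample count). The same threshold logic in isolation, with synthetic numbers instead of a running server:

def changes_within_limit(samples_bytes):
    # bytes -> whole megabytes, then compare the number of distinct values
    # against the allowance used above: at least 3, or 10% of the samples.
    megabytes = [int(x) // (1024 * 1024) for x in samples_bytes]
    allowed = max(3, int(len(megabytes) * 0.1))
    changed = len(set(megabytes))
    return changed < allowed, changed, allowed


if __name__ == "__main__":
    # ~512 MiB with a little jitter that never crosses a megabyte boundary.
    samples = [512 * 1024 * 1024 + i * 1024 for i in range(50)]
    print(changes_within_limit(samples))  # (True, 1, 5)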
@ -2,9 +2,15 @@ import pytest
from helpers.cluster import ClickHouseCluster from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__) cluster = ClickHouseCluster(__file__)
ch1 = cluster.add_instance('ch1', main_configs=["configs/config.d/clusters.xml"], with_zookeeper=True) ch1 = cluster.add_instance(
ch2 = cluster.add_instance('ch2', main_configs=["configs/config.d/clusters.xml"], with_zookeeper=True) "ch1", main_configs=["configs/config.d/clusters.xml"], with_zookeeper=True
ch3 = cluster.add_instance('ch3', main_configs=["configs/config.d/clusters.xml"], with_zookeeper=True) )
ch2 = cluster.add_instance(
"ch2", main_configs=["configs/config.d/clusters.xml"], with_zookeeper=True
)
ch3 = cluster.add_instance(
"ch3", main_configs=["configs/config.d/clusters.xml"], with_zookeeper=True
)
@pytest.fixture(scope="module", autouse=True) @pytest.fixture(scope="module", autouse=True)
@ -18,17 +24,23 @@ def started_cluster():
def test_access_control_on_cluster(): def test_access_control_on_cluster():
ch1.query_with_retry("CREATE USER IF NOT EXISTS Alex ON CLUSTER 'cluster'", retry_count=5) ch1.query_with_retry(
"CREATE USER IF NOT EXISTS Alex ON CLUSTER 'cluster'", retry_count=5
)
assert ch1.query("SHOW CREATE USER Alex") == "CREATE USER Alex\n" assert ch1.query("SHOW CREATE USER Alex") == "CREATE USER Alex\n"
assert ch2.query("SHOW CREATE USER Alex") == "CREATE USER Alex\n" assert ch2.query("SHOW CREATE USER Alex") == "CREATE USER Alex\n"
assert ch3.query("SHOW CREATE USER Alex") == "CREATE USER Alex\n" assert ch3.query("SHOW CREATE USER Alex") == "CREATE USER Alex\n"
ch2.query_with_retry("GRANT ON CLUSTER 'cluster' SELECT ON *.* TO Alex", retry_count=3) ch2.query_with_retry(
"GRANT ON CLUSTER 'cluster' SELECT ON *.* TO Alex", retry_count=3
)
assert ch1.query("SHOW GRANTS FOR Alex") == "GRANT SELECT ON *.* TO Alex\n" assert ch1.query("SHOW GRANTS FOR Alex") == "GRANT SELECT ON *.* TO Alex\n"
assert ch2.query("SHOW GRANTS FOR Alex") == "GRANT SELECT ON *.* TO Alex\n" assert ch2.query("SHOW GRANTS FOR Alex") == "GRANT SELECT ON *.* TO Alex\n"
assert ch3.query("SHOW GRANTS FOR Alex") == "GRANT SELECT ON *.* TO Alex\n" assert ch3.query("SHOW GRANTS FOR Alex") == "GRANT SELECT ON *.* TO Alex\n"
ch3.query_with_retry("REVOKE ON CLUSTER 'cluster' SELECT ON *.* FROM Alex", retry_count=3) ch3.query_with_retry(
"REVOKE ON CLUSTER 'cluster' SELECT ON *.* FROM Alex", retry_count=3
)
assert ch1.query("SHOW GRANTS FOR Alex") == "" assert ch1.query("SHOW GRANTS FOR Alex") == ""
assert ch2.query("SHOW GRANTS FOR Alex") == "" assert ch2.query("SHOW GRANTS FOR Alex") == ""
assert ch3.query("SHOW GRANTS FOR Alex") == "" assert ch3.query("SHOW GRANTS FOR Alex") == ""
@ -3,7 +3,7 @@ import uuid
from helpers.cluster import ClickHouseCluster from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__) cluster = ClickHouseCluster(__file__)
instance = cluster.add_instance('instance', stay_alive=True) instance = cluster.add_instance("instance", stay_alive=True)
@pytest.fixture(scope="module", autouse=True) @pytest.fixture(scope="module", autouse=True)
@ -21,21 +21,32 @@ def test_access_rights_for_function():
instance.query("CREATE USER A") instance.query("CREATE USER A")
instance.query("CREATE USER B") instance.query("CREATE USER B")
assert "it's necessary to have grant CREATE FUNCTION ON *.*" in instance.query_and_get_error(create_function_query, user = 'A') assert (
"it's necessary to have grant CREATE FUNCTION ON *.*"
in instance.query_and_get_error(create_function_query, user="A")
)
instance.query("GRANT CREATE FUNCTION on *.* TO A") instance.query("GRANT CREATE FUNCTION on *.* TO A")
instance.query(create_function_query, user = 'A') instance.query(create_function_query, user="A")
assert instance.query("SELECT MySum(1, 2)") == "3\n" assert instance.query("SELECT MySum(1, 2)") == "3\n"
assert "it's necessary to have grant DROP FUNCTION ON *.*" in instance.query_and_get_error("DROP FUNCTION MySum", user = 'B') assert (
"it's necessary to have grant DROP FUNCTION ON *.*"
in instance.query_and_get_error("DROP FUNCTION MySum", user="B")
)
instance.query("GRANT DROP FUNCTION ON *.* TO B") instance.query("GRANT DROP FUNCTION ON *.* TO B")
instance.query("DROP FUNCTION MySum", user = 'B') instance.query("DROP FUNCTION MySum", user="B")
assert "Unknown function MySum" in instance.query_and_get_error("SELECT MySum(1, 2)") assert "Unknown function MySum" in instance.query_and_get_error(
"SELECT MySum(1, 2)"
)
instance.query("REVOKE CREATE FUNCTION ON *.* FROM A") instance.query("REVOKE CREATE FUNCTION ON *.* FROM A")
assert "it's necessary to have grant CREATE FUNCTION ON *.*" in instance.query_and_get_error(create_function_query, user = 'A') assert (
"it's necessary to have grant CREATE FUNCTION ON *.*"
in instance.query_and_get_error(create_function_query, user="A")
)
instance.query("DROP USER IF EXISTS A") instance.query("DROP USER IF EXISTS A")
instance.query("DROP USER IF EXISTS B") instance.query("DROP USER IF EXISTS B")
@ -45,13 +56,21 @@ def test_ignore_obsolete_grant_on_database():
instance.stop_clickhouse() instance.stop_clickhouse()
user_id = uuid.uuid4() user_id = uuid.uuid4()
instance.exec_in_container(["bash", "-c" , f""" instance.exec_in_container(
[
"bash",
"-c",
f"""
cat > /var/lib/clickhouse/access/{user_id}.sql << EOF cat > /var/lib/clickhouse/access/{user_id}.sql << EOF
ATTACH USER X; ATTACH USER X;
ATTACH GRANT CREATE FUNCTION, SELECT ON mydb.* TO X; ATTACH GRANT CREATE FUNCTION, SELECT ON mydb.* TO X;
EOF"""]) EOF""",
]
)
instance.exec_in_container(["bash", "-c" , "touch /var/lib/clickhouse/access/need_rebuild_lists.mark"]) instance.exec_in_container(
["bash", "-c", "touch /var/lib/clickhouse/access/need_rebuild_lists.mark"]
)
instance.start_clickhouse() instance.start_clickhouse()
assert instance.query("SHOW GRANTS FOR X") == "GRANT SELECT ON mydb.* TO X\n" assert instance.query("SHOW GRANTS FOR X") == "GRANT SELECT ON mydb.* TO X\n"
@ -3,8 +3,8 @@ import pytest
from helpers.cluster import ClickHouseCluster from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__) cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1') node1 = cluster.add_instance("node1")
node2 = cluster.add_instance('node2') node2 = cluster.add_instance("node2")
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
@ -14,10 +14,15 @@ def start_cluster():
for node in [node1, node2]: for node in [node1, node2]:
node.query( node.query(
"create table da_memory_efficient_shard(A Int64, B Int64) Engine=MergeTree order by A partition by B % 2;") "create table da_memory_efficient_shard(A Int64, B Int64) Engine=MergeTree order by A partition by B % 2;"
)
node1.query("insert into da_memory_efficient_shard select number, number from numbers(100000);") node1.query(
node2.query("insert into da_memory_efficient_shard select number + 100000, number from numbers(100000);") "insert into da_memory_efficient_shard select number, number from numbers(100000);"
)
node2.query(
"insert into da_memory_efficient_shard select number + 100000, number from numbers(100000);"
)
yield cluster yield cluster
@ -27,23 +32,29 @@ def start_cluster():
def test_remote(start_cluster): def test_remote(start_cluster):
node1.query( node1.query(
"set distributed_aggregation_memory_efficient = 1, group_by_two_level_threshold = 1, group_by_two_level_threshold_bytes=1") "set distributed_aggregation_memory_efficient = 1, group_by_two_level_threshold = 1, group_by_two_level_threshold_bytes=1"
)
res = node1.query( res = node1.query(
"select sum(a) from (SELECT B, uniqExact(A) a FROM remote('node{1,2}', default.da_memory_efficient_shard) GROUP BY B)") "select sum(a) from (SELECT B, uniqExact(A) a FROM remote('node{1,2}', default.da_memory_efficient_shard) GROUP BY B)"
assert res == '200000\n' )
assert res == "200000\n"
node1.query("set distributed_aggregation_memory_efficient = 0") node1.query("set distributed_aggregation_memory_efficient = 0")
res = node1.query( res = node1.query(
"select sum(a) from (SELECT B, uniqExact(A) a FROM remote('node{1,2}', default.da_memory_efficient_shard) GROUP BY B)") "select sum(a) from (SELECT B, uniqExact(A) a FROM remote('node{1,2}', default.da_memory_efficient_shard) GROUP BY B)"
assert res == '200000\n' )
assert res == "200000\n"
node1.query( node1.query(
"set distributed_aggregation_memory_efficient = 1, group_by_two_level_threshold = 1, group_by_two_level_threshold_bytes=1") "set distributed_aggregation_memory_efficient = 1, group_by_two_level_threshold = 1, group_by_two_level_threshold_bytes=1"
)
res = node1.query( res = node1.query(
"SELECT fullHostName() AS h, uniqExact(A) AS a FROM remote('node{1,2}', default.da_memory_efficient_shard) GROUP BY h ORDER BY h;") "SELECT fullHostName() AS h, uniqExact(A) AS a FROM remote('node{1,2}', default.da_memory_efficient_shard) GROUP BY h ORDER BY h;"
assert res == 'node1\t100000\nnode2\t100000\n' )
assert res == "node1\t100000\nnode2\t100000\n"
node1.query("set distributed_aggregation_memory_efficient = 0") node1.query("set distributed_aggregation_memory_efficient = 0")
res = node1.query( res = node1.query(
"SELECT fullHostName() AS h, uniqExact(A) AS a FROM remote('node{1,2}', default.da_memory_efficient_shard) GROUP BY h ORDER BY h;") "SELECT fullHostName() AS h, uniqExact(A) AS a FROM remote('node{1,2}', default.da_memory_efficient_shard) GROUP BY h ORDER BY h;"
assert res == 'node1\t100000\nnode2\t100000\n' )
assert res == "node1\t100000\nnode2\t100000\n"
@ -2,31 +2,42 @@ import pytest
from helpers.cluster import ClickHouseCluster from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__) cluster = ClickHouseCluster(__file__)
server = cluster.add_instance('server', user_configs=["configs/users.d/network.xml"]) server = cluster.add_instance("server", user_configs=["configs/users.d/network.xml"])
clientA1 = cluster.add_instance('clientA1', hostname='clientA1.com') clientA1 = cluster.add_instance("clientA1", hostname="clientA1.com")
clientA2 = cluster.add_instance('clientA2', hostname='clientA2.com') clientA2 = cluster.add_instance("clientA2", hostname="clientA2.com")
clientA3 = cluster.add_instance('clientA3', hostname='clientA3.com') clientA3 = cluster.add_instance("clientA3", hostname="clientA3.com")
clientB1 = cluster.add_instance('clientB1', hostname='clientB001.ru') clientB1 = cluster.add_instance("clientB1", hostname="clientB001.ru")
clientB2 = cluster.add_instance('clientB2', hostname='clientB002.ru') clientB2 = cluster.add_instance("clientB2", hostname="clientB002.ru")
clientB3 = cluster.add_instance('clientB3', hostname='xxx.clientB003.rutracker.com') clientB3 = cluster.add_instance("clientB3", hostname="xxx.clientB003.rutracker.com")
clientC1 = cluster.add_instance('clientC1', hostname='clientC01.ru') clientC1 = cluster.add_instance("clientC1", hostname="clientC01.ru")
clientC2 = cluster.add_instance('clientC2', hostname='xxx.clientC02.ru') clientC2 = cluster.add_instance("clientC2", hostname="xxx.clientC02.ru")
clientC3 = cluster.add_instance('clientC3', hostname='xxx.clientC03.rutracker.com') clientC3 = cluster.add_instance("clientC3", hostname="xxx.clientC03.rutracker.com")
clientD1 = cluster.add_instance('clientD1', hostname='clientD0001.ru') clientD1 = cluster.add_instance("clientD1", hostname="clientD0001.ru")
clientD2 = cluster.add_instance('clientD2', hostname='xxx.clientD0002.ru') clientD2 = cluster.add_instance("clientD2", hostname="xxx.clientD0002.ru")
clientD3 = cluster.add_instance('clientD3', hostname='clientD0003.ru') clientD3 = cluster.add_instance("clientD3", hostname="clientD0003.ru")
def check_clickhouse_is_ok(client_node, server_node): def check_clickhouse_is_ok(client_node, server_node):
assert client_node.exec_in_container( assert (
["bash", "-c", "/usr/bin/curl -s {}:8123 ".format(server_node.hostname)]) == "Ok.\n" client_node.exec_in_container(
["bash", "-c", "/usr/bin/curl -s {}:8123 ".format(server_node.hostname)]
)
== "Ok.\n"
)
def query_from_one_node_to_another(client_node, server_node, query): def query_from_one_node_to_another(client_node, server_node, query):
check_clickhouse_is_ok(client_node, server_node) check_clickhouse_is_ok(client_node, server_node)
return client_node.exec_in_container( return client_node.exec_in_container(
["bash", "-c", "/usr/bin/clickhouse client --host {} --query {!r}".format(server_node.hostname, query)]) [
"bash",
"-c",
"/usr/bin/clickhouse client --host {} --query {!r}".format(
server_node.hostname, query
),
]
)
def query(node, query): def query(node, query):
@ -38,7 +49,10 @@ def setup_nodes():
try: try:
cluster.start() cluster.start()
query(server, "DROP TABLE IF EXISTS test_allowed_client_hosts") query(server, "DROP TABLE IF EXISTS test_allowed_client_hosts")
query(server, "CREATE TABLE test_allowed_client_hosts (x Int32) ENGINE = MergeTree() ORDER BY tuple()") query(
server,
"CREATE TABLE test_allowed_client_hosts (x Int32) ENGINE = MergeTree() ORDER BY tuple()",
)
query(server, "INSERT INTO test_allowed_client_hosts VALUES (5)") query(server, "INSERT INTO test_allowed_client_hosts VALUES (5)")
yield cluster yield cluster
@ -58,8 +72,15 @@ def test_allowed_host():
# expected_to_fail.extend([clientC3, clientD2]) # expected_to_fail.extend([clientC3, clientD2])
for client_node in expected_to_pass: for client_node in expected_to_pass:
assert query_from_one_node_to_another(client_node, server, "SELECT * FROM test_allowed_client_hosts") == "5\n" assert (
query_from_one_node_to_another(
client_node, server, "SELECT * FROM test_allowed_client_hosts"
)
== "5\n"
)
for client_node in expected_to_fail: for client_node in expected_to_fail:
with pytest.raises(Exception, match=r'default: Authentication failed'): with pytest.raises(Exception, match=r"default: Authentication failed"):
query_from_one_node_to_another(client_node, server, "SELECT * FROM test_allowed_client_hosts") query_from_one_node_to_another(
client_node, server, "SELECT * FROM test_allowed_client_hosts"
)
@ -2,13 +2,23 @@ import pytest
from helpers.cluster import ClickHouseCluster from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__) cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', main_configs=['configs/config_with_hosts.xml']) node1 = cluster.add_instance("node1", main_configs=["configs/config_with_hosts.xml"])
node2 = cluster.add_instance('node2', main_configs=['configs/config_with_only_primary_hosts.xml']) node2 = cluster.add_instance(
node3 = cluster.add_instance('node3', main_configs=['configs/config_with_only_regexp_hosts.xml']) "node2", main_configs=["configs/config_with_only_primary_hosts.xml"]
node4 = cluster.add_instance('node4', main_configs=[]) # No `remote_url_allow_hosts` at all. )
node5 = cluster.add_instance('node5', main_configs=['configs/config_without_allowed_hosts.xml']) node3 = cluster.add_instance(
node6 = cluster.add_instance('node6', main_configs=['configs/config_for_remote.xml']) "node3", main_configs=["configs/config_with_only_regexp_hosts.xml"]
node7 = cluster.add_instance('node7', main_configs=['configs/config_for_redirect.xml'], with_hdfs=True) )
node4 = cluster.add_instance(
"node4", main_configs=[]
) # No `remote_url_allow_hosts` at all.
node5 = cluster.add_instance(
"node5", main_configs=["configs/config_without_allowed_hosts.xml"]
)
node6 = cluster.add_instance("node6", main_configs=["configs/config_for_remote.xml"])
node7 = cluster.add_instance(
"node7", main_configs=["configs/config_for_redirect.xml"], with_hdfs=True
)
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
@ -21,97 +31,229 @@ def start_cluster():
def test_config_with_hosts(start_cluster): def test_config_with_hosts(start_cluster):
assert node1.query("CREATE TABLE table_test_1_1 (word String) Engine=URL('http://host:80', HDFS)") == "" assert (
assert node1.query("CREATE TABLE table_test_1_2 (word String) Engine=URL('https://yandex.ru', CSV)") == "" node1.query(
"CREATE TABLE table_test_1_1 (word String) Engine=URL('http://host:80', HDFS)"
)
== ""
)
assert (
node1.query(
"CREATE TABLE table_test_1_2 (word String) Engine=URL('https://yandex.ru', CSV)"
)
== ""
)
assert "not allowed" in node1.query_and_get_error( assert "not allowed" in node1.query_and_get_error(
"CREATE TABLE table_test_1_4 (word String) Engine=URL('https://host:123', S3)") "CREATE TABLE table_test_1_4 (word String) Engine=URL('https://host:123', S3)"
)
assert "not allowed" in node1.query_and_get_error( assert "not allowed" in node1.query_and_get_error(
"CREATE TABLE table_test_1_4 (word String) Engine=URL('https://yandex2.ru', CSV)") "CREATE TABLE table_test_1_4 (word String) Engine=URL('https://yandex2.ru', CSV)"
)
def test_config_with_only_primary_hosts(start_cluster): def test_config_with_only_primary_hosts(start_cluster):
assert node2.query("CREATE TABLE table_test_2_1 (word String) Engine=URL('https://host:80', CSV)") == "" assert (
assert node2.query("CREATE TABLE table_test_2_2 (word String) Engine=URL('https://host:123', S3)") == "" node2.query(
assert node2.query("CREATE TABLE table_test_2_3 (word String) Engine=URL('https://yandex.ru', CSV)") == "" "CREATE TABLE table_test_2_1 (word String) Engine=URL('https://host:80', CSV)"
assert node2.query("CREATE TABLE table_test_2_4 (word String) Engine=URL('https://yandex.ru:87', HDFS)") == "" )
== ""
)
assert (
node2.query(
"CREATE TABLE table_test_2_2 (word String) Engine=URL('https://host:123', S3)"
)
== ""
)
assert (
node2.query(
"CREATE TABLE table_test_2_3 (word String) Engine=URL('https://yandex.ru', CSV)"
)
== ""
)
assert (
node2.query(
"CREATE TABLE table_test_2_4 (word String) Engine=URL('https://yandex.ru:87', HDFS)"
)
== ""
)
assert "not allowed" in node2.query_and_get_error( assert "not allowed" in node2.query_and_get_error(
"CREATE TABLE table_test_2_5 (word String) Engine=URL('https://host', HDFS)") "CREATE TABLE table_test_2_5 (word String) Engine=URL('https://host', HDFS)"
)
assert "not allowed" in node2.query_and_get_error( assert "not allowed" in node2.query_and_get_error(
"CREATE TABLE table_test_2_5 (word String) Engine=URL('https://host:234', CSV)") "CREATE TABLE table_test_2_5 (word String) Engine=URL('https://host:234', CSV)"
)
assert "not allowed" in node2.query_and_get_error( assert "not allowed" in node2.query_and_get_error(
"CREATE TABLE table_test_2_6 (word String) Engine=URL('https://yandex2.ru', S3)") "CREATE TABLE table_test_2_6 (word String) Engine=URL('https://yandex2.ru', S3)"
)
def test_config_with_only_regexp_hosts(start_cluster): def test_config_with_only_regexp_hosts(start_cluster):
assert node3.query("CREATE TABLE table_test_3_1 (word String) Engine=URL('https://host:80', HDFS)") == "" assert (
assert node3.query("CREATE TABLE table_test_3_2 (word String) Engine=URL('https://yandex.ru', CSV)") == "" node3.query(
"CREATE TABLE table_test_3_1 (word String) Engine=URL('https://host:80', HDFS)"
)
== ""
)
assert (
node3.query(
"CREATE TABLE table_test_3_2 (word String) Engine=URL('https://yandex.ru', CSV)"
)
== ""
)
assert "not allowed" in node3.query_and_get_error( assert "not allowed" in node3.query_and_get_error(
"CREATE TABLE table_test_3_3 (word String) Engine=URL('https://host', CSV)") "CREATE TABLE table_test_3_3 (word String) Engine=URL('https://host', CSV)"
)
assert "not allowed" in node3.query_and_get_error( assert "not allowed" in node3.query_and_get_error(
"CREATE TABLE table_test_3_4 (word String) Engine=URL('https://yandex2.ru', S3)") "CREATE TABLE table_test_3_4 (word String) Engine=URL('https://yandex2.ru', S3)"
)
def test_config_without_allowed_hosts_section(start_cluster): def test_config_without_allowed_hosts_section(start_cluster):
assert node4.query("CREATE TABLE table_test_4_1 (word String) Engine=URL('https://host:80', CSV)") == "" assert (
assert node4.query("CREATE TABLE table_test_4_2 (word String) Engine=S3('https://host:80/bucket/key', CSV)") == "" node4.query(
assert node4.query("CREATE TABLE table_test_4_3 (word String) Engine=URL('https://host', HDFS)") == "" "CREATE TABLE table_test_4_1 (word String) Engine=URL('https://host:80', CSV)"
assert node4.query("CREATE TABLE table_test_4_4 (word String) Engine=URL('https://yandex.ru', CSV)") == "" )
assert node4.query("CREATE TABLE table_test_4_5 (word String) Engine=URL('ftp://something.com', S3)") == "" == ""
)
assert (
node4.query(
"CREATE TABLE table_test_4_2 (word String) Engine=S3('https://host:80/bucket/key', CSV)"
)
== ""
)
assert (
node4.query(
"CREATE TABLE table_test_4_3 (word String) Engine=URL('https://host', HDFS)"
)
== ""
)
assert (
node4.query(
"CREATE TABLE table_test_4_4 (word String) Engine=URL('https://yandex.ru', CSV)"
)
== ""
)
assert (
node4.query(
"CREATE TABLE table_test_4_5 (word String) Engine=URL('ftp://something.com', S3)"
)
== ""
)
def test_config_without_allowed_hosts(start_cluster): def test_config_without_allowed_hosts(start_cluster):
assert "not allowed" in node5.query_and_get_error( assert "not allowed" in node5.query_and_get_error(
"CREATE TABLE table_test_5_1 (word String) Engine=URL('https://host:80', CSV)") "CREATE TABLE table_test_5_1 (word String) Engine=URL('https://host:80', CSV)"
)
assert "not allowed" in node5.query_and_get_error( assert "not allowed" in node5.query_and_get_error(
"CREATE TABLE table_test_5_2 (word String) Engine=S3('https://host:80/bucket/key', CSV)") "CREATE TABLE table_test_5_2 (word String) Engine=S3('https://host:80/bucket/key', CSV)"
)
assert "not allowed" in node5.query_and_get_error( assert "not allowed" in node5.query_and_get_error(
"CREATE TABLE table_test_5_3 (word String) Engine=URL('https://host', HDFS)") "CREATE TABLE table_test_5_3 (word String) Engine=URL('https://host', HDFS)"
)
assert "not allowed" in node5.query_and_get_error( assert "not allowed" in node5.query_and_get_error(
"CREATE TABLE table_test_5_4 (word String) Engine=URL('https://yandex.ru', CSV)") "CREATE TABLE table_test_5_4 (word String) Engine=URL('https://yandex.ru', CSV)"
)
assert "not allowed" in node5.query_and_get_error( assert "not allowed" in node5.query_and_get_error(
"CREATE TABLE table_test_5_5 (word String) Engine=URL('ftp://something.com', S3)") "CREATE TABLE table_test_5_5 (word String) Engine=URL('ftp://something.com', S3)"
)
def test_table_function_remote(start_cluster): def test_table_function_remote(start_cluster):
assert "not allowed in configuration file" not in node6.query_and_get_error( assert "not allowed in configuration file" not in node6.query_and_get_error(
"SELECT * FROM remoteSecure('example01-01-{1|2}', system, events)", "SELECT * FROM remoteSecure('example01-01-{1|2}', system, events)",
settings={"connections_with_failover_max_tries": 1, "connect_timeout_with_failover_ms": 1000, settings={
"connect_timeout_with_failover_secure_ms": 1000, "connect_timeout": 1, "send_timeout": 1}) "connections_with_failover_max_tries": 1,
"connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000,
"connect_timeout": 1,
"send_timeout": 1,
},
)
assert "not allowed in configuration file" not in node6.query_and_get_error( assert "not allowed in configuration file" not in node6.query_and_get_error(
"SELECT * FROM remoteSecure('example01-01-1,example01-02-1', system, events)", "SELECT * FROM remoteSecure('example01-01-1,example01-02-1', system, events)",
settings={"connections_with_failover_max_tries": 1, "connect_timeout_with_failover_ms": 1000, settings={
"connect_timeout_with_failover_secure_ms": 1000, "connect_timeout": 1, "send_timeout": 1}) "connections_with_failover_max_tries": 1,
"connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000,
"connect_timeout": 1,
"send_timeout": 1,
},
)
assert "not allowed in configuration file" not in node6.query_and_get_error( assert "not allowed in configuration file" not in node6.query_and_get_error(
"SELECT * FROM remote('example01-0{1,2}-1', system, events", "SELECT * FROM remote('example01-0{1,2}-1', system, events",
settings={"connections_with_failover_max_tries": 1, "connect_timeout_with_failover_ms": 1000, settings={
"connect_timeout_with_failover_secure_ms": 1000, "connect_timeout": 1, "send_timeout": 1}) "connections_with_failover_max_tries": 1,
"connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000,
"connect_timeout": 1,
"send_timeout": 1,
},
)
assert "not allowed in configuration file" not in node6.query_and_get_error( assert "not allowed in configuration file" not in node6.query_and_get_error(
"SELECT * FROM remote('example01-0{1,2}-{1|2}', system, events)", "SELECT * FROM remote('example01-0{1,2}-{1|2}', system, events)",
settings={"connections_with_failover_max_tries": 1, "connect_timeout_with_failover_ms": 1000, settings={
"connect_timeout_with_failover_secure_ms": 1000, "connect_timeout": 1, "send_timeout": 1}) "connections_with_failover_max_tries": 1,
"connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000,
"connect_timeout": 1,
"send_timeout": 1,
},
)
assert "not allowed in configuration file" not in node6.query_and_get_error( assert "not allowed in configuration file" not in node6.query_and_get_error(
"SELECT * FROM remoteSecure('example01-{01..02}-{1|2}', system, events)", "SELECT * FROM remoteSecure('example01-{01..02}-{1|2}', system, events)",
settings={"connections_with_failover_max_tries": 1, "connect_timeout_with_failover_ms": 1000, settings={
"connect_timeout_with_failover_secure_ms": 1000, "connect_timeout": 1, "send_timeout": 1}) "connections_with_failover_max_tries": 1,
"connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000,
"connect_timeout": 1,
"send_timeout": 1,
},
)
assert "not allowed" in node6.query_and_get_error( assert "not allowed" in node6.query_and_get_error(
"SELECT * FROM remoteSecure('example01-01-1,example01-03-1', system, events)", "SELECT * FROM remoteSecure('example01-01-1,example01-03-1', system, events)",
settings={"connections_with_failover_max_tries": 1, "connect_timeout_with_failover_ms": 1000, settings={
"connect_timeout_with_failover_secure_ms": 1000, "connect_timeout": 1, "send_timeout": 1}) "connections_with_failover_max_tries": 1,
assert "not allowed" in node6.query_and_get_error("SELECT * FROM remote('example01-01-{1|3}', system, events)", "connect_timeout_with_failover_ms": 1000,
settings={"connections_with_failover_max_tries": 1, "connect_timeout_with_failover_secure_ms": 1000,
"connect_timeout_with_failover_ms": 1000, "connect_timeout": 1,
"connect_timeout_with_failover_secure_ms": 1000, "send_timeout": 1,
"connect_timeout": 1, "send_timeout": 1}) },
)
assert "not allowed" in node6.query_and_get_error(
"SELECT * FROM remote('example01-01-{1|3}', system, events)",
settings={
"connections_with_failover_max_tries": 1,
"connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000,
"connect_timeout": 1,
"send_timeout": 1,
},
)
assert "not allowed" in node6.query_and_get_error( assert "not allowed" in node6.query_and_get_error(
"SELECT * FROM remoteSecure('example01-0{1,3}-1', system, metrics)", "SELECT * FROM remoteSecure('example01-0{1,3}-1', system, metrics)",
settings={"connections_with_failover_max_tries": 1, "connect_timeout_with_failover_ms": 1000, settings={
"connect_timeout_with_failover_secure_ms": 1000, "connect_timeout": 1, "send_timeout": 1}) "connections_with_failover_max_tries": 1,
"connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000,
"connect_timeout": 1,
"send_timeout": 1,
},
)
assert node6.query("SELECT * FROM remote('localhost', system, events)") != "" assert node6.query("SELECT * FROM remote('localhost', system, events)") != ""
assert node6.query("SELECT * FROM remoteSecure('localhost', system, metrics)") != "" assert node6.query("SELECT * FROM remoteSecure('localhost', system, metrics)") != ""
assert "URL \"localhost:800\" is not allowed in configuration file" in node6.query_and_get_error( assert (
"SELECT * FROM remoteSecure('localhost:800', system, events)") 'URL "localhost:800" is not allowed in configuration file'
assert "URL \"localhost:800\" is not allowed in configuration file" in node6.query_and_get_error( in node6.query_and_get_error(
"SELECT * FROM remote('localhost:800', system, metrics)") "SELECT * FROM remoteSecure('localhost:800', system, events)"
)
)
assert (
'URL "localhost:800" is not allowed in configuration file'
in node6.query_and_get_error(
"SELECT * FROM remote('localhost:800', system, metrics)"
)
)
def test_redirect(start_cluster): def test_redirect(start_cluster):
@ -120,12 +262,17 @@ def test_redirect(start_cluster):
hdfs_api.write_data("/simple_storage", "1\t\n") hdfs_api.write_data("/simple_storage", "1\t\n")
assert hdfs_api.read_data("/simple_storage") == "1\t\n" assert hdfs_api.read_data("/simple_storage") == "1\t\n"
node7.query( node7.query(
"CREATE TABLE table_test_7_1 (word String) ENGINE=URL('http://hdfs1:50070/webhdfs/v1/simple_storage?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', CSV)") "CREATE TABLE table_test_7_1 (word String) ENGINE=URL('http://hdfs1:50070/webhdfs/v1/simple_storage?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', CSV)"
assert "not allowed" in node7.query_and_get_error("SET max_http_get_redirects=1; SELECT * from table_test_7_1") )
assert "not allowed" in node7.query_and_get_error(
"SET max_http_get_redirects=1; SELECT * from table_test_7_1"
)
def test_HDFS(start_cluster): def test_HDFS(start_cluster):
assert "not allowed" in node7.query_and_get_error( assert "not allowed" in node7.query_and_get_error(
"CREATE TABLE table_test_7_2 (word String) ENGINE=HDFS('http://hdfs1:50075/webhdfs/v1/simple_storage?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', 'CSV')") "CREATE TABLE table_test_7_2 (word String) ENGINE=HDFS('http://hdfs1:50075/webhdfs/v1/simple_storage?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', 'CSV')"
)
assert "not allowed" in node7.query_and_get_error( assert "not allowed" in node7.query_and_get_error(
"SELECT * FROM hdfs('http://hdfs1:50075/webhdfs/v1/simple_storage?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', 'TSV', 'word String')") "SELECT * FROM hdfs('http://hdfs1:50075/webhdfs/v1/simple_storage?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', 'TSV', 'word String')"
)
@ -4,8 +4,7 @@ from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__) cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', node1 = cluster.add_instance("node1", main_configs=["configs/logs_config.xml"])
main_configs=['configs/logs_config.xml'])
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
@ -21,30 +20,60 @@ def started_cluster():
def test_alter_codec_pk(started_cluster): def test_alter_codec_pk(started_cluster):
try: try:
name = "test_alter_codec_pk" name = "test_alter_codec_pk"
node1.query(""" node1.query(
"""
CREATE TABLE {name} (id UInt64, value UInt64) Engine=MergeTree() ORDER BY id CREATE TABLE {name} (id UInt64, value UInt64) Engine=MergeTree() ORDER BY id
""".format(name=name)) """.format(
name=name
)
)
node1.query("INSERT INTO {name} SELECT number, number * number from numbers(100)".format(name=name)) node1.query(
"INSERT INTO {name} SELECT number, number * number from numbers(100)".format(
name=name
)
)
node1.query("ALTER TABLE {name} MODIFY COLUMN id UInt64 CODEC(NONE)".format(name=name)) node1.query(
node1.query("ALTER TABLE {name} MODIFY COLUMN id UInt64 CODEC(Delta, LZ4)".format(name=name)) "ALTER TABLE {name} MODIFY COLUMN id UInt64 CODEC(NONE)".format(name=name)
)
node1.query(
"ALTER TABLE {name} MODIFY COLUMN id UInt64 CODEC(Delta, LZ4)".format(
name=name
)
)
assert node1.query("SELECT sum(id) FROM {name}".format(name=name)) == "4950\n" assert node1.query("SELECT sum(id) FROM {name}".format(name=name)) == "4950\n"
with pytest.raises(QueryRuntimeException): with pytest.raises(QueryRuntimeException):
node1.query("ALTER TABLE {name} MODIFY COLUMN id UInt32 CODEC(Delta, LZ4)".format(name=name)) node1.query(
"ALTER TABLE {name} MODIFY COLUMN id UInt32 CODEC(Delta, LZ4)".format(
name=name
)
)
node1.query("ALTER TABLE {name} MODIFY COLUMN id UInt64 DEFAULT 3 CODEC(Delta, LZ4)".format(name=name)) node1.query(
"ALTER TABLE {name} MODIFY COLUMN id UInt64 DEFAULT 3 CODEC(Delta, LZ4)".format(
name=name
)
)
node1.query("INSERT INTO {name} (value) VALUES (1)".format(name=name)) node1.query("INSERT INTO {name} (value) VALUES (1)".format(name=name))
assert node1.query("SELECT sum(id) FROM {name}".format(name=name)) == "4953\n" assert node1.query("SELECT sum(id) FROM {name}".format(name=name)) == "4953\n"
with pytest.raises(QueryRuntimeException): with pytest.raises(QueryRuntimeException):
node1.query("ALTER TABLE {name} MODIFY COLUMN id UInt64 ALIAS 3 CODEC(Delta, LZ4)".format(name=name)) node1.query(
"ALTER TABLE {name} MODIFY COLUMN id UInt64 ALIAS 3 CODEC(Delta, LZ4)".format(
name=name
)
)
node1.query("ALTER TABLE {name} MODIFY COLUMN id UInt64 MATERIALIZED 3 CODEC(Delta, LZ4)".format(name=name)) node1.query(
"ALTER TABLE {name} MODIFY COLUMN id UInt64 MATERIALIZED 3 CODEC(Delta, LZ4)".format(
name=name
)
)
node1.query("INSERT INTO {name} (value) VALUES (1)".format(name=name)) node1.query("INSERT INTO {name} (value) VALUES (1)".format(name=name))
@ -61,28 +90,58 @@ def test_alter_codec_pk(started_cluster):
def test_alter_codec_index(started_cluster): def test_alter_codec_index(started_cluster):
try: try:
name = "test_alter_codec_index" name = "test_alter_codec_index"
node1.query(""" node1.query(
"""
CREATE TABLE {name} (`id` UInt64, value UInt64, INDEX id_index id TYPE minmax GRANULARITY 1) Engine=MergeTree() ORDER BY tuple() CREATE TABLE {name} (`id` UInt64, value UInt64, INDEX id_index id TYPE minmax GRANULARITY 1) Engine=MergeTree() ORDER BY tuple()
""".format(name=name)) """.format(
name=name
)
)
node1.query("INSERT INTO {name} SELECT number, number * number from numbers(100)".format(name=name)) node1.query(
"INSERT INTO {name} SELECT number, number * number from numbers(100)".format(
name=name
)
)
node1.query("ALTER TABLE {name} MODIFY COLUMN id UInt64 CODEC(NONE)".format(name=name)) node1.query(
node1.query("ALTER TABLE {name} MODIFY COLUMN id UInt64 CODEC(Delta, LZ4)".format(name=name)) "ALTER TABLE {name} MODIFY COLUMN id UInt64 CODEC(NONE)".format(name=name)
)
node1.query(
"ALTER TABLE {name} MODIFY COLUMN id UInt64 CODEC(Delta, LZ4)".format(
name=name
)
)
with pytest.raises(QueryRuntimeException): with pytest.raises(QueryRuntimeException):
node1.query("ALTER TABLE {name} MODIFY COLUMN id UInt32 CODEC(Delta, LZ4)".format(name=name)) node1.query(
"ALTER TABLE {name} MODIFY COLUMN id UInt32 CODEC(Delta, LZ4)".format(
name=name
)
)
node1.query("ALTER TABLE {name} MODIFY COLUMN id UInt64 DEFAULT 3 CODEC(Delta, LZ4)".format(name=name)) node1.query(
"ALTER TABLE {name} MODIFY COLUMN id UInt64 DEFAULT 3 CODEC(Delta, LZ4)".format(
name=name
)
)
node1.query("INSERT INTO {name} (value) VALUES (1)".format(name=name)) node1.query("INSERT INTO {name} (value) VALUES (1)".format(name=name))
assert node1.query("SELECT sum(id) FROM {name}".format(name=name)) == "4953\n" assert node1.query("SELECT sum(id) FROM {name}".format(name=name)) == "4953\n"
with pytest.raises(QueryRuntimeException): with pytest.raises(QueryRuntimeException):
node1.query("ALTER TABLE {name} MODIFY COLUMN id UInt64 ALIAS 3 CODEC(Delta, LZ4)".format(name=name)) node1.query(
"ALTER TABLE {name} MODIFY COLUMN id UInt64 ALIAS 3 CODEC(Delta, LZ4)".format(
name=name
)
)
node1.query("ALTER TABLE {name} MODIFY COLUMN id UInt64 MATERIALIZED 3 CODEC(Delta, LZ4)".format(name=name)) node1.query(
"ALTER TABLE {name} MODIFY COLUMN id UInt64 MATERIALIZED 3 CODEC(Delta, LZ4)".format(
name=name
)
)
node1.query("INSERT INTO {name} (value) VALUES (1)".format(name=name)) node1.query("INSERT INTO {name} (value) VALUES (1)".format(name=name))
@ -4,11 +4,18 @@ from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__) cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', main_configs=['configs/remote_servers.xml'], with_zookeeper=True) node1 = cluster.add_instance(
node2 = cluster.add_instance('node2', main_configs=['configs/remote_servers.xml'], with_zookeeper=True) "node1", main_configs=["configs/remote_servers.xml"], with_zookeeper=True
node3 = cluster.add_instance('node3', main_configs=['configs/remote_servers.xml'], with_zookeeper=True) )
node4 = cluster.add_instance('node4', main_configs=['configs/remote_servers.xml'], with_zookeeper=True) node2 = cluster.add_instance(
"node2", main_configs=["configs/remote_servers.xml"], with_zookeeper=True
)
node3 = cluster.add_instance(
"node3", main_configs=["configs/remote_servers.xml"], with_zookeeper=True
)
node4 = cluster.add_instance(
"node4", main_configs=["configs/remote_servers.xml"], with_zookeeper=True
)
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
@ -17,19 +24,31 @@ def started_cluster():
cluster.start() cluster.start()
for node in [node1, node2]: for node in [node1, node2]:
node.query_with_retry(''' node.query_with_retry(
"""
CREATE TABLE IF NOT EXISTS test_table_replicated(date Date, id UInt32, value Int32) CREATE TABLE IF NOT EXISTS test_table_replicated(date Date, id UInt32, value Int32)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/sometable', '{replica}') ORDER BY id; ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/sometable', '{replica}') ORDER BY id;
'''.format(replica=node.name)) """.format(
node.query_with_retry('''CREATE TABLE IF NOT EXISTS test_table(date Date, id UInt32, value Int32) ENGINE=MergeTree ORDER BY id''') replica=node.name
)
)
node.query_with_retry(
"""CREATE TABLE IF NOT EXISTS test_table(date Date, id UInt32, value Int32) ENGINE=MergeTree ORDER BY id"""
)
for node in [node3, node4]: for node in [node3, node4]:
node.query_with_retry(''' node.query_with_retry(
"""
CREATE TABLE IF NOT EXISTS test_table_replicated(date Date, id UInt32, value Int32) CREATE TABLE IF NOT EXISTS test_table_replicated(date Date, id UInt32, value Int32)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/1/someotable', '{replica}') ORDER BY id; ENGINE = ReplicatedMergeTree('/clickhouse/tables/1/someotable', '{replica}') ORDER BY id;
'''.format(replica=node.name)) """.format(
replica=node.name
)
)
node.query_with_retry('''CREATE TABLE IF NOT EXISTS test_table(date Date, id UInt32, value Int32) ENGINE=MergeTree ORDER BY id''') node.query_with_retry(
"""CREATE TABLE IF NOT EXISTS test_table(date Date, id UInt32, value Int32) ENGINE=MergeTree ORDER BY id"""
)
yield cluster yield cluster
@ -46,17 +65,23 @@ def test_alter_on_cluter_non_replicated(started_cluster):
assert node3.query("SELECT COUNT() FROM test_table") == "1\n" assert node3.query("SELECT COUNT() FROM test_table") == "1\n"
assert node4.query("SELECT COUNT() FROM test_table") == "1\n" assert node4.query("SELECT COUNT() FROM test_table") == "1\n"
node1.query("ALTER TABLE test_table ON CLUSTER 'test_cluster_mixed' MODIFY COLUMN date DateTime") node1.query(
"ALTER TABLE test_table ON CLUSTER 'test_cluster_mixed' MODIFY COLUMN date DateTime"
)
assert node1.query("SELECT date FROM test_table") == '2019-10-01 00:00:00\n' assert node1.query("SELECT date FROM test_table") == "2019-10-01 00:00:00\n"
assert node2.query("SELECT date FROM test_table") == '2019-10-01 00:00:00\n' assert node2.query("SELECT date FROM test_table") == "2019-10-01 00:00:00\n"
assert node3.query("SELECT date FROM test_table") == '2019-10-01 00:00:00\n' assert node3.query("SELECT date FROM test_table") == "2019-10-01 00:00:00\n"
assert node4.query("SELECT date FROM test_table") == '2019-10-01 00:00:00\n' assert node4.query("SELECT date FROM test_table") == "2019-10-01 00:00:00\n"
node3.query("ALTER TABLE test_table ON CLUSTER 'test_cluster_mixed' MODIFY COLUMN value String") node3.query(
"ALTER TABLE test_table ON CLUSTER 'test_cluster_mixed' MODIFY COLUMN value String"
)
for node in [node1, node2, node3, node4]: for node in [node1, node2, node3, node4]:
node.query("INSERT INTO test_table VALUES(toDateTime('2019-10-02 00:00:00'), 2, 'Hello')") node.query(
"INSERT INTO test_table VALUES(toDateTime('2019-10-02 00:00:00'), 2, 'Hello')"
)
assert node1.query("SELECT COUNT() FROM test_table") == "2\n" assert node1.query("SELECT COUNT() FROM test_table") == "2\n"
assert node2.query("SELECT COUNT() FROM test_table") == "2\n" assert node2.query("SELECT COUNT() FROM test_table") == "2\n"
@ -66,22 +91,40 @@ def test_alter_on_cluter_non_replicated(started_cluster):
def test_alter_replicated_on_cluster(started_cluster): def test_alter_replicated_on_cluster(started_cluster):
for node in [node1, node3]: for node in [node1, node3]:
node.query("INSERT INTO test_table_replicated VALUES(toDate('2019-10-01'), 1, 1)") node.query(
"INSERT INTO test_table_replicated VALUES(toDate('2019-10-01'), 1, 1)"
)
for node in [node2, node4]: for node in [node2, node4]:
node.query("SYSTEM SYNC REPLICA test_table_replicated", timeout=20) node.query("SYSTEM SYNC REPLICA test_table_replicated", timeout=20)
node1.query("ALTER TABLE test_table_replicated ON CLUSTER 'test_cluster_mixed' MODIFY COLUMN date DateTime", settings={"replication_alter_partitions_sync": "2"}) node1.query(
"ALTER TABLE test_table_replicated ON CLUSTER 'test_cluster_mixed' MODIFY COLUMN date DateTime",
settings={"replication_alter_partitions_sync": "2"},
)
assert node1.query("SELECT date FROM test_table_replicated") == '2019-10-01 00:00:00\n' assert (
assert node2.query("SELECT date FROM test_table_replicated") == '2019-10-01 00:00:00\n' node1.query("SELECT date FROM test_table_replicated") == "2019-10-01 00:00:00\n"
assert node3.query("SELECT date FROM test_table_replicated") == '2019-10-01 00:00:00\n' )
assert node4.query("SELECT date FROM test_table_replicated") == '2019-10-01 00:00:00\n' assert (
node2.query("SELECT date FROM test_table_replicated") == "2019-10-01 00:00:00\n"
)
assert (
node3.query("SELECT date FROM test_table_replicated") == "2019-10-01 00:00:00\n"
)
assert (
node4.query("SELECT date FROM test_table_replicated") == "2019-10-01 00:00:00\n"
)
node3.query_with_retry("ALTER TABLE test_table_replicated ON CLUSTER 'test_cluster_mixed' MODIFY COLUMN value String", settings={"replication_alter_partitions_sync": "2"}) node3.query_with_retry(
"ALTER TABLE test_table_replicated ON CLUSTER 'test_cluster_mixed' MODIFY COLUMN value String",
settings={"replication_alter_partitions_sync": "2"},
)
for node in [node2, node4]: for node in [node2, node4]:
node.query("INSERT INTO test_table_replicated VALUES(toDateTime('2019-10-02 00:00:00'), 2, 'Hello')") node.query(
"INSERT INTO test_table_replicated VALUES(toDateTime('2019-10-02 00:00:00'), 2, 'Hello')"
)
for node in [node1, node3]: for node in [node1, node3]:
node.query("SYSTEM SYNC REPLICA test_table_replicated", timeout=20) node.query("SYSTEM SYNC REPLICA test_table_replicated", timeout=20)
@ -3,7 +3,10 @@ from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__) cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', user_configs=['configs/users.xml'], with_zookeeper=True) node1 = cluster.add_instance(
"node1", user_configs=["configs/users.xml"], with_zookeeper=True
)
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
def started_cluster(): def started_cluster():
@ -13,24 +16,30 @@ def started_cluster():
finally: finally:
cluster.shutdown() cluster.shutdown()
def test_cast_keep_nullable(started_cluster):
setting = node1.query("SELECT value FROM system.settings WHERE name='cast_keep_nullable'")
assert(setting.strip() == "1")
result = node1.query(""" def test_cast_keep_nullable(started_cluster):
setting = node1.query(
"SELECT value FROM system.settings WHERE name='cast_keep_nullable'"
)
assert setting.strip() == "1"
result = node1.query(
"""
DROP TABLE IF EXISTS t; DROP TABLE IF EXISTS t;
CREATE TABLE t (x UInt64) ENGINE = MergeTree ORDER BY tuple(); CREATE TABLE t (x UInt64) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO t SELECT number FROM numbers(10); INSERT INTO t SELECT number FROM numbers(10);
SELECT * FROM t; SELECT * FROM t;
""") """
assert(result.strip() == "0\n1\n2\n3\n4\n5\n6\n7\n8\n9") )
assert result.strip() == "0\n1\n2\n3\n4\n5\n6\n7\n8\n9"
error = node1.query_and_get_error(""" error = node1.query_and_get_error(
"""
SET mutations_sync = 1; SET mutations_sync = 1;
ALTER TABLE t UPDATE x = x % 3 = 0 ? NULL : x WHERE x % 2 = 1; ALTER TABLE t UPDATE x = x % 3 = 0 ? NULL : x WHERE x % 2 = 1;
""") """
assert("DB::Exception: Cannot convert NULL value to non-Nullable type" in error) )
assert "DB::Exception: Cannot convert NULL value to non-Nullable type" in error
result = node1.query("SELECT * FROM t;") result = node1.query("SELECT * FROM t;")
assert(result.strip() == "0\n1\n2\n3\n4\n5\n6\n7\n8\n9") assert result.strip() == "0\n1\n2\n3\n4\n5\n6\n7\n8\n9"

View File

@ -6,8 +6,8 @@ from helpers.test_tools import assert_eq_with_retry
cluster = ClickHouseCluster(__file__) cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', with_zookeeper=True) node1 = cluster.add_instance("node1", with_zookeeper=True)
node2 = cluster.add_instance('node2', with_zookeeper=True) node2 = cluster.add_instance("node2", with_zookeeper=True)
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
@ -22,21 +22,25 @@ def started_cluster():
def test_replica_always_download(started_cluster): def test_replica_always_download(started_cluster):
node1.query_with_retry(""" node1.query_with_retry(
"""
CREATE TABLE IF NOT EXISTS test_table( CREATE TABLE IF NOT EXISTS test_table(
key UInt64, key UInt64,
value String value String
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_table/replicated', '1') ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_table/replicated', '1')
ORDER BY tuple() ORDER BY tuple()
""") """
node2.query_with_retry(""" )
node2.query_with_retry(
"""
CREATE TABLE IF NOT EXISTS test_table( CREATE TABLE IF NOT EXISTS test_table(
key UInt64, key UInt64,
value String value String
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_table/replicated', '2') ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_table/replicated', '2')
ORDER BY tuple() ORDER BY tuple()
SETTINGS always_fetch_merged_part=1 SETTINGS always_fetch_merged_part=1
""") """
)
# Stop merges on single node # Stop merges on single node
node1.query("SYSTEM STOP MERGES") node1.query("SYSTEM STOP MERGES")
@ -50,15 +54,29 @@ def test_replica_always_download(started_cluster):
time.sleep(5) time.sleep(5)
# Nothing is merged # Nothing is merged
assert node1.query("SELECT COUNT() FROM system.parts WHERE table = 'test_table' and active=1") == "10\n" assert (
assert node2.query("SELECT COUNT() FROM system.parts WHERE table = 'test_table' and active=1") == "10\n" node1.query(
"SELECT COUNT() FROM system.parts WHERE table = 'test_table' and active=1"
)
== "10\n"
)
assert (
node2.query(
"SELECT COUNT() FROM system.parts WHERE table = 'test_table' and active=1"
)
== "10\n"
)
node1.query("SYSTEM START MERGES") node1.query("SYSTEM START MERGES")
node1.query("OPTIMIZE TABLE test_table") node1.query("OPTIMIZE TABLE test_table")
node2.query("SYSTEM SYNC REPLICA test_table") node2.query("SYSTEM SYNC REPLICA test_table")
node1_parts = node1.query("SELECT COUNT() FROM system.parts WHERE table = 'test_table' and active=1").strip() node1_parts = node1.query(
node2_parts = node2.query("SELECT COUNT() FROM system.parts WHERE table = 'test_table' and active=1").strip() "SELECT COUNT() FROM system.parts WHERE table = 'test_table' and active=1"
).strip()
node2_parts = node2.query(
"SELECT COUNT() FROM system.parts WHERE table = 'test_table' and active=1"
).strip()
assert int(node1_parts) < 10 assert int(node1_parts) < 10
assert int(node2_parts) < 10 assert int(node2_parts) < 10

View File

@ -5,17 +5,19 @@ import pytest
from helpers.cluster import ClickHouseCluster from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__) cluster = ClickHouseCluster(__file__)
node = cluster.add_instance('node', main_configs=['configs/config.xml']) node = cluster.add_instance("node", main_configs=["configs/config.xml"])
@pytest.fixture(scope='module') @pytest.fixture(scope="module")
def started_cluster(): def started_cluster():
try: try:
cluster.start() cluster.start()
node.query(""" node.query(
"""
create table t (number UInt64) create table t (number UInt64)
engine = Distributed(test_cluster_two_shards, system, numbers) engine = Distributed(test_cluster_two_shards, system, numbers)
""") """
)
yield cluster yield cluster
finally: finally:
@ -25,12 +27,15 @@ def started_cluster():
def test_filled_async_drain_connection_pool(started_cluster): def test_filled_async_drain_connection_pool(started_cluster):
def execute_queries(_): def execute_queries(_):
for _ in range(100): for _ in range(100):
node.query('select * from t where number = 0 limit 2', settings={ node.query(
'sleep_in_receive_cancel_ms': int(10e6), "select * from t where number = 0 limit 2",
'max_execution_time': 5, settings={
# decrease drain_timeout to make test more stable "sleep_in_receive_cancel_ms": int(10e6),
# (another way is to increase max_execution_time, but this will make test slower) "max_execution_time": 5,
'drain_timeout': 1, # decrease drain_timeout to make test more stable
}) # (another way is to increase max_execution_time, but this will make test slower)
"drain_timeout": 1,
},
)
any(map(execute_queries, range(10))) any(map(execute_queries, range(10)))

View File

@ -4,8 +4,11 @@ import pytest
from helpers.cluster import ClickHouseCluster from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__) cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', with_zookeeper=True, node1 = cluster.add_instance(
main_configs=['configs/asynchronous_metrics_update_period_s.xml']) "node1",
with_zookeeper=True,
main_configs=["configs/asynchronous_metrics_update_period_s.xml"],
)
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
@ -27,20 +30,20 @@ def test_event_time_microseconds_field(started_cluster):
cluster.start() cluster.start()
node1.query("SET log_queries = 1;") node1.query("SET log_queries = 1;")
node1.query("CREATE DATABASE replica;") node1.query("CREATE DATABASE replica;")
query_create = '''CREATE TABLE replica.test query_create = """CREATE TABLE replica.test
( (
id Int64, id Int64,
event_time DateTime event_time DateTime
) )
Engine=MergeTree() Engine=MergeTree()
PARTITION BY toYYYYMMDD(event_time) PARTITION BY toYYYYMMDD(event_time)
ORDER BY id;''' ORDER BY id;"""
time.sleep(2) time.sleep(2)
node1.query(query_create) node1.query(query_create)
node1.query('''INSERT INTO replica.test VALUES (1, now())''') node1.query("""INSERT INTO replica.test VALUES (1, now())""")
node1.query("SYSTEM FLUSH LOGS;") node1.query("SYSTEM FLUSH LOGS;")
# query assumes that the event_time field is accurate # query assumes that the event_time field is accurate
equals_query = '''WITH ( equals_query = """WITH (
( (
SELECT event_time_microseconds SELECT event_time_microseconds
FROM system.asynchronous_metric_log FROM system.asynchronous_metric_log
@ -53,7 +56,7 @@ def test_event_time_microseconds_field(started_cluster):
ORDER BY event_time DESC ORDER BY event_time DESC
LIMIT 1 LIMIT 1
) AS time) ) AS time)
SELECT if(dateDiff('second', toDateTime(time_with_microseconds), toDateTime(time)) = 0, 'ok', 'fail')''' SELECT if(dateDiff('second', toDateTime(time_with_microseconds), toDateTime(time)) = 0, 'ok', 'fail')"""
assert "ok\n" in node1.query(equals_query) assert "ok\n" in node1.query(equals_query)
finally: finally:
cluster.shutdown() cluster.shutdown()

View File

@ -5,21 +5,29 @@ from helpers.cluster import ClickHouseCluster
from helpers.network import PartitionManager from helpers.network import PartitionManager
cluster = ClickHouseCluster(__file__) cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', main_configs=["configs/config.d/zookeeper_session_timeout.xml", node1 = cluster.add_instance(
"configs/remote_servers.xml"], with_zookeeper=True) "node1",
main_configs=[
"configs/config.d/zookeeper_session_timeout.xml",
"configs/remote_servers.xml",
],
with_zookeeper=True,
)
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
def start_cluster(): def start_cluster():
try: try:
cluster.start() cluster.start()
node1.query("CREATE DATABASE zktest ENGINE=Ordinary;") # Different behaviour with Atomic
node1.query( node1.query(
''' "CREATE DATABASE zktest ENGINE=Ordinary;"
) # Different behaviour with Atomic
node1.query(
"""
CREATE TABLE zktest.atomic_drop_table (n UInt32) CREATE TABLE zktest.atomic_drop_table (n UInt32)
ENGINE = ReplicatedMergeTree('/clickhouse/zktest/tables/atomic_drop_table', 'node1') ENGINE = ReplicatedMergeTree('/clickhouse/zktest/tables/atomic_drop_table', 'node1')
PARTITION BY n ORDER BY n PARTITION BY n ORDER BY n
''' """
) )
yield cluster yield cluster
finally: finally:
@ -31,8 +39,10 @@ def test_atomic_delete_with_stopped_zookeeper(start_cluster):
with PartitionManager() as pm: with PartitionManager() as pm:
pm.drop_instance_zk_connections(node1) pm.drop_instance_zk_connections(node1)
error = node1.query_and_get_error("DROP TABLE zktest.atomic_drop_table") # Table won't drop error = node1.query_and_get_error(
"DROP TABLE zktest.atomic_drop_table"
) # Table won't drop
assert error != "" assert error != ""
time.sleep(5) time.sleep(5)
assert '8192' in node1.query("select * from zktest.atomic_drop_table") assert "8192" in node1.query("select * from zktest.atomic_drop_table")

View File

@ -3,7 +3,9 @@ import pytest
from helpers.cluster import ClickHouseCluster from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__) cluster = ClickHouseCluster(__file__)
node = cluster.add_instance('node', main_configs=["configs/config.xml"], with_zookeeper=True) node = cluster.add_instance(
"node", main_configs=["configs/config.xml"], with_zookeeper=True
)
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
@ -14,18 +16,35 @@ def started_cluster():
finally: finally:
cluster.shutdown() cluster.shutdown()
def create_force_drop_flag(node): def create_force_drop_flag(node):
force_drop_flag_path = "/var/lib/clickhouse/flags/force_drop_table" force_drop_flag_path = "/var/lib/clickhouse/flags/force_drop_table"
node.exec_in_container(["bash", "-c", "touch {} && chmod a=rw {}".format(force_drop_flag_path, force_drop_flag_path)], user="root") node.exec_in_container(
[
"bash",
"-c",
"touch {} && chmod a=rw {}".format(
force_drop_flag_path, force_drop_flag_path
),
],
user="root",
)
@pytest.mark.parametrize("engine", ['Ordinary', 'Atomic'])
@pytest.mark.parametrize("engine", ["Ordinary", "Atomic"])
def test_attach_partition_with_large_destination(started_cluster, engine): def test_attach_partition_with_large_destination(started_cluster, engine):
# Initialize # Initialize
node.query("CREATE DATABASE db ENGINE={}".format(engine)) node.query("CREATE DATABASE db ENGINE={}".format(engine))
node.query("CREATE TABLE db.destination (n UInt64) ENGINE=ReplicatedMergeTree('/test/destination', 'r1') ORDER BY n PARTITION BY n % 2") node.query(
node.query("CREATE TABLE db.source_1 (n UInt64) ENGINE=ReplicatedMergeTree('/test/source_1', 'r1') ORDER BY n PARTITION BY n % 2") "CREATE TABLE db.destination (n UInt64) ENGINE=ReplicatedMergeTree('/test/destination', 'r1') ORDER BY n PARTITION BY n % 2"
)
node.query(
"CREATE TABLE db.source_1 (n UInt64) ENGINE=ReplicatedMergeTree('/test/source_1', 'r1') ORDER BY n PARTITION BY n % 2"
)
node.query("INSERT INTO db.source_1 VALUES (1), (2), (3), (4)") node.query("INSERT INTO db.source_1 VALUES (1), (2), (3), (4)")
node.query("CREATE TABLE db.source_2 (n UInt64) ENGINE=ReplicatedMergeTree('/test/source_2', 'r1') ORDER BY n PARTITION BY n % 2") node.query(
"CREATE TABLE db.source_2 (n UInt64) ENGINE=ReplicatedMergeTree('/test/source_2', 'r1') ORDER BY n PARTITION BY n % 2"
)
node.query("INSERT INTO db.source_2 VALUES (5), (6), (7), (8)") node.query("INSERT INTO db.source_2 VALUES (5), (6), (7), (8)")
# Attach partition when destination partition is empty # Attach partition when destination partition is empty
@ -33,7 +52,9 @@ def test_attach_partition_with_large_destination(started_cluster, engine):
assert node.query("SELECT n FROM db.destination ORDER BY n") == "2\n4\n" assert node.query("SELECT n FROM db.destination ORDER BY n") == "2\n4\n"
# REPLACE PARTITION should still respect max_partition_size_to_drop # REPLACE PARTITION should still respect max_partition_size_to_drop
assert node.query_and_get_error("ALTER TABLE db.destination REPLACE PARTITION 0 FROM db.source_2") assert node.query_and_get_error(
"ALTER TABLE db.destination REPLACE PARTITION 0 FROM db.source_2"
)
assert node.query("SELECT n FROM db.destination ORDER BY n") == "2\n4\n" assert node.query("SELECT n FROM db.destination ORDER BY n") == "2\n4\n"
# Attach partition when destination partition is larger than max_partition_size_to_drop # Attach partition when destination partition is larger than max_partition_size_to_drop
@ -47,4 +68,4 @@ def test_attach_partition_with_large_destination(started_cluster, engine):
node.query("DROP TABLE db.source_2 SYNC") node.query("DROP TABLE db.source_2 SYNC")
create_force_drop_flag(node) create_force_drop_flag(node)
node.query("DROP TABLE db.destination SYNC") node.query("DROP TABLE db.destination SYNC")
node.query("DROP DATABASE db") node.query("DROP DATABASE db")

View File

@ -3,7 +3,8 @@ from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__) cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1') node1 = cluster.add_instance("node1")
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
def start_cluster(): def start_cluster():
@ -17,9 +18,12 @@ def start_cluster():
def test_attach_without_checksums(start_cluster): def test_attach_without_checksums(start_cluster):
node1.query( node1.query(
"CREATE TABLE test (date Date, key Int32, value String) Engine=MergeTree ORDER BY key PARTITION by date") "CREATE TABLE test (date Date, key Int32, value String) Engine=MergeTree ORDER BY key PARTITION by date"
)
node1.query("INSERT INTO test SELECT toDate('2019-10-01'), number, toString(number) FROM numbers(100)") node1.query(
"INSERT INTO test SELECT toDate('2019-10-01'), number, toString(number) FROM numbers(100)"
)
assert node1.query("SELECT COUNT() FROM test WHERE key % 10 == 0") == "10\n" assert node1.query("SELECT COUNT() FROM test WHERE key % 10 == 0") == "10\n"
@ -30,15 +34,27 @@ def test_attach_without_checksums(start_cluster):
# to be sure output not empty # to be sure output not empty
node1.exec_in_container( node1.exec_in_container(
['bash', '-c', 'find /var/lib/clickhouse/data/default/test/detached -name "checksums.txt" | grep -e ".*" '], [
privileged=True, user='root') "bash",
"-c",
'find /var/lib/clickhouse/data/default/test/detached -name "checksums.txt" | grep -e ".*" ',
],
privileged=True,
user="root",
)
node1.exec_in_container( node1.exec_in_container(
['bash', '-c', 'find /var/lib/clickhouse/data/default/test/detached -name "checksums.txt" -delete'], [
privileged=True, user='root') "bash",
"-c",
'find /var/lib/clickhouse/data/default/test/detached -name "checksums.txt" -delete',
],
privileged=True,
user="root",
)
node1.query("ALTER TABLE test ATTACH PARTITION '2019-10-01'") node1.query("ALTER TABLE test ATTACH PARTITION '2019-10-01'")
assert node1.query("SELECT COUNT() FROM test WHERE key % 10 == 0") == "10\n" assert node1.query("SELECT COUNT() FROM test WHERE key % 10 == 0") == "10\n"
assert node1.query("SELECT COUNT() FROM test") == "100\n" assert node1.query("SELECT COUNT() FROM test") == "100\n"
node1.query("DROP TABLE test") node1.query("DROP TABLE test")

View File

@ -7,19 +7,25 @@ from helpers.test_tools import assert_eq_with_retry
from helpers.network import PartitionManager from helpers.network import PartitionManager
from helpers.corrupt_part_data_on_disk import corrupt_part_data_by_path from helpers.corrupt_part_data_on_disk import corrupt_part_data_by_path
def fill_node(node): def fill_node(node):
node.query_with_retry( node.query_with_retry(
''' """
CREATE TABLE IF NOT EXISTS test(n UInt32) CREATE TABLE IF NOT EXISTS test(n UInt32)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/test', '{replica}') ENGINE = ReplicatedMergeTree('/clickhouse/tables/test', '{replica}')
ORDER BY n PARTITION BY n % 10; ORDER BY n PARTITION BY n % 10;
'''.format(replica=node.name)) """.format(
replica=node.name
)
)
cluster = ClickHouseCluster(__file__) cluster = ClickHouseCluster(__file__)
node_1 = cluster.add_instance('replica1', with_zookeeper=True) node_1 = cluster.add_instance("replica1", with_zookeeper=True)
node_2 = cluster.add_instance('replica2', with_zookeeper=True) node_2 = cluster.add_instance("replica2", with_zookeeper=True)
node_3 = cluster.add_instance('replica3', with_zookeeper=True) node_3 = cluster.add_instance("replica3", with_zookeeper=True)
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
def start_cluster(): def start_cluster():
@ -36,27 +42,42 @@ def start_cluster():
finally: finally:
cluster.shutdown() cluster.shutdown()
def check_data(nodes, detached_parts): def check_data(nodes, detached_parts):
for node in nodes: for node in nodes:
print("> Replication queue for", node.name, "\n> table\treplica_name\tsource_replica\ttype\tposition\n", print(
node.query_with_retry("SELECT table, replica_name, source_replica, type, position FROM system.replication_queue")) "> Replication queue for",
node.name,
"\n> table\treplica_name\tsource_replica\ttype\tposition\n",
node.query_with_retry(
"SELECT table, replica_name, source_replica, type, position FROM system.replication_queue"
),
)
node.query_with_retry("SYSTEM SYNC REPLICA test") node.query_with_retry("SYSTEM SYNC REPLICA test")
print("> Checking data integrity for", node.name) print("> Checking data integrity for", node.name)
for i in range(10): for i in range(10):
assert_eq_with_retry(node, "SELECT count() FROM test WHERE n % 10 == " + str(i), assert_eq_with_retry(
"0\n" if i in detached_parts else "10\n") node,
"SELECT count() FROM test WHERE n % 10 == " + str(i),
"0\n" if i in detached_parts else "10\n",
)
assert_eq_with_retry(node, "SELECT count() FROM system.parts WHERE table='test'", assert_eq_with_retry(
str(10 - len(detached_parts)) + "\n") node,
"SELECT count() FROM system.parts WHERE table='test'",
str(10 - len(detached_parts)) + "\n",
)
res: str = node.query("SELECT * FROM test ORDER BY n") res: str = node.query("SELECT * FROM test ORDER BY n")
for other in nodes: for other in nodes:
if other != node: if other != node:
logging.debug(f"> Checking data consistency, {other.name} vs {node.name}") logging.debug(
f"> Checking data consistency, {other.name} vs {node.name}"
)
assert_eq_with_retry(other, "SELECT * FROM test ORDER BY n", res) assert_eq_with_retry(other, "SELECT * FROM test ORDER BY n", res)
@ -83,7 +104,6 @@ def test_attach_without_fetching(start_cluster):
# files missing. # files missing.
node_1.query("ALTER TABLE test DETACH PARTITION 2") node_1.query("ALTER TABLE test DETACH PARTITION 2")
check_data([node_1, node_2], detached_parts=[0, 1, 2]) check_data([node_1, node_2], detached_parts=[0, 1, 2])
# 2. Create the third replica # 2. Create the third replica
@ -94,14 +114,28 @@ def test_attach_without_fetching(start_cluster):
# Replica 2 should also download the data from 1 as the checksums won't match. # Replica 2 should also download the data from 1 as the checksums won't match.
logging.debug("Checking attach with corrupted part data with files missing") logging.debug("Checking attach with corrupted part data with files missing")
to_delete = node_2.exec_in_container(['bash', '-c', to_delete = node_2.exec_in_container(
'cd {p} && ls *.bin'.format( [
p="/var/lib/clickhouse/data/default/test/detached/2_0_0_0")], privileged=True) "bash",
"-c",
"cd {p} && ls *.bin".format(
p="/var/lib/clickhouse/data/default/test/detached/2_0_0_0"
),
],
privileged=True,
)
logging.debug(f"Before deleting: {to_delete}") logging.debug(f"Before deleting: {to_delete}")
node_2.exec_in_container(['bash', '-c', node_2.exec_in_container(
'cd {p} && rm -fr *.bin'.format( [
p="/var/lib/clickhouse/data/default/test/detached/2_0_0_0")], privileged=True) "bash",
"-c",
"cd {p} && rm -fr *.bin".format(
p="/var/lib/clickhouse/data/default/test/detached/2_0_0_0"
),
],
privileged=True,
)
node_1.query("ALTER TABLE test ATTACH PARTITION 2") node_1.query("ALTER TABLE test ATTACH PARTITION 2")
check_data([node_1, node_2, node_3], detached_parts=[0, 1]) check_data([node_1, node_2, node_3], detached_parts=[0, 1])
@ -111,7 +145,9 @@ def test_attach_without_fetching(start_cluster):
# Replica 2 should also download the data from 1 as the checksums won't match. # Replica 2 should also download the data from 1 as the checksums won't match.
print("Checking attach with corrupted part data with all of the files present") print("Checking attach with corrupted part data with all of the files present")
corrupt_part_data_by_path(node_2, "/var/lib/clickhouse/data/default/test/detached/1_0_0_0") corrupt_part_data_by_path(
node_2, "/var/lib/clickhouse/data/default/test/detached/1_0_0_0"
)
node_1.query("ALTER TABLE test ATTACH PARTITION 1") node_1.query("ALTER TABLE test ATTACH PARTITION 1")
check_data([node_1, node_2, node_3], detached_parts=[0]) check_data([node_1, node_2, node_3], detached_parts=[0])
@ -123,8 +159,8 @@ def test_attach_without_fetching(start_cluster):
with PartitionManager() as pm: with PartitionManager() as pm:
# If something goes wrong and replica 2 wants to fetch data, the test will fail. # If something goes wrong and replica 2 wants to fetch data, the test will fail.
pm.partition_instances(node_2, node_1, action='REJECT --reject-with tcp-reset') pm.partition_instances(node_2, node_1, action="REJECT --reject-with tcp-reset")
pm.partition_instances(node_1, node_3, action='REJECT --reject-with tcp-reset') pm.partition_instances(node_1, node_3, action="REJECT --reject-with tcp-reset")
node_1.query("ALTER TABLE test ATTACH PART '0_0_0_0'") node_1.query("ALTER TABLE test ATTACH PART '0_0_0_0'")

View File

@ -2,7 +2,7 @@ import pytest
from helpers.cluster import ClickHouseCluster from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__) cluster = ClickHouseCluster(__file__)
instance = cluster.add_instance('instance') instance = cluster.add_instance("instance")
@pytest.fixture(scope="module", autouse=True) @pytest.fixture(scope="module", autouse=True)
@ -20,18 +20,30 @@ def setup_nodes():
def test_authentication_pass(): def test_authentication_pass():
assert instance.query("SELECT currentUser()", user='sasha') == 'sasha\n' assert instance.query("SELECT currentUser()", user="sasha") == "sasha\n"
assert instance.query("SELECT currentUser()", user='masha', password='qwerty') == 'masha\n' assert (
instance.query("SELECT currentUser()", user="masha", password="qwerty")
== "masha\n"
)
# 'no_password' authentication type allows to login with any password. # 'no_password' authentication type allows to login with any password.
assert instance.query("SELECT currentUser()", user='sasha', password='something') == 'sasha\n' assert (
assert instance.query("SELECT currentUser()", user='sasha', password='something2') == 'sasha\n' instance.query("SELECT currentUser()", user="sasha", password="something")
== "sasha\n"
)
assert (
instance.query("SELECT currentUser()", user="sasha", password="something2")
== "sasha\n"
)
def test_authentication_fail(): def test_authentication_fail():
# User doesn't exist. # User doesn't exist.
assert "vasya: Authentication failed" in instance.query_and_get_error("SELECT currentUser()", user='vasya') assert "vasya: Authentication failed" in instance.query_and_get_error(
"SELECT currentUser()", user="vasya"
)
# Wrong password. # Wrong password.
assert "masha: Authentication failed" in instance.query_and_get_error("SELECT currentUser()", user='masha', assert "masha: Authentication failed" in instance.query_and_get_error(
password='123') "SELECT currentUser()", user="masha", password="123"
)

View File

@ -17,12 +17,20 @@ CLUSTER_NAME = "test_cluster"
def cluster(): def cluster():
try: try:
cluster = ClickHouseCluster(__file__) cluster = ClickHouseCluster(__file__)
cluster.add_instance(NODE1, main_configs=["configs/config.d/storage_conf.xml"], macros={'replica': '1'}, cluster.add_instance(
with_azurite=True, NODE1,
with_zookeeper=True) main_configs=["configs/config.d/storage_conf.xml"],
cluster.add_instance(NODE2, main_configs=["configs/config.d/storage_conf.xml"], macros={'replica': '2'}, macros={"replica": "1"},
with_azurite=True, with_azurite=True,
with_zookeeper=True) with_zookeeper=True,
)
cluster.add_instance(
NODE2,
main_configs=["configs/config.d/storage_conf.xml"],
macros={"replica": "2"},
with_azurite=True,
with_zookeeper=True,
)
logging.info("Starting cluster...") logging.info("Starting cluster...")
cluster.start() cluster.start()
logging.info("Cluster started") logging.info("Cluster started")
@ -53,7 +61,10 @@ def create_table(node, table_name, replica, **additional_settings):
def get_large_objects_count(blob_container_client, large_size_threshold=100): def get_large_objects_count(blob_container_client, large_size_threshold=100):
return sum(blob['size'] > large_size_threshold for blob in blob_container_client.list_blobs()) return sum(
blob["size"] > large_size_threshold
for blob in blob_container_client.list_blobs()
)
def test_zero_copy_replication(cluster): def test_zero_copy_replication(cluster):
@ -61,15 +72,21 @@ def test_zero_copy_replication(cluster):
node2 = cluster.instances[NODE2] node2 = cluster.instances[NODE2]
create_table(node1, TABLE_NAME, 1) create_table(node1, TABLE_NAME, 1)
blob_container_client = cluster.blob_service_client.get_container_client(CONTAINER_NAME) blob_container_client = cluster.blob_service_client.get_container_client(
CONTAINER_NAME
)
values1 = "(0,'data'),(1,'data')" values1 = "(0,'data'),(1,'data')"
values2 = "(2,'data'),(3,'data')" values2 = "(2,'data'),(3,'data')"
node1.query(f"INSERT INTO {TABLE_NAME} VALUES {values1}") node1.query(f"INSERT INTO {TABLE_NAME} VALUES {values1}")
node2.query(f"SYSTEM SYNC REPLICA {TABLE_NAME}") node2.query(f"SYSTEM SYNC REPLICA {TABLE_NAME}")
assert node1.query(f"SELECT * FROM {TABLE_NAME} order by id FORMAT Values") == values1 assert (
assert node2.query(f"SELECT * FROM {TABLE_NAME} order by id FORMAT Values") == values1 node1.query(f"SELECT * FROM {TABLE_NAME} order by id FORMAT Values") == values1
)
assert (
node2.query(f"SELECT * FROM {TABLE_NAME} order by id FORMAT Values") == values1
)
# Based on version 21.x - should be only one file with size 100+ (checksums.txt), used by both nodes # Based on version 21.x - should be only one file with size 100+ (checksums.txt), used by both nodes
assert get_large_objects_count(blob_container_client) == 1 assert get_large_objects_count(blob_container_client) == 1
@ -77,7 +94,13 @@ def test_zero_copy_replication(cluster):
node2.query(f"INSERT INTO {TABLE_NAME} VALUES {values2}") node2.query(f"INSERT INTO {TABLE_NAME} VALUES {values2}")
node1.query(f"SYSTEM SYNC REPLICA {TABLE_NAME}") node1.query(f"SYSTEM SYNC REPLICA {TABLE_NAME}")
assert node2.query(f"SELECT * FROM {TABLE_NAME} order by id FORMAT Values") == values1 + "," + values2 assert (
assert node1.query(f"SELECT * FROM {TABLE_NAME} order by id FORMAT Values") == values1 + "," + values2 node2.query(f"SELECT * FROM {TABLE_NAME} order by id FORMAT Values")
== values1 + "," + values2
)
assert (
node1.query(f"SELECT * FROM {TABLE_NAME} order by id FORMAT Values")
== values1 + "," + values2
)
assert get_large_objects_count(blob_container_client) == 2 assert get_large_objects_count(blob_container_client) == 2

View File

@ -6,25 +6,35 @@ from helpers.cluster import ClickHouseCluster
from helpers.test_tools import TSV from helpers.test_tools import TSV
cluster = ClickHouseCluster(__file__) cluster = ClickHouseCluster(__file__)
instance = cluster.add_instance('node') instance = cluster.add_instance("node")
path_to_data = '/var/lib/clickhouse/' path_to_data = "/var/lib/clickhouse/"
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
def started_cluster(): def started_cluster():
try: try:
cluster.start() cluster.start()
instance.query('CREATE DATABASE test ENGINE = Ordinary') # Different path in shadow/ with Atomic instance.query(
"CREATE DATABASE test ENGINE = Ordinary"
) # Different path in shadow/ with Atomic
instance.query("DROP TABLE IF EXISTS test.tbl") instance.query("DROP TABLE IF EXISTS test.tbl")
instance.query("CREATE TABLE test.tbl (p Date, k Int8) ENGINE = MergeTree PARTITION BY toYYYYMM(p) ORDER BY p") instance.query(
"CREATE TABLE test.tbl (p Date, k Int8) ENGINE = MergeTree PARTITION BY toYYYYMM(p) ORDER BY p"
)
for i in range(1, 4): for i in range(1, 4):
instance.query('INSERT INTO test.tbl (p, k) VALUES(toDate({}), {})'.format(i, i)) instance.query(
"INSERT INTO test.tbl (p, k) VALUES(toDate({}), {})".format(i, i)
)
for i in range(31, 34): for i in range(31, 34):
instance.query('INSERT INTO test.tbl (p, k) VALUES(toDate({}), {})'.format(i, i)) instance.query(
"INSERT INTO test.tbl (p, k) VALUES(toDate({}), {})".format(i, i)
)
expected = TSV('1970-01-02\t1\n1970-01-03\t2\n1970-01-04\t3\n1970-02-01\t31\n1970-02-02\t32\n1970-02-03\t33') expected = TSV(
"1970-01-02\t1\n1970-01-03\t2\n1970-01-04\t3\n1970-02-01\t31\n1970-02-02\t32\n1970-02-03\t33"
)
res = instance.query("SELECT * FROM test.tbl ORDER BY p") res = instance.query("SELECT * FROM test.tbl ORDER BY p")
assert (TSV(res) == expected) assert TSV(res) == expected
instance.query("ALTER TABLE test.tbl FREEZE") instance.query("ALTER TABLE test.tbl FREEZE")
@ -33,21 +43,24 @@ def started_cluster():
finally: finally:
cluster.shutdown() cluster.shutdown()
def get_last_backup_path(instance, database, table): def get_last_backup_path(instance, database, table):
fp_increment = os.path.join(path_to_data, 'shadow/increment.txt') fp_increment = os.path.join(path_to_data, "shadow/increment.txt")
increment = instance.exec_in_container(['cat', fp_increment]).strip() increment = instance.exec_in_container(["cat", fp_increment]).strip()
return os.path.join(path_to_data, 'shadow', increment, 'data', database, table) return os.path.join(path_to_data, "shadow", increment, "data", database, table)
def copy_backup_to_detached(instance, database, src_table, dst_table): def copy_backup_to_detached(instance, database, src_table, dst_table):
fp_backup = os.path.join(path_to_data, 'shadow', '*', 'data', database, src_table) fp_backup = os.path.join(path_to_data, "shadow", "*", "data", database, src_table)
fp_detached = os.path.join(path_to_data, 'data', database, dst_table, 'detached') fp_detached = os.path.join(path_to_data, "data", database, dst_table, "detached")
logging.debug(f'copy from {fp_backup} to {fp_detached}') logging.debug(f"copy from {fp_backup} to {fp_detached}")
instance.exec_in_container(['bash', '-c', f'cp -r {fp_backup} -T {fp_detached}']) instance.exec_in_container(["bash", "-c", f"cp -r {fp_backup} -T {fp_detached}"])
def test_restore(started_cluster): def test_restore(started_cluster):
instance.query("CREATE TABLE test.tbl1 AS test.tbl") instance.query("CREATE TABLE test.tbl1 AS test.tbl")
copy_backup_to_detached(started_cluster.instances['node'], 'test', 'tbl', 'tbl1') copy_backup_to_detached(started_cluster.instances["node"], "test", "tbl", "tbl1")
# The data_version of parts to be attached are larger than the newly created table's data_version. # The data_version of parts to be attached are larger than the newly created table's data_version.
instance.query("ALTER TABLE test.tbl1 ATTACH PARTITION 197001") instance.query("ALTER TABLE test.tbl1 ATTACH PARTITION 197001")
@ -55,17 +68,21 @@ def test_restore(started_cluster):
instance.query("SELECT sleep(2)") instance.query("SELECT sleep(2)")
# Validate the attached parts are identical to the backup. # Validate the attached parts are identical to the backup.
expected = TSV('1970-01-02\t1\n1970-01-03\t2\n1970-01-04\t3\n1970-02-01\t31\n1970-02-02\t32\n1970-02-03\t33') expected = TSV(
"1970-01-02\t1\n1970-01-03\t2\n1970-01-04\t3\n1970-02-01\t31\n1970-02-02\t32\n1970-02-03\t33"
)
res = instance.query("SELECT * FROM test.tbl1 ORDER BY p") res = instance.query("SELECT * FROM test.tbl1 ORDER BY p")
assert (TSV(res) == expected) assert TSV(res) == expected
instance.query("ALTER TABLE test.tbl1 UPDATE k=10 WHERE 1") instance.query("ALTER TABLE test.tbl1 UPDATE k=10 WHERE 1")
instance.query("SELECT sleep(2)") instance.query("SELECT sleep(2)")
# Validate mutation has been applied to all attached parts. # Validate mutation has been applied to all attached parts.
expected = TSV('1970-01-02\t10\n1970-01-03\t10\n1970-01-04\t10\n1970-02-01\t10\n1970-02-02\t10\n1970-02-03\t10') expected = TSV(
"1970-01-02\t10\n1970-01-03\t10\n1970-01-04\t10\n1970-02-01\t10\n1970-02-02\t10\n1970-02-03\t10"
)
res = instance.query("SELECT * FROM test.tbl1 ORDER BY p") res = instance.query("SELECT * FROM test.tbl1 ORDER BY p")
assert (TSV(res) == expected) assert TSV(res) == expected
instance.query("DROP TABLE IF EXISTS test.tbl1") instance.query("DROP TABLE IF EXISTS test.tbl1")
@ -73,15 +90,19 @@ def test_restore(started_cluster):
def test_attach_partition(started_cluster): def test_attach_partition(started_cluster):
instance.query("CREATE TABLE test.tbl2 AS test.tbl") instance.query("CREATE TABLE test.tbl2 AS test.tbl")
for i in range(3, 5): for i in range(3, 5):
instance.query('INSERT INTO test.tbl2(p, k) VALUES(toDate({}), {})'.format(i, i)) instance.query(
"INSERT INTO test.tbl2(p, k) VALUES(toDate({}), {})".format(i, i)
)
for i in range(33, 35): for i in range(33, 35):
instance.query('INSERT INTO test.tbl2(p, k) VALUES(toDate({}), {})'.format(i, i)) instance.query(
"INSERT INTO test.tbl2(p, k) VALUES(toDate({}), {})".format(i, i)
)
expected = TSV('1970-01-04\t3\n1970-01-05\t4\n1970-02-03\t33\n1970-02-04\t34') expected = TSV("1970-01-04\t3\n1970-01-05\t4\n1970-02-03\t33\n1970-02-04\t34")
res = instance.query("SELECT * FROM test.tbl2 ORDER BY p") res = instance.query("SELECT * FROM test.tbl2 ORDER BY p")
assert (TSV(res) == expected) assert TSV(res) == expected
copy_backup_to_detached(started_cluster.instances['node'], 'test', 'tbl', 'tbl2') copy_backup_to_detached(started_cluster.instances["node"], "test", "tbl", "tbl2")
# The data_version of parts to be attached # The data_version of parts to be attached
# - may be less than, equal to or larger than the current table's data_version. # - may be less than, equal to or larger than the current table's data_version.
@ -91,18 +112,20 @@ def test_attach_partition(started_cluster):
instance.query("SELECT sleep(2)") instance.query("SELECT sleep(2)")
expected = TSV( expected = TSV(
'1970-01-02\t1\n1970-01-03\t2\n1970-01-04\t3\n1970-01-04\t3\n1970-01-05\t4\n1970-02-01\t31\n1970-02-02\t32\n1970-02-03\t33\n1970-02-03\t33\n1970-02-04\t34') "1970-01-02\t1\n1970-01-03\t2\n1970-01-04\t3\n1970-01-04\t3\n1970-01-05\t4\n1970-02-01\t31\n1970-02-02\t32\n1970-02-03\t33\n1970-02-03\t33\n1970-02-04\t34"
)
res = instance.query("SELECT * FROM test.tbl2 ORDER BY p") res = instance.query("SELECT * FROM test.tbl2 ORDER BY p")
assert (TSV(res) == expected) assert TSV(res) == expected
instance.query("ALTER TABLE test.tbl2 UPDATE k=10 WHERE 1") instance.query("ALTER TABLE test.tbl2 UPDATE k=10 WHERE 1")
instance.query("SELECT sleep(2)") instance.query("SELECT sleep(2)")
# Validate mutation has been applied to all attached parts. # Validate mutation has been applied to all attached parts.
expected = TSV( expected = TSV(
'1970-01-02\t10\n1970-01-03\t10\n1970-01-04\t10\n1970-01-04\t10\n1970-01-05\t10\n1970-02-01\t10\n1970-02-02\t10\n1970-02-03\t10\n1970-02-03\t10\n1970-02-04\t10') "1970-01-02\t10\n1970-01-03\t10\n1970-01-04\t10\n1970-01-04\t10\n1970-01-05\t10\n1970-02-01\t10\n1970-02-02\t10\n1970-02-03\t10\n1970-02-03\t10\n1970-02-04\t10"
)
res = instance.query("SELECT * FROM test.tbl2 ORDER BY p") res = instance.query("SELECT * FROM test.tbl2 ORDER BY p")
assert (TSV(res) == expected) assert TSV(res) == expected
instance.query("DROP TABLE IF EXISTS test.tbl2") instance.query("DROP TABLE IF EXISTS test.tbl2")
@ -110,15 +133,19 @@ def test_attach_partition(started_cluster):
def test_replace_partition(started_cluster): def test_replace_partition(started_cluster):
instance.query("CREATE TABLE test.tbl3 AS test.tbl") instance.query("CREATE TABLE test.tbl3 AS test.tbl")
for i in range(3, 5): for i in range(3, 5):
instance.query('INSERT INTO test.tbl3(p, k) VALUES(toDate({}), {})'.format(i, i)) instance.query(
"INSERT INTO test.tbl3(p, k) VALUES(toDate({}), {})".format(i, i)
)
for i in range(33, 35): for i in range(33, 35):
instance.query('INSERT INTO test.tbl3(p, k) VALUES(toDate({}), {})'.format(i, i)) instance.query(
"INSERT INTO test.tbl3(p, k) VALUES(toDate({}), {})".format(i, i)
)
expected = TSV('1970-01-04\t3\n1970-01-05\t4\n1970-02-03\t33\n1970-02-04\t34') expected = TSV("1970-01-04\t3\n1970-01-05\t4\n1970-02-03\t33\n1970-02-04\t34")
res = instance.query("SELECT * FROM test.tbl3 ORDER BY p") res = instance.query("SELECT * FROM test.tbl3 ORDER BY p")
assert (TSV(res) == expected) assert TSV(res) == expected
copy_backup_to_detached(started_cluster.instances['node'], 'test', 'tbl', 'tbl3') copy_backup_to_detached(started_cluster.instances["node"], "test", "tbl", "tbl3")
# The data_version of parts to be copied # The data_version of parts to be copied
# - may be less than, equal to or larger than the current table data_version. # - may be less than, equal to or larger than the current table data_version.
@ -126,35 +153,56 @@ def test_replace_partition(started_cluster):
instance.query("ALTER TABLE test.tbl3 REPLACE PARTITION 197002 FROM test.tbl") instance.query("ALTER TABLE test.tbl3 REPLACE PARTITION 197002 FROM test.tbl")
instance.query("SELECT sleep(2)") instance.query("SELECT sleep(2)")
expected = TSV('1970-01-04\t3\n1970-01-05\t4\n1970-02-01\t31\n1970-02-02\t32\n1970-02-03\t33') expected = TSV(
"1970-01-04\t3\n1970-01-05\t4\n1970-02-01\t31\n1970-02-02\t32\n1970-02-03\t33"
)
res = instance.query("SELECT * FROM test.tbl3 ORDER BY p") res = instance.query("SELECT * FROM test.tbl3 ORDER BY p")
assert (TSV(res) == expected) assert TSV(res) == expected
instance.query("ALTER TABLE test.tbl3 UPDATE k=10 WHERE 1") instance.query("ALTER TABLE test.tbl3 UPDATE k=10 WHERE 1")
instance.query("SELECT sleep(2)") instance.query("SELECT sleep(2)")
# Validate mutation has been applied to all copied parts. # Validate mutation has been applied to all copied parts.
expected = TSV('1970-01-04\t10\n1970-01-05\t10\n1970-02-01\t10\n1970-02-02\t10\n1970-02-03\t10') expected = TSV(
"1970-01-04\t10\n1970-01-05\t10\n1970-02-01\t10\n1970-02-02\t10\n1970-02-03\t10"
)
res = instance.query("SELECT * FROM test.tbl3 ORDER BY p") res = instance.query("SELECT * FROM test.tbl3 ORDER BY p")
assert (TSV(res) == expected) assert TSV(res) == expected
instance.query("DROP TABLE IF EXISTS test.tbl3") instance.query("DROP TABLE IF EXISTS test.tbl3")
def test_freeze_in_memory(started_cluster): def test_freeze_in_memory(started_cluster):
instance.query("CREATE TABLE test.t_in_memory(a UInt32, s String) ENGINE = MergeTree ORDER BY a SETTINGS min_rows_for_compact_part = 1000") instance.query(
"CREATE TABLE test.t_in_memory(a UInt32, s String) ENGINE = MergeTree ORDER BY a SETTINGS min_rows_for_compact_part = 1000"
)
instance.query("INSERT INTO test.t_in_memory VALUES (1, 'a')") instance.query("INSERT INTO test.t_in_memory VALUES (1, 'a')")
instance.query("ALTER TABLE test.t_in_memory FREEZE") instance.query("ALTER TABLE test.t_in_memory FREEZE")
fp_backup = get_last_backup_path(started_cluster.instances['node'], 'test', 't_in_memory') fp_backup = get_last_backup_path(
part_path = fp_backup + '/all_1_1_0/' started_cluster.instances["node"], "test", "t_in_memory"
)
part_path = fp_backup + "/all_1_1_0/"
assert TSV(instance.query("SELECT part_type, is_frozen FROM system.parts WHERE database = 'test' AND table = 't_in_memory'")) == TSV("InMemory\t1\n") assert TSV(
instance.exec_in_container(['test', '-f', part_path + '/data.bin']) instance.query(
assert instance.exec_in_container(['cat', part_path + '/count.txt']).strip() == '1' "SELECT part_type, is_frozen FROM system.parts WHERE database = 'test' AND table = 't_in_memory'"
)
) == TSV("InMemory\t1\n")
instance.exec_in_container(["test", "-f", part_path + "/data.bin"])
assert instance.exec_in_container(["cat", part_path + "/count.txt"]).strip() == "1"
instance.query("CREATE TABLE test.t_in_memory_2(a UInt32, s String) ENGINE = MergeTree ORDER BY a") instance.query(
copy_backup_to_detached(started_cluster.instances['node'], 'test', 't_in_memory', 't_in_memory_2') "CREATE TABLE test.t_in_memory_2(a UInt32, s String) ENGINE = MergeTree ORDER BY a"
)
copy_backup_to_detached(
started_cluster.instances["node"], "test", "t_in_memory", "t_in_memory_2"
)
instance.query("ALTER TABLE test.t_in_memory_2 ATTACH PARTITION ID 'all'") instance.query("ALTER TABLE test.t_in_memory_2 ATTACH PARTITION ID 'all'")
assert TSV(instance.query("SELECT part_type FROM system.parts WHERE database = 'test' AND table = 't_in_memory_2'")) == TSV("Compact\n") assert TSV(
instance.query(
"SELECT part_type FROM system.parts WHERE database = 'test' AND table = 't_in_memory_2'"
)
) == TSV("Compact\n")
assert TSV(instance.query("SELECT a, s FROM test.t_in_memory_2")) == TSV("1\ta\n") assert TSV(instance.query("SELECT a, s FROM test.t_in_memory_2")) == TSV("1\ta\n")

View File

@ -4,14 +4,19 @@ import os.path
from helpers.cluster import ClickHouseCluster from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__) cluster = ClickHouseCluster(__file__)
instance = cluster.add_instance('instance', main_configs=["configs/backups_disk.xml"], external_dirs=["/backups/"]) instance = cluster.add_instance(
"instance", main_configs=["configs/backups_disk.xml"], external_dirs=["/backups/"]
)
def create_and_fill_table(engine="MergeTree"): def create_and_fill_table(engine="MergeTree"):
if engine == "MergeTree": if engine == "MergeTree":
engine = "MergeTree ORDER BY y PARTITION BY x%10" engine = "MergeTree ORDER BY y PARTITION BY x%10"
instance.query("CREATE DATABASE test") instance.query("CREATE DATABASE test")
instance.query(f"CREATE TABLE test.table(x UInt32, y String) ENGINE={engine}") instance.query(f"CREATE TABLE test.table(x UInt32, y String) ENGINE={engine}")
instance.query("INSERT INTO test.table SELECT number, toString(number) FROM numbers(100)") instance.query(
"INSERT INTO test.table SELECT number, toString(number) FROM numbers(100)"
)
@pytest.fixture(scope="module", autouse=True) @pytest.fixture(scope="module", autouse=True)
@ -32,6 +37,8 @@ def cleanup_after_test():
backup_id_counter = 0 backup_id_counter = 0
def new_backup_name(): def new_backup_name():
global backup_id_counter global backup_id_counter
backup_id_counter += 1 backup_id_counter += 1
@ -39,11 +46,13 @@ def new_backup_name():
def get_backup_dir(backup_name): def get_backup_dir(backup_name):
counter = int(backup_name.split(',')[1].strip("')/ ")) counter = int(backup_name.split(",")[1].strip("')/ "))
return os.path.join(instance.path, f'backups/{counter}') return os.path.join(instance.path, f"backups/{counter}")
@pytest.mark.parametrize("engine", ["MergeTree", "Log", "TinyLog", "StripeLog", "Memory"]) @pytest.mark.parametrize(
"engine", ["MergeTree", "Log", "TinyLog", "StripeLog", "Memory"]
)
def test_restore_table(engine): def test_restore_table(engine):
backup_name = new_backup_name() backup_name = new_backup_name()
create_and_fill_table(engine=engine) create_and_fill_table(engine=engine)
@ -58,7 +67,9 @@ def test_restore_table(engine):
assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n"
@pytest.mark.parametrize("engine", ["MergeTree", "Log", "TinyLog", "StripeLog", "Memory"]) @pytest.mark.parametrize(
"engine", ["MergeTree", "Log", "TinyLog", "StripeLog", "Memory"]
)
def test_restore_table_into_existing_table(engine): def test_restore_table_into_existing_table(engine):
backup_name = new_backup_name() backup_name = new_backup_name()
create_and_fill_table(engine=engine) create_and_fill_table(engine=engine)
@ -66,10 +77,14 @@ def test_restore_table_into_existing_table(engine):
assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n"
instance.query(f"BACKUP TABLE test.table TO {backup_name}") instance.query(f"BACKUP TABLE test.table TO {backup_name}")
instance.query(f"RESTORE TABLE test.table INTO test.table FROM {backup_name} SETTINGS throw_if_table_exists=0") instance.query(
f"RESTORE TABLE test.table INTO test.table FROM {backup_name} SETTINGS throw_if_table_exists=0"
)
assert instance.query("SELECT count(), sum(x) FROM test.table") == "200\t9900\n" assert instance.query("SELECT count(), sum(x) FROM test.table") == "200\t9900\n"
instance.query(f"RESTORE TABLE test.table INTO test.table FROM {backup_name} SETTINGS throw_if_table_exists=0") instance.query(
f"RESTORE TABLE test.table INTO test.table FROM {backup_name} SETTINGS throw_if_table_exists=0"
)
assert instance.query("SELECT count(), sum(x) FROM test.table") == "300\t14850\n" assert instance.query("SELECT count(), sum(x) FROM test.table") == "300\t14850\n"
@ -101,7 +116,9 @@ def test_backup_table_under_another_name():
def test_materialized_view(): def test_materialized_view():
backup_name = new_backup_name() backup_name = new_backup_name()
instance.query("CREATE MATERIALIZED VIEW mv_1(x UInt8) ENGINE=MergeTree ORDER BY tuple() POPULATE AS SELECT 1 AS x") instance.query(
"CREATE MATERIALIZED VIEW mv_1(x UInt8) ENGINE=MergeTree ORDER BY tuple() POPULATE AS SELECT 1 AS x"
)
instance.query(f"BACKUP TABLE mv_1 TO {backup_name}") instance.query(f"BACKUP TABLE mv_1 TO {backup_name}")
instance.query("DROP TABLE mv_1") instance.query("DROP TABLE mv_1")
@ -122,9 +139,13 @@ def test_incremental_backup():
instance.query("INSERT INTO test.table VALUES (65, 'a'), (66, 'b')") instance.query("INSERT INTO test.table VALUES (65, 'a'), (66, 'b')")
assert instance.query("SELECT count(), sum(x) FROM test.table") == "102\t5081\n" assert instance.query("SELECT count(), sum(x) FROM test.table") == "102\t5081\n"
instance.query(f"BACKUP TABLE test.table TO {incremental_backup_name} SETTINGS base_backup = {backup_name}") instance.query(
f"BACKUP TABLE test.table TO {incremental_backup_name} SETTINGS base_backup = {backup_name}"
)
instance.query(f"RESTORE TABLE test.table AS test.table2 FROM {incremental_backup_name}") instance.query(
f"RESTORE TABLE test.table AS test.table2 FROM {incremental_backup_name}"
)
assert instance.query("SELECT count(), sum(x) FROM test.table2") == "102\t5081\n" assert instance.query("SELECT count(), sum(x) FROM test.table2") == "102\t5081\n"
@ -135,14 +156,22 @@ def test_incremental_backup_after_renaming_table():
instance.query(f"BACKUP TABLE test.table TO {backup_name}") instance.query(f"BACKUP TABLE test.table TO {backup_name}")
instance.query("RENAME TABLE test.table TO test.table2") instance.query("RENAME TABLE test.table TO test.table2")
instance.query(f"BACKUP TABLE test.table2 TO {incremental_backup_name} SETTINGS base_backup = {backup_name}") instance.query(
f"BACKUP TABLE test.table2 TO {incremental_backup_name} SETTINGS base_backup = {backup_name}"
)
# Files in a base backup can be searched by checksum, so an incremental backup with a renamed table actually # Files in a base backup can be searched by checksum, so an incremental backup with a renamed table actually
# contains only its changed metadata. # contains only its changed metadata.
assert os.path.isdir(os.path.join(get_backup_dir(backup_name), 'metadata')) == True assert os.path.isdir(os.path.join(get_backup_dir(backup_name), "metadata")) == True
assert os.path.isdir(os.path.join(get_backup_dir(backup_name), 'data')) == True assert os.path.isdir(os.path.join(get_backup_dir(backup_name), "data")) == True
assert os.path.isdir(os.path.join(get_backup_dir(incremental_backup_name), 'metadata')) == True assert (
assert os.path.isdir(os.path.join(get_backup_dir(incremental_backup_name), 'data')) == False os.path.isdir(os.path.join(get_backup_dir(incremental_backup_name), "metadata"))
== True
)
assert (
os.path.isdir(os.path.join(get_backup_dir(incremental_backup_name), "data"))
== False
)
instance.query("DROP TABLE test.table2") instance.query("DROP TABLE test.table2")
instance.query(f"RESTORE TABLE test.table2 FROM {incremental_backup_name}") instance.query(f"RESTORE TABLE test.table2 FROM {incremental_backup_name}")
@ -153,13 +182,21 @@ def test_backup_not_found_or_already_exists():
backup_name = new_backup_name() backup_name = new_backup_name()
expected_error = "Backup .* not found" expected_error = "Backup .* not found"
assert re.search(expected_error, instance.query_and_get_error(f"RESTORE TABLE test.table AS test.table2 FROM {backup_name}")) assert re.search(
expected_error,
instance.query_and_get_error(
f"RESTORE TABLE test.table AS test.table2 FROM {backup_name}"
),
)
create_and_fill_table() create_and_fill_table()
instance.query(f"BACKUP TABLE test.table TO {backup_name}") instance.query(f"BACKUP TABLE test.table TO {backup_name}")
expected_error = "Backup .* already exists" expected_error = "Backup .* already exists"
assert re.search(expected_error, instance.query_and_get_error(f"BACKUP TABLE test.table TO {backup_name}")) assert re.search(
expected_error,
instance.query_and_get_error(f"BACKUP TABLE test.table TO {backup_name}"),
)
def test_file_engine(): def test_file_engine():
@ -194,7 +231,9 @@ def test_zip_archive():
assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n"
instance.query(f"BACKUP TABLE test.table TO {backup_name}") instance.query(f"BACKUP TABLE test.table TO {backup_name}")
assert os.path.isfile(os.path.join(os.path.join(instance.path, 'backups/archive.zip'))) assert os.path.isfile(
os.path.join(os.path.join(instance.path, "backups/archive.zip"))
)
instance.query("DROP TABLE test.table") instance.query("DROP TABLE test.table")
assert instance.query("EXISTS test.table") == "0\n" assert instance.query("EXISTS test.table") == "0\n"
@ -208,10 +247,14 @@ def test_zip_archive_with_settings():
create_and_fill_table() create_and_fill_table()
assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n"
instance.query(f"BACKUP TABLE test.table TO {backup_name} SETTINGS compression_method='lzma', compression_level=3, password='qwerty'") instance.query(
f"BACKUP TABLE test.table TO {backup_name} SETTINGS compression_method='lzma', compression_level=3, password='qwerty'"
)
instance.query("DROP TABLE test.table") instance.query("DROP TABLE test.table")
assert instance.query("EXISTS test.table") == "0\n" assert instance.query("EXISTS test.table") == "0\n"
instance.query(f"RESTORE TABLE test.table FROM {backup_name} SETTINGS password='qwerty'") instance.query(
f"RESTORE TABLE test.table FROM {backup_name} SETTINGS password='qwerty'"
)
assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n"

View File

@ -4,13 +4,31 @@ from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__) cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', with_zookeeper=True, image='yandex/clickhouse-server', tag='19.4.5.35', node1 = cluster.add_instance(
stay_alive=True, with_installed_binary=True) "node1",
node2 = cluster.add_instance('node2', with_zookeeper=True, image='yandex/clickhouse-server', tag='19.4.5.35', with_zookeeper=True,
stay_alive=True, with_installed_binary=True) image="yandex/clickhouse-server",
node3 = cluster.add_instance('node3', with_zookeeper=True, image='yandex/clickhouse-server', tag='19.4.5.35', tag="19.4.5.35",
stay_alive=True, with_installed_binary=True) stay_alive=True,
node4 = cluster.add_instance('node4') with_installed_binary=True,
)
node2 = cluster.add_instance(
"node2",
with_zookeeper=True,
image="yandex/clickhouse-server",
tag="19.4.5.35",
stay_alive=True,
with_installed_binary=True,
)
node3 = cluster.add_instance(
"node3",
with_zookeeper=True,
image="yandex/clickhouse-server",
tag="19.4.5.35",
stay_alive=True,
with_installed_binary=True,
)
node4 = cluster.add_instance("node4")
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
@ -24,7 +42,9 @@ def started_cluster():
def test_backup_from_old_version(started_cluster): def test_backup_from_old_version(started_cluster):
node1.query("CREATE TABLE source_table(A Int64, B String) Engine = MergeTree order by tuple()") node1.query(
"CREATE TABLE source_table(A Int64, B String) Engine = MergeTree order by tuple()"
)
node1.query("INSERT INTO source_table VALUES(1, '1')") node1.query("INSERT INTO source_table VALUES(1, '1')")
@ -37,14 +57,24 @@ def test_backup_from_old_version(started_cluster):
node1.restart_with_latest_version() node1.restart_with_latest_version()
node1.query( node1.query(
"CREATE TABLE dest_table (A Int64, B String, Y String) ENGINE = ReplicatedMergeTree('/test/dest_table1', '1') ORDER BY tuple()") "CREATE TABLE dest_table (A Int64, B String, Y String) ENGINE = ReplicatedMergeTree('/test/dest_table1', '1') ORDER BY tuple()"
)
node1.query("INSERT INTO dest_table VALUES(2, '2', 'Hello')") node1.query("INSERT INTO dest_table VALUES(2, '2', 'Hello')")
assert node1.query("SELECT COUNT() FROM dest_table") == "1\n" assert node1.query("SELECT COUNT() FROM dest_table") == "1\n"
node1.exec_in_container(['find', '/var/lib/clickhouse/shadow/1/data/default/source_table']) node1.exec_in_container(
node1.exec_in_container(['cp', '-r', '/var/lib/clickhouse/shadow/1/data/default/source_table/all_1_1_0/', '/var/lib/clickhouse/data/default/dest_table/detached']) ["find", "/var/lib/clickhouse/shadow/1/data/default/source_table"]
)
node1.exec_in_container(
[
"cp",
"-r",
"/var/lib/clickhouse/shadow/1/data/default/source_table/all_1_1_0/",
"/var/lib/clickhouse/data/default/dest_table/detached",
]
)
assert node1.query("SELECT COUNT() FROM dest_table") == "1\n" assert node1.query("SELECT COUNT() FROM dest_table") == "1\n"
@ -62,7 +92,9 @@ def test_backup_from_old_version(started_cluster):
def test_backup_from_old_version_setting(started_cluster): def test_backup_from_old_version_setting(started_cluster):
node2.query("CREATE TABLE source_table(A Int64, B String) Engine = MergeTree order by tuple()") node2.query(
"CREATE TABLE source_table(A Int64, B String) Engine = MergeTree order by tuple()"
)
node2.query("INSERT INTO source_table VALUES(1, '1')") node2.query("INSERT INTO source_table VALUES(1, '1')")
@ -75,13 +107,21 @@ def test_backup_from_old_version_setting(started_cluster):
node2.restart_with_latest_version() node2.restart_with_latest_version()
node2.query( node2.query(
"CREATE TABLE dest_table (A Int64, B String, Y String) ENGINE = ReplicatedMergeTree('/test/dest_table2', '1') ORDER BY tuple() SETTINGS enable_mixed_granularity_parts = 1") "CREATE TABLE dest_table (A Int64, B String, Y String) ENGINE = ReplicatedMergeTree('/test/dest_table2', '1') ORDER BY tuple() SETTINGS enable_mixed_granularity_parts = 1"
)
node2.query("INSERT INTO dest_table VALUES(2, '2', 'Hello')") node2.query("INSERT INTO dest_table VALUES(2, '2', 'Hello')")
assert node2.query("SELECT COUNT() FROM dest_table") == "1\n" assert node2.query("SELECT COUNT() FROM dest_table") == "1\n"
node2.exec_in_container(['cp', '-r', '/var/lib/clickhouse/shadow/1/data/default/source_table/all_1_1_0/', '/var/lib/clickhouse/data/default/dest_table/detached']) node2.exec_in_container(
[
"cp",
"-r",
"/var/lib/clickhouse/shadow/1/data/default/source_table/all_1_1_0/",
"/var/lib/clickhouse/data/default/dest_table/detached",
]
)
assert node2.query("SELECT COUNT() FROM dest_table") == "1\n" assert node2.query("SELECT COUNT() FROM dest_table") == "1\n"
@ -99,7 +139,9 @@ def test_backup_from_old_version_setting(started_cluster):
def test_backup_from_old_version_config(started_cluster): def test_backup_from_old_version_config(started_cluster):
node3.query("CREATE TABLE source_table(A Int64, B String) Engine = MergeTree order by tuple()") node3.query(
"CREATE TABLE source_table(A Int64, B String) Engine = MergeTree order by tuple()"
)
node3.query("INSERT INTO source_table VALUES(1, '1')") node3.query("INSERT INTO source_table VALUES(1, '1')")
@ -110,19 +152,29 @@ def test_backup_from_old_version_config(started_cluster):
node3.query("ALTER TABLE source_table FREEZE PARTITION tuple();") node3.query("ALTER TABLE source_table FREEZE PARTITION tuple();")
def callback(n): def callback(n):
n.replace_config("/etc/clickhouse-server/merge_tree_settings.xml", n.replace_config(
"<clickhouse><merge_tree><enable_mixed_granularity_parts>1</enable_mixed_granularity_parts></merge_tree></clickhouse>") "/etc/clickhouse-server/merge_tree_settings.xml",
"<clickhouse><merge_tree><enable_mixed_granularity_parts>1</enable_mixed_granularity_parts></merge_tree></clickhouse>",
)
node3.restart_with_latest_version(callback_onstop=callback) node3.restart_with_latest_version(callback_onstop=callback)
node3.query( node3.query(
"CREATE TABLE dest_table (A Int64, B String, Y String) ENGINE = ReplicatedMergeTree('/test/dest_table3', '1') ORDER BY tuple() SETTINGS enable_mixed_granularity_parts = 1") "CREATE TABLE dest_table (A Int64, B String, Y String) ENGINE = ReplicatedMergeTree('/test/dest_table3', '1') ORDER BY tuple() SETTINGS enable_mixed_granularity_parts = 1"
)
node3.query("INSERT INTO dest_table VALUES(2, '2', 'Hello')") node3.query("INSERT INTO dest_table VALUES(2, '2', 'Hello')")
assert node3.query("SELECT COUNT() FROM dest_table") == "1\n" assert node3.query("SELECT COUNT() FROM dest_table") == "1\n"
node3.exec_in_container(['cp', '-r', '/var/lib/clickhouse/shadow/1/data/default/source_table/all_1_1_0/', '/var/lib/clickhouse/data/default/dest_table/detached']) node3.exec_in_container(
[
"cp",
"-r",
"/var/lib/clickhouse/shadow/1/data/default/source_table/all_1_1_0/",
"/var/lib/clickhouse/data/default/dest_table/detached",
]
)
assert node3.query("SELECT COUNT() FROM dest_table") == "1\n" assert node3.query("SELECT COUNT() FROM dest_table") == "1\n"
@ -140,9 +192,13 @@ def test_backup_from_old_version_config(started_cluster):
def test_backup_and_alter(started_cluster): def test_backup_and_alter(started_cluster):
node4.query("CREATE DATABASE test ENGINE=Ordinary") # Different path in shadow/ with Atomic node4.query(
"CREATE DATABASE test ENGINE=Ordinary"
) # Different path in shadow/ with Atomic
node4.query("CREATE TABLE test.backup_table(A Int64, B String, C Date) Engine = MergeTree order by tuple()") node4.query(
"CREATE TABLE test.backup_table(A Int64, B String, C Date) Engine = MergeTree order by tuple()"
)
node4.query("INSERT INTO test.backup_table VALUES(2, '2', toDate('2019-10-01'))") node4.query("INSERT INTO test.backup_table VALUES(2, '2', toDate('2019-10-01'))")
@ -154,7 +210,14 @@ def test_backup_and_alter(started_cluster):
node4.query("ALTER TABLE test.backup_table DROP PARTITION tuple()") node4.query("ALTER TABLE test.backup_table DROP PARTITION tuple()")
node4.exec_in_container(['cp', '-r', '/var/lib/clickhouse/shadow/1/data/test/backup_table/all_1_1_0/', '/var/lib/clickhouse/data/test/backup_table/detached']) node4.exec_in_container(
[
"cp",
"-r",
"/var/lib/clickhouse/shadow/1/data/test/backup_table/all_1_1_0/",
"/var/lib/clickhouse/data/test/backup_table/detached",
]
)
node4.query("ALTER TABLE test.backup_table ATTACH PARTITION tuple()") node4.query("ALTER TABLE test.backup_table ATTACH PARTITION tuple()")
@ -3,20 +3,29 @@ import pytest
from helpers.cluster import ClickHouseCluster from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__) cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', with_zookeeper=True, image='yandex/clickhouse-server', tag='19.17.8.54', stay_alive=True, with_installed_binary=True) node1 = cluster.add_instance(
node2 = cluster.add_instance('node2', main_configs=['configs/wide_parts_only.xml'], with_zookeeper=True) "node1",
with_zookeeper=True,
image="yandex/clickhouse-server",
tag="19.17.8.54",
stay_alive=True,
with_installed_binary=True,
)
node2 = cluster.add_instance(
"node2", main_configs=["configs/wide_parts_only.xml"], with_zookeeper=True
)
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
def start_cluster(): def start_cluster():
try: try:
cluster.start() cluster.start()
create_query = '''CREATE TABLE t(date Date, id UInt32) create_query = """CREATE TABLE t(date Date, id UInt32)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/t', '{}') ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/t', '{}')
PARTITION BY toYYYYMM(date) PARTITION BY toYYYYMM(date)
ORDER BY id''' ORDER BY id"""
node1.query(create_query.format(1)) node1.query(create_query.format(1))
node1.query("DETACH TABLE t") # stop being leader node1.query("DETACH TABLE t") # stop being leader
node2.query(create_query.format(2)) node2.query(create_query.format(2))
node1.query("ATTACH TABLE t") node1.query("ATTACH TABLE t")
yield cluster yield cluster
@ -3,9 +3,15 @@ import pytest
from helpers.cluster import ClickHouseCluster from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__, name="aggregate_fixed_key") cluster = ClickHouseCluster(__file__, name="aggregate_fixed_key")
node1 = cluster.add_instance('node1', with_zookeeper=True, image='yandex/clickhouse-server', tag='21.3', with_installed_binary=True) node1 = cluster.add_instance(
node2 = cluster.add_instance('node2', with_zookeeper=True) "node1",
node3 = cluster.add_instance('node3', with_zookeeper=True) with_zookeeper=True,
image="yandex/clickhouse-server",
tag="21.3",
with_installed_binary=True,
)
node2 = cluster.add_instance("node2", with_zookeeper=True)
node3 = cluster.add_instance("node3", with_zookeeper=True)
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
@ -38,8 +44,9 @@ def test_two_level_merge(start_cluster):
# covers only the keys64 method # covers only the keys64 method
for node in start_cluster.instances.values(): for node in start_cluster.instances.values():
print(node.query( print(
""" node.query(
"""
SELECT SELECT
throwIf(uniqExact(date) != count(), 'group by is borked') throwIf(uniqExact(date) != count(), 'group by is borked')
FROM ( FROM (
@ -58,4 +65,5 @@ def test_two_level_merge(start_cluster):
max_threads = 2, max_threads = 2,
prefer_localhost_replica = 0 prefer_localhost_replica = 0
""" """
)) )
)
@ -3,14 +3,24 @@ import pytest
from helpers.cluster import ClickHouseCluster from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__, name="aggregate_state") cluster = ClickHouseCluster(__file__, name="aggregate_state")
node1 = cluster.add_instance('node1', node1 = cluster.add_instance(
with_zookeeper=False, image='yandex/clickhouse-server', tag='19.16.9.37', stay_alive=True, "node1",
with_installed_binary=True) with_zookeeper=False,
node2 = cluster.add_instance('node2', image="yandex/clickhouse-server",
with_zookeeper=False, image='yandex/clickhouse-server', tag='19.16.9.37', stay_alive=True, tag="19.16.9.37",
with_installed_binary=True) stay_alive=True,
node3 = cluster.add_instance('node3', with_zookeeper=False) with_installed_binary=True,
node4 = cluster.add_instance('node4', with_zookeeper=False) )
node2 = cluster.add_instance(
"node2",
with_zookeeper=False,
image="yandex/clickhouse-server",
tag="19.16.9.37",
stay_alive=True,
with_installed_binary=True,
)
node3 = cluster.add_instance("node3", with_zookeeper=False)
node4 = cluster.add_instance("node4", with_zookeeper=False)
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
@ -27,6 +37,7 @@ def start_cluster():
# TODO Implement versioning of serialization format for aggregate function states. # TODO Implement versioning of serialization format for aggregate function states.
# NOTE This test is too ad-hoc. # NOTE This test is too ad-hoc.
def test_backward_compatability(start_cluster): def test_backward_compatability(start_cluster):
node1.query("create table tab (x UInt64) engine = Memory") node1.query("create table tab (x UInt64) engine = Memory")
node2.query("create table tab (x UInt64) engine = Memory") node2.query("create table tab (x UInt64) engine = Memory")
@ -38,24 +49,34 @@ def test_backward_compatability(start_cluster):
node3.query("INSERT INTO tab VALUES (3)") node3.query("INSERT INTO tab VALUES (3)")
node4.query("INSERT INTO tab VALUES (4)") node4.query("INSERT INTO tab VALUES (4)")
assert (node1.query("SELECT avg(x) FROM remote('node{1..4}', default, tab)") == '2.5\n') assert (
assert (node2.query("SELECT avg(x) FROM remote('node{1..4}', default, tab)") == '2.5\n') node1.query("SELECT avg(x) FROM remote('node{1..4}', default, tab)") == "2.5\n"
assert (node3.query("SELECT avg(x) FROM remote('node{1..4}', default, tab)") == '2.5\n') )
assert (node4.query("SELECT avg(x) FROM remote('node{1..4}', default, tab)") == '2.5\n') assert (
node2.query("SELECT avg(x) FROM remote('node{1..4}', default, tab)") == "2.5\n"
)
assert (
node3.query("SELECT avg(x) FROM remote('node{1..4}', default, tab)") == "2.5\n"
)
assert (
node4.query("SELECT avg(x) FROM remote('node{1..4}', default, tab)") == "2.5\n"
)
# Also check with persisted aggregate function state # Also check with persisted aggregate function state
node1.query("create table state (x AggregateFunction(avg, UInt64)) engine = Log") node1.query("create table state (x AggregateFunction(avg, UInt64)) engine = Log")
node1.query("INSERT INTO state SELECT avgState(arrayJoin(CAST([1, 2, 3, 4] AS Array(UInt64))))") node1.query(
"INSERT INTO state SELECT avgState(arrayJoin(CAST([1, 2, 3, 4] AS Array(UInt64))))"
)
assert (node1.query("SELECT avgMerge(x) FROM state") == '2.5\n') assert node1.query("SELECT avgMerge(x) FROM state") == "2.5\n"
node1.restart_with_latest_version() node1.restart_with_latest_version()
assert (node1.query("SELECT avgMerge(x) FROM state") == '2.5\n') assert node1.query("SELECT avgMerge(x) FROM state") == "2.5\n"
node1.query("drop table tab") node1.query("drop table tab")
node1.query("drop table state") node1.query("drop table state")
node2.query("drop table tab") node2.query("drop table tab")
node3.query("drop table tab") node3.query("drop table tab")
node4.query("drop table tab") node4.query("drop table tab")